018 Ceph: Removing and Adding MONs and OSDs
1. OSD Management
1.1 Removing a failed OSD
Check the OSD IDs on the current node:
[root@ceph2 ceph]# df -hT
Filesystem Type Size Used Avail Use% Mounted on
/dev/vda1 xfs 40G .7G 38G % /
devtmpfs devtmpfs .9G .9G % /dev
tmpfs tmpfs .9G .9G % /dev/shm
tmpfs tmpfs .9G 201M .7G % /run
tmpfs tmpfs .9G .9G % /sys/fs/cgroup
/dev/vdb1 xfs 15G 213M 15G % /var/lib/ceph/osd/ceph-
/dev/vdc1 xfs 15G 228M 15G % /var/lib/ceph/osd/ceph-
/dev/vdd1 xfs 15G 152M 15G % /var/lib/ceph/osd/ceph-
tmpfs tmpfs 380M 380M % /run/user/
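Each OSD also records its numeric id in the whoami file inside its data directory (the file is visible in the directory listing later in this article), so the mount points can be mapped to OSD ids directly; a small sketch:
for d in /var/lib/ceph/osd/ceph-*; do echo "$d -> osd.$(cat $d/whoami)"; done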
Stop the failed OSD (osd.0 in this example):
[root@ceph2 ceph]# systemctl stop ceph-osd@0
Check the cluster status:
[root@ceph2 ceph]# ceph -s
  cluster:
    id:     35a91e48-8244-4e96-a7ee-980ab989d20d
    health: HEALTH_WARN
            1 osds down
            Degraded data redundancy: / objects degraded (10.106%), pgs unclean, pgs degraded
  services:
    mon: 3 daemons, quorum ceph2,ceph3,ceph4
    mgr: ceph4(active), standbys: ceph3, ceph2
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, up:standby
    osd: 9 osds: 8 up, 9 in
    rbd-mirror: 1 daemon active
  data:
    pools: pools, pgs
    objects: objects, MB
    usage: MB used, GB / GB avail
    pgs: / objects degraded (10.106%)
         active+clean
         active+undersized+degraded
[root@ceph2 ceph]# ceph osd tree
ID CLASS WEIGHT TYPE NAME STATUS REWEIGHT PRI-AFF
- 0.13499 root dc1
- 0.04500 rack rack1
- 0.04500 host ceph2
hdd 0.01500 osd.0 down 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 1.00000
- 0.04500 rack rack2
- 0.04500 host ceph3
hdd 0.01500 osd. up 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 1.00000
- 0.04500 rack rack3
- 0.04500 host ceph4
hdd 0.01500 osd. up 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 0.50000
hdd 0.01500 osd. up 1.00000 1.00000
- 0.13499 root default
- 0.04500 host ceph2
hdd 0.01500 osd.0 down 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 1.00000
- 0.04500 host ceph3
hdd 0.01500 osd. up 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 1.00000
- 0.04500 host ceph4
hdd 0.01500 osd. up 1.00000 1.00000
hdd 0.01500 osd. up 1.00000 0.50000
hdd 0.01500 osd. up 1.00000 1.00000
[root@ceph2 ceph]# ceph osd out osd.0
marked out osd.0.
[root@ceph2 ceph]# ceph -s
  cluster:
    id:     35a91e48-8244-4e96-a7ee-980ab989d20d
    health: HEALTH_WARN
            Degraded data redundancy: / objects degraded (19.005%), pgs unclean, pgs degraded
  services:
    mon: 3 daemons, quorum ceph2,ceph3,ceph4
    mgr: ceph4(active), standbys: ceph3, ceph2
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, up:standby
    osd: 9 osds: 8 up, 8 in
    rbd-mirror: 1 daemon active
  data:
    pools: pools, pgs
    objects: objects, MB
    usage: MB used, GB / GB avail
    pgs: / objects degraded (19.005%)
         active+clean
         active+recovery_wait+degraded
         active+recovering+degraded
  io:
    client: B/s rd, B/s wr, op/s rd, op/s wr
    recovery: kB/s, keys/s, objects/s
[root@ceph2 ceph]# ceph osd crush rm osd.0
removed item id 0 name 'osd.0' from crush map
[root@ceph2 ceph]# ceph auth list|grep osd.0
installed auth entries:
osd.0
[root@ceph2 ceph]# ceph auth rm osd.0
updated
[root@ceph2 ceph]# ceph -s
  cluster:
    id:     35a91e48-8244-4e96-a7ee-980ab989d20d
    health: HEALTH_OK
  services:
    mon: 3 daemons, quorum ceph2,ceph3,ceph4
    mgr: ceph4(active), standbys: ceph3, ceph2
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, up:standby
    osd: 9 osds: 8 up, 8 in
    rbd-mirror: 1 daemon active
  data:
    pools: pools, pgs
    objects: objects, MB
    usage: MB used, GB / GB avail
    pgs: active+clean
  io:
    client: B/s wr, op/s rd, op/s wr
[root@ceph2 ceph]# ceph osd rm osd.0
removed osd.0
[root@ceph2 ceph]# ceph -s
  cluster:
    id:     35a91e48-8244-4e96-a7ee-980ab989d20d
    health: HEALTH_OK
  services:
    mon: 3 daemons, quorum ceph2,ceph3,ceph4
    mgr: ceph4(active), standbys: ceph3, ceph2
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, up:standby
    osd: 8 osds: 8 up, 8 in
    rbd-mirror: 1 daemon active
  data:
    pools: pools, pgs
    objects: objects, MB
    usage: MB used, GB / GB avail
    pgs: active+clean
  io:
    client: B/s rd, B/s wr, op/s rd, op/s wr
Method 2:
ceph osd out osd.3
systemctl stop ceph-osd@3
ceph osd purge osd.3    # purge combines the crush rm, auth rm, and osd rm steps above into one command (see the sketch below)
Finally, delete any settings for this OSD from the configuration file.
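Before destroying an OSD it is worth checking that no PG still depends on it; Luminous added a query for exactly that. A hedged sketch, again assuming the failed OSD is osd.3 (some releases also insist on a safety flag before purge will act):
ceph osd safe-to-destroy 3                    # reports whether the OSD can be removed without reducing data durability
ceph osd purge osd.3 --yes-i-really-mean-it   # crush rm + auth rm + osd rm in one step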
1.2 Adding the OSD back
The experiment above simulated a failed OSD and removed it, but the disk's partition and mount were never cleared, so re-adding it can start directly from creating the OSD key. In production, after physically replacing the disk you must start from partitioning and formatting it; see https://www.cnblogs.com/zyxnhr/p/10553717.html for that procedure.
The detailed procedure follows:
[root@ceph2 ceph-0]# ceph osd create
0
[root@ceph2 ceph-0]# ceph-authtool --create-keyring /etc/ceph/ceph.osd.0.keyring --gen-key -n osd.0 --cap mon 'allow profile osd' --cap mgr 'allow profile osd' --cap osd 'allow *'
creating /etc/ceph/ceph.osd.0.keyring
[root@ceph2 ceph-0]# ceph auth import -i /etc/ceph/ceph.osd.0.keyring
imported keyring
[root@ceph2 ceph-0]# ceph auth get-or-create osd.0 -o /var/lib/ceph/osd/ceph-0/keyring
[root@ceph2 ceph-0]# ceph-osd -i 0 --mkfs --cluster ceph
-- ::58.928076 7f564d51fd00 - created object store /var/lib/ceph/osd/ceph-0 for osd.0 fsid 35a91e48-8244-4e96-a7ee-980ab989d20d
[root@ceph2 ceph-0]# cd /var/lib/ceph/osd/ceph-0
[root@ceph2 ceph-0]# rm -f journal
[root@ceph2 ceph-0]# partuuid_0=`blkid /dev/vdb1|awk -F "[\"\"]" '{print $8}'`
[root@ceph2 ceph-0]# echo $partuuid_0
745dce53-1c63-4c50-b434-d441038dafe4
[root@ceph2 ceph-0]# ln -s /dev/disk/by-partuuid/$partuuid_0 ./journal
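The awk field split above is fragile, since it depends on the exact layout of blkid's output. blkid can print the PARTUUID by itself, which builds the same symlink more robustly; a sketch assuming the journal partition is /dev/vdb1 as above:
partuuid_0=$(blkid -s PARTUUID -o value /dev/vdb1)
ln -sf /dev/disk/by-partuuid/$partuuid_0 ./journal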
[root@ceph2 ceph-]# ll
total
-rw-r--r-- root root Mar : activate.monmap
-rw-r--r-- ceph ceph Mar : active
-rw-r--r-- ceph ceph Mar : ceph_fsid
drwxr-xr-x ceph ceph Mar : current
-rw-r--r-- ceph ceph Mar : fsid
lrwxrwxrwx root root Mar : journal -> /dev/disk/by-partuuid/745dce53-1c63-4c50-b434-d441038dafe4
-rw-r--r-- ceph ceph Mar : journal_uuid
-rw------- ceph ceph Mar : keyring
-rw-r--r-- ceph ceph Mar : magic
-rw-r--r-- ceph ceph Mar : ready
-rw-r--r-- ceph ceph Mar : store_version
-rw-r--r-- ceph ceph Mar : superblock
-rw-r--r-- ceph ceph Mar : systemd
-rw-r--r-- ceph ceph Mar : type
-rw-r--r-- ceph ceph Mar : whoami
[root@ceph2 ceph-0]# chown ceph.ceph -R /var/lib/ceph
[root@ceph2 ceph-0]# ceph-osd --mkjournal -i 0 --cluster ceph
-- ::02.007442 7f416ec90d00 - journal read_header error decoding journal header
-- ::02.018206 7f416ec90d00 - created new journal /var/lib/ceph/osd/ceph-0/journal for object store /var/lib/ceph/osd/ceph-0
[root@ceph2 ceph-0]# chown ceph.ceph /dev/disk/by-partuuid/$partuuid_0
[root@ceph2 ceph-0]# ceph osd crush add-bucket ceph2 host --cluster ceph
bucket 'ceph2' already exists   # no need to create the bucket: removing the OSD did not remove the host bucket
[root@ceph2 ceph-0]# ceph osd crush move ceph2 root=default --cluster ceph   # likewise, ceph2 is already under the default root of the CRUSH map
no need to move item id - name 'ceph2' to location {root=default} in crush map
[root@ceph2 ceph-0]# ceph osd crush add osd.0 0.01500 root=default host=ceph2
add item id 0 name 'osd.0' weight 0.015 at location {host=ceph2,root=default} to crush map
[root@ceph2 ceph-0]# systemctl start ceph-osd@0
[root@ceph2 ceph-0]# systemctl enable ceph-osd@0
[root@ceph2 ceph-0]# ps -ef|grep osd
ceph Mar28 ? :: /usr/bin/ceph-osd -f --cluster ceph --id --setuser ceph --setgroup ceph
ceph Mar28 ? :: /usr/bin/ceph-osd -f --cluster ceph --id --setuser ceph --setgroup ceph
ceph : ? :: /usr/bin/ceph-osd -f --cluster ceph --id 0 --setuser ceph --setgroup ceph
root : pts/ :: grep --color=auto osd
[root@ceph2 ceph-0]# ceph -s
  cluster:
    id:     35a91e48-8244-4e96-a7ee-980ab989d20d
    health: HEALTH_WARN
            / objects misplaced (0.040%)
            Degraded data redundancy: / objects degraded (17.050%), pgs unclean, pgs degraded   # data is being rebalanced onto the re-added OSD
  services:
    mon: 3 daemons, quorum ceph2,ceph3,ceph4
    mgr: ceph4(active), standbys: ceph3, ceph2
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, up:standby
    osd: 9 osds: 9 up, 9 in; remapped pgs
    rbd-mirror: 1 daemon active
  data:
    pools: pools, pgs
    objects: objects, MB
    usage: MB used, GB / GB avail
    pgs: / objects degraded (17.050%)
         / objects misplaced (0.040%)
         active+clean
         active+recovery_wait+degraded
         active+recovering+degraded
         active+remapped+backfill_wait
  io:
    client: B/s rd, B/s wr, op/s rd, op/s wr
    recovery: kB/s, keys/s, objects/s
[root@ceph2 ceph-0]# ceph -s
  cluster:
    id:     35a91e48-8244-4e96-a7ee-980ab989d20d
    health: HEALTH_OK
  services:
    mon: 3 daemons, quorum ceph2,ceph3,ceph4
    mgr: ceph4(active), standbys: ceph3, ceph2
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, up:standby
    osd: 9 osds: 9 up, 9 in   # back to normal: 9 OSDs and the cluster is healthy again
    rbd-mirror: 1 daemon active
  data:
    pools: pools, pgs
    objects: objects, MB
    usage: MB used, GB / GB avail
    pgs: active+clean
  io:
    client: B/s rd, B/s wr, op/s rd, op/s wr
1.3 Removing a failed node
Method 1:
1. First remove every OSD on the node (as in section 1.1).
2. ceph osd crush remove serverc
Method 2:
1. First migrate every OSD off the node.
2. Edit the CRUSH map and delete every entry that refers to the node (a sketch with crushtool follows).
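A hedged sketch of method 2's CRUSH map round trip (the file names are arbitrary):
ceph osd getcrushmap -o crushmap.bin          # dump the compiled CRUSH map
crushtool -d crushmap.bin -o crushmap.txt     # decompile it to editable text
# edit crushmap.txt: delete the host bucket and every rule/item that references it
crushtool -c crushmap.txt -o crushmap-new.bin # recompile
ceph osd setcrushmap -i crushmap-new.bin      # inject the edited map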
1.4 OSD Recovery and Backfill
When OSDs are added or removed, Ceph rebalances PGs. Backfill and recovery can generate heavy back-end traffic and hurt cluster performance.
To limit that impact, the backfill/recovery behaviour can be tuned (a runtime example follows the list):
osd_recovery_op_priority    # priority of recovery ops relative to client ops, 1-63, default 10; client ops default to priority 63 (osd_client_op_priority)
osd_recovery_max_active     # number of active recovery requests each OSD handles at once, default 15; raising it speeds recovery but adds load
osd_recovery_threads        # number of threads used for data recovery, default 1
osd_max_backfills           # maximum concurrent backfill operations per OSD, default 10
osd_backfill_scan_min       # minimum number of objects per backfill scan, default 64
osd_backfill_scan_max       # maximum number of objects per backfill scan, default 512
osd_backfill_full_ratio     # fullness ratio at which an OSD refuses new backfill requests, default 0.85
osd_backfill_retry_interval # seconds to wait before retrying a rejected backfill request
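For example, recovery traffic can be throttled before planned maintenance and opened up again afterwards; a minimal sketch (the values are illustrative, not recommendations):
ceph tell osd.* injectargs '--osd_max_backfills 1 --osd_recovery_max_active 1'
# ... perform the maintenance, wait for HEALTH_OK ...
ceph tell osd.* injectargs '--osd_max_backfills 10 --osd_recovery_max_active 15'
To make a change permanent, set it in the [osd] section of /etc/ceph/ceph.conf and restart the OSDs.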
2. Monitor Management
2.1 Removing a monitor
Stop the monitor process:
[root@ceph2 ceph]# systemctl stop ceph-mon@ceph2
Remove the monitor from the cluster:
[root@ceph2 ceph]# ceph mon remove ceph2
removing mon.ceph2 at 172.25.250.11:6789/0, there will be 2 monitors
[root@ceph2 ceph]# ceph -s
  services:
    mon: 2 daemons, quorum ceph3,ceph4
    mgr: ceph4(active), standbys: ceph2, ceph3
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, up:standby
    osd: 9 osds: 9 up, 9 in
    rbd-mirror: 1 daemon active
Remove the monitor's data directory:
[root@ceph2 ceph]# cd /var/lib/ceph/mon/
[root@ceph2 mon]# ls
ceph-ceph2
[root@ceph2 mon]# rm -rf ceph-ceph2/
2.2 Adding the monitor back
[root@ceph2 mon]# cd
Create the data directory:
[root@ceph2 ~]# mkdir /var/lib/ceph/mon/ceph-ceph2
[root@ceph2 ~]# chown ceph.ceph -R !$
chown ceph.ceph -R /var/lib/ceph/mon/ceph-ceph2
[root@ceph2 ~]# ceph auth get mon.
exported keyring for mon.
[mon.]
key = AQDqfYxcAAAAABAAIc47ZLcYh013gzu3WWruew==
caps mon = "allow *"
[root@ceph2 ~]# ceph auth get mon. -o /tmp/ceph.mon.keyring
exported keyring for mon.
[root@ceph2 ~]# cat /tmp/ceph.mon.keyring
[mon.]
key = AQDqfYxcAAAAABAAIc47ZLcYh013gzu3WWruew==
caps mon = "allow *"
[root@ceph2 ~]# ceph mon getmap -o /tmp/monmap.bin
got monmap epoch
[root@ceph2 ~]# monmaptool --print /tmp/monmap.bin
monmaptool: monmap file /tmp/monmap.bin
epoch
fsid 35a91e48-8244-4e96-a7ee-980ab989d20d
last_changed -- ::25.819243
created 2019-03-16 12:39:14.839999
0: 172.25.250.12:6789/0 mon.ceph3
1: 172.25.250.13:6789/0 mon.ceph4
[root@ceph2 ~]# sudo -u ceph ceph-mon -i ceph2 --mkfs --monmap /tmp/monmap.bin --keyring /tmp/ceph.mon.keyring
[root@ceph2 ~]# ll /var/lib/ceph/mon/ceph-ceph2/
total
-rw------- ceph ceph Mar : keyring
-rw-r--r-- ceph ceph Mar : kv_backend
drwxr-xr-x ceph ceph Mar : store.db
[root@ceph2 ~]# ps -ef |grep ceph-mon
root : pts/ :: grep --color=auto ceph-mon
[root@ceph2 ~]# sudo -u ceph ceph-mon -i ceph2 --public-addr 172.25.250.11:6789
[root@ceph2 ~]# !ps
ps -ef |grep ceph-mon
ceph : ? :: ceph-mon -i ceph2 --public-addr 172.25.250.11:6789
root : pts/ :: grep --color=auto ceph-mon
[root@ceph2 ~]# ceph -s
  services:
    mon: 3 daemons, quorum ceph2,ceph3,ceph4
    mgr: ceph4(active), standbys: ceph2, ceph3
    mds: cephfs-1/1/1 up {0=ceph2=up:active}, up:standby
    osd: 9 osds: 9 up, 9 in
    rbd-mirror: 1 daemon active
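Note that the monitor above was launched by hand rather than through systemd, so it will not come back after a reboot. A hedged cleanup, assuming the stock ceph-mon@ unit is installed:
pkill -u ceph -f 'ceph-mon -i ceph2'    # stop the manually started daemon
systemctl start ceph-mon@ceph2
systemctl enable ceph-mon@ceph2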
2.3 Monitor troubleshooting
[root@ceph2 ~]# ceph daemon mon.ceph2 quorum_status
{    # check which monitors hold quorum votes
"election_epoch": ,
"quorum": [
0,
1,
2
],
"quorum_names": [
"ceph2",
"ceph3",
"ceph4"
],
"quorum_leader_name": "ceph2",
"monmap": {
"epoch": ,
"fsid": "35a91e48-8244-4e96-a7ee-980ab989d20d",
"modified": "2019-03-28 09:13:19.932456",
"created": "2019-03-16 12:39:14.839999",
"features": {
"persistent": [
"kraken",
"luminous"
],
"optional": []
},
"mons": [
{
"rank": ,
"name": "ceph2",
"addr": "172.25.250.11:6789/0",
"public_addr": "172.25.250.11:6789/0"
},
{
"rank": ,
"name": "ceph3",
"addr": "172.25.250.12:6789/0",
"public_addr": "172.25.250.12:6789/0"
},
{
"rank": ,
"name": "ceph4",
"addr": "172.25.250.13:6789/0",
"public_addr": "172.25.250.13:6789/0"
}
]
}
}
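The same quorum information can also be fetched through the cluster interface from any client, without a local admin socket:
ceph quorum_status --format json-pretty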
2.4 Managing daemons through admin sockets
Admin sockets let an administrator interact with a daemon directly, for example to view or change its configuration parameters at runtime.
A daemon's socket file is normally /var/run/ceph/$cluster-$type.$id.asok
Operations over the admin socket take one of two forms:
ceph daemon $type.$id command
or: ceph --admin-daemon /var/run/ceph/$cluster-$type.$id.asok command
Commonly used commands:
help
config get parameter
config set parameter value
config show
perf dump
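The two invocation forms are interchangeable; for example, both of these query osd.6 (which runs on this node) for the same parameter, assuming the default socket path:
ceph daemon osd.6 config get osd_max_backfills
ceph --admin-daemon /var/run/ceph/ceph-osd.6.asok config get osd_max_backfills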
Viewing parameters:
[root@ceph2 ceph]# ceph daemon osd.6 config show|grep osd_default
"osd_default_data_pool_replay_window": "",
"osd_default_notify_timeout": "",
[root@ceph2 ceph]# ceph daemon osd.6 config get xio_mp_max_64
{
"xio_mp_max_64": ""
}
Modifying parameters at runtime:
[root@ceph2 ceph]# ceph tell osd.* injectargs --xio_mp_max_64 65536
osd.: xio_mp_max_64 = '' (not observed, change may require restart)
osd.: xio_mp_max_64 = '' (not observed, change may require restart)
osd.: xio_mp_max_64 = '' (not observed, change may require restart)
osd.: xio_mp_max_64 = '' (not observed, change may require restart)
osd.: xio_mp_max_64 = '' (not observed, change may require restart)
osd.: xio_mp_max_64 = '' (not observed, change may require restart)
osd.: xio_mp_max_64 = '' (not observed, change may require restart)
osd.: xio_mp_max_64 = '' (not observed, change may require restart)
Author's note: the content of this article comes mainly from instructor Yan Wei of Yutian Education; I performed and verified all of the operations myself. If you wish to repost it, please contact Yutian Education (http://www.yutianedu.com/) or instructor Yan (https://www.cnblogs.com/breezey/) for permission first. Thanks!