015 Ceph Cluster Management, Part 1
1. Understanding the Cluster Map
The cluster map is maintained by the monitors and is used to track the state of the Ceph cluster.
When a client starts, it connects to a monitor to obtain a copy of the cluster map, discovers the locations of all other components, and then communicates directly with the daemons it needs in order to store and retrieve data.
The monitors track the state of these cluster components and are also responsible for managing authentication between daemons and clients.
The cluster map is actually a collection of maps, including: the monitor map, OSD map, PG map, MDS map, MGR map, and service map.
1.1 Cluster Map Contents
Monitor map: contains the cluster ID (fsid), the name, IP address, and port of each monitor node, and the epoch of the monitor map.
OSD map: contains the cluster ID, its own epoch, and pool-related information such as pool name, ID, type, replica size, and PG counts. It also contains information about the OSDs, such as their number, status, weight, and the OSD host details.
PG map: contains the PG map version, a timestamp, the latest OSD map epoch, and capacity-related ratios. It also records each PG's ID, object count, state, and state timestamp.
MDS map: contains the MDS addresses and states, and the IDs of the data pool and metadata pool.
MGR map: contains the MGR addresses and states, as well as the lists of available and enabled modules.
Service map: tracks instances of services deployed through librados, such as RGW and rbd-mirror. The service map collects information about these services and makes it available to other components; for example, the MGR dashboard module uses this map to report the status of these client services. (A quick reference of the corresponding query commands is sketched below.)
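As a quick reference, each of the maps above can be dumped from the command line. This is only a summary sketch; the same commands appear with their full output in section 1.2 below:
ceph mon dump          # monitor map
ceph osd dump          # OSD map
ceph osd crush dump    # CRUSH map, as JSON
ceph pg dump           # PG map (large; 'ceph pg stat' gives a one-line summary)
ceph fs dump           # MDS map
ceph mgr dump          # MGR map
ceph service dump      # service map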
1.2 Basic Cluster Map Queries
Query the mon map:
[root@ceph2 ~]# ceph mon dump
dumped monmap epoch
epoch
fsid 35a91e48--4e96-a7ee-980ab989d20d
last_changed -- ::14.839999
created -- ::14.839999
: 172.25.250.11:/ mon.ceph2
: 172.25.250.12:/ mon.ceph3
: 172.25.250.13:/ mon.ceph4
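Besides the full ceph mon dump, a couple of shorter checks are often enough. A minimal sketch (output not captured on this cluster):
ceph mon stat                        # one-line summary: monmap epoch, monitors, quorum
ceph mon dump --format json-pretty   # the same map in machine-readable JSON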
Query the osd map:
[root@ceph2 ~]# ceph osd dump
epoch
fsid 35a91e48--4e96-a7ee-980ab989d20d
created -- ::22.552356
modified -- ::15.354383
flags sortbitwise,recovery_deletes,purged_snapdirs
crush_version
full_ratio 0.95
backfillfull_ratio 0.9
nearfull_ratio 0.85
require_min_compat_client jewel
min_compat_client jewel
require_osd_release luminous
pool 'testpool' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
snap 'testpool-snap-20190316' -- ::34.150433
snap 'testpool-snap-2' -- ::15.430823
pool 'rbd' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
removed_snaps [~]
pool 'rbdmirror' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
removed_snaps [~]
pool '.rgw.root' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rgw
pool 'default.rgw.control' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rgw
pool 'default.rgw.meta' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rgw
pool 'default.rgw.log' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rgw
pool 'xiantao.rgw.control' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change owner flags hashpspool stripe_width application rgw
pool 'xiantao.rgw.meta' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change owner flags hashpspool stripe_width application rgw
pool 'xiantao.rgw.log' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change owner flags hashpspool stripe_width application rgw
pool 'cephfs_metadata' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application cephfs
pool 'cephfs_data' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application cephfs
pool 'test' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
pool 'ssdpool' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
max_osd
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ exists,up 745dce53-1c63-4c50-b434-d441038dafe4
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ exists,up a7562276-6dfd--b248-a7cbdb64ebec
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ exists,up bbef1a00-3a31-48a0-a065-3a16b9edc3b1
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ exists,up e934a4fb--4e85-895c-f66cc5534ceb
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ exists,up e2c33bb3-02d2-4cce-85e8-25c419351673
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ exists,up d299e33c-0c24-4cd9-a37a-a6fcd420a529
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ exists,up debe7f4e-656b-48e2-a0b2-bdd8613afcc4
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ exists,up 8c403679--48d0-812b-72050ad43aae
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ exists,up bb73edf8-ca97-40c3-a727-d5fde1a9d1d9
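In addition to the full dump above, the OSD map can be queried for specific pieces of information. A minimal sketch; the pool name rbd comes from this cluster, while test-object is only a hypothetical object name:
ceph osd dump | grep '^pool'     # only the pool definitions
ceph osd find 0                  # locate osd.0: host, address and CRUSH position
ceph osd map rbd test-object     # which PG and OSDs a (hypothetical) object maps to
ceph osd tree                    # the OSDs arranged by CRUSH hierarchy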
Query the osd crush map:
[root@ceph2 ~]# ceph osd crush dump
{
"devices": [
{
"id": ,
"name": "osd.0",
"class": "hdd"
},
{
"id": ,
"name": "osd.1",
"class": "hdd"
},
{
"id": ,
"name": "osd.2",
"class": "hdd"
},
{
"id": ,
"name": "osd.3",
"class": "hdd"
},
{
"id": ,
"name": "osd.4",
"class": "hdd"
},
{
"id": ,
"name": "osd.5",
"class": "hdd"
},
{
"id": ,
"name": "osd.6",
"class": "hdd"
},
{
"id": ,
"name": "osd.7",
"class": "hdd"
},
{
"id": ,
"name": "osd.8",
"class": "hdd"
}
],
"types": [
{
"type_id": ,
"name": "osd"
},
{
"type_id": ,
"name": "host"
},
{
"type_id": ,
"name": "chassis"
},
{
"type_id": ,
"name": "rack"
},
{
"type_id": ,
"name": "row"
},
{
"type_id": ,
"name": "pdu"
},
{
"type_id": ,
"name": "pod"
},
{
"type_id": ,
"name": "room"
},
{
"type_id": ,
"name": "datacenter"
},
{
"type_id": ,
"name": "region"
},
{
"type_id": ,
"name": "root"
},
{
"type_id": ,
"name": "aaa"
}
],
"buckets": [
{
"id": -,
"name": "default",
"type_id": ,
"type_name": "root",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "default~hdd",
"type_id": ,
"type_name": "root",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph2",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
},
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph2~hdd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
},
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph4",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
},
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph4~hdd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
},
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph3",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
},
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph3~hdd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
},
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "dc1",
"type_id": ,
"type_name": "root",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "rack1",
"type_id": ,
"type_name": "rack",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "rack2",
"type_id": ,
"type_name": "rack",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "rack3",
"type_id": ,
"type_name": "rack",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "rack3~hdd",
"type_id": ,
"type_name": "rack",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "rack2~hdd",
"type_id": ,
"type_name": "rack",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "rack1~hdd",
"type_id": ,
"type_name": "rack",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "dc1~hdd",
"type_id": ,
"type_name": "root",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph2-ssd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph3-ssd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph4-ssd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ssd-root",
"type_id": ,
"type_name": "root",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph2-ssd~hdd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ssd-root~hdd",
"type_id": ,
"type_name": "root",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
},
{
"id": -,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph4-ssd~hdd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
}
]
},
{
"id": -,
"name": "ceph3-ssd~hdd",
"type_id": ,
"type_name": "host",
"weight": ,
"alg": "straw2",
"hash": "rjenkins1",
"items": [
{
"id": ,
"weight": ,
"pos":
}
]
}
],
"rules": [
{
"rule_id": ,
"rule_name": "replicated_rule",
"ruleset": ,
"type": ,
"min_size": ,
"max_size": ,
"steps": [
{
"op": "take",
"item": -,
"item_name": "default"
},
{
"op": "chooseleaf_firstn",
"num": ,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": ,
"rule_name": "replicated1_rule",
"ruleset": ,
"type": ,
"min_size": ,
"max_size": ,
"steps": [
{
"op": "take",
"item": -,
"item_name": "default"
},
{
"op": "chooseleaf_firstn",
"num": ,
"type": "host"
},
{
"op": "emit"
}
]
},
{
"rule_id": ,
"rule_name": "indc1",
"ruleset": ,
"type": ,
"min_size": ,
"max_size": ,
"steps": [
{
"op": "take",
"item": -,
"item_name": "dc1"
},
{
"op": "chooseleaf_firstn",
"num": ,
"type": "rack"
},
{
"op": "emit"
}
]
},
{
"rule_id": ,
"rule_name": "ssdrule",
"ruleset": ,
"type": ,
"min_size": ,
"max_size": ,
"steps": [
{
"op": "take",
"item": -,
"item_name": "ssd-root"
},
{
"op": "chooseleaf_firstn",
"num": ,
"type": "host"
},
{
"op": "emit"
}
]
}
],
"tunables": {
"choose_local_tries": ,
"choose_local_fallback_tries": ,
"choose_total_tries": ,
"chooseleaf_descend_once": ,
"chooseleaf_vary_r": ,
"chooseleaf_stable": ,
"straw_calc_version": ,
"allowed_bucket_algs": ,
"profile": "jewel",
"optimal_tunables": ,
"legacy_tunables": ,
"minimum_required_version": "jewel",
"require_feature_tunables": ,
"require_feature_tunables2": ,
"has_v2_rules": ,
"require_feature_tunables3": ,
"has_v3_rules": ,
"has_v4_buckets": ,
"require_feature_tunables5": ,
"has_v5_rules":
},
"choose_args": {}
}
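The JSON dump above is read-only. To actually modify the CRUSH map offline, the usual workflow is to extract the compiled map, decompile it, edit it, recompile it and inject it back. A sketch of that workflow (file names are arbitrary):
ceph osd getcrushmap -o crush.bin     # extract the compiled CRUSH map
crushtool -d crush.bin -o crush.txt   # decompile it into editable text
vi crush.txt                          # edit buckets and rules as needed
crushtool -c crush.txt -o crush.new   # recompile
ceph osd setcrushmap -i crush.new     # inject the modified map back into the cluster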
Query the pg map:
[root@ceph2 ~]# ceph pg dump|more
dumped all
version
stamp -- ::04.562309
last_osdmap_epoch
last_pg_scan
full_ratio
nearfull_ratio
PG_STAT OBJECTS MISSING_ON_PRIMARY DEGRADED MISPLACED UNFOUND BYTES LOG DISK_LOG STATE STATE_STAMP VERSION REPORTED UP UP_PRIMARY ACTING ACTING_PRIMARY LAST_SCRUB SCRUB_STAMP
LAST_DEEP_SCRUB DEEP_SCRUB_STAMP
15.71 active+clean -- ::32.993398 '0 349:100 [1,8,3] 1 [1,8,3] 1 0' -- :
:33.926844 '0 2019-03-27 20:08:33.926844
1.7f active+clean -- ::33.579411 '0 349:505 [8,7,6] 8 [8,7,6] 8 0' -- :
:11.129286 '0 2019-03-21 00:37:48.245632
15.70 active+clean -- ::33.014554 '0 349:118 [3,1,8] 3 [3,1,8] 3 0' -- :
:35.263257 '0 2019-03-26 17:10:19.390530
.7e active+clean -- ::34.401107 '0 349:18 [6,4,8] 6 [6,4,8] 6 0' -- :
:30.900982 '0 2019-03-24 06:16:20.594466
15.73 active+clean -- ::32.722556 '0 349:107 [2,4,3] 2 [2,4,3] 2 0' -- :
:48.489676 '0 2019-03-26 17:10:19.390530
1.7d active+clean -- ::32.509177 '0 349:611 [3,2,7] 3 [3,2,7] 3 0' -- :
:45.842781 '0 2019-03-24 00:45:38.159371
15.72 active+clean -- ::53.428161 '0 349:128 [2,4,6] 2 [2,4,6] 2 0' -- :
:37.129695 '0 2019-03-26 17:10:19.390530
.7c active+clean -- ::31.590563 '0 349:18 [7,2,6] 7 [7,2,6] 7 0' -- :
:05.697728 '0 2019-03-27 05:33:02.267544
15.75 active+clean -- ::53.899879 '0 349:19 [6,7,8] 6 [6,7,8] 6 0' -- :
:45.705922 '0 2019-03-26 17:10:19.390530 sum
OSD_STAT USED AVAIL TOTAL HB_PEERS PG_SUM PRIMARY_PG_SUM
201M 15147M 15348M [,,,,,,]
207M 15141M 15348M [,,,,,,]
216M 15132M 15348M [,,,,,,]
188M 15160M 15348M [,,,,,,]
209M 15139M 15348M [,,,,,,]
260M 15088M 15348M [,,,,,,]
197M 15151M 15348M [,,,,,,]
173M 15175M 15348M [,,,,,,]
sum 1656M 118G 119G
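Because a full pg dump is very large, narrower queries are usually more practical. A sketch; PG 1.7f is taken from the output above:
ceph pg stat                 # one-line PG summary
ceph pg dump_stuck unclean   # list PGs stuck in an unclean state
ceph pg map 1.7f             # the up and acting OSD sets for one PG
ceph pg 1.7f query           # detailed state of a single PG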
Query the fs map:
[root@ceph2 ~]# ceph fs dump
dumped fsmap epoch
e9
enable_multiple, ever_enabled_multiple: ,
compat: compat={},rocompat={},incompat={=base v0.,=client writeable ranges,=default file layouts on dirs,=dir inode in separate object,=mds uses versioned encoding,=dirfrag is stored in omap,=file layout v2}
legacy client fscid:
Filesystem 'cephfs' ()
fs_name cephfs
epoch
flags c
created -- ::16.787966
modified -- ::16.787966
tableserver
root
session_timeout
session_autoclose
max_file_size
last_failure
last_failure_osd_epoch
compat compat={},rocompat={},incompat={=base v0.,=client writeable ranges,=default file layouts on dirs,=dir inode in separate object,=mds uses versioned encoding,=dirfrag is stored in omap,=file layout v2}
max_mds
in
up {=}
failed
damaged
stopped
data_pools []
metadata_pool
inline_data disabled
balancer
standby_count_wanted
: 172.25.250.11:/ 'ceph2' mds.0.6 up:active seq
Standby daemons:
: 172.25.250.12:/ 'ceph3' mds.-1.0 up:standby seq
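A few related commands summarize the same information more compactly. A sketch, assuming the single cephfs filesystem shown above:
ceph mds stat    # compact MDS state summary
ceph fs ls       # filesystems with their data and metadata pools
ceph fs status   # table of ranks, standby daemons and pool usage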
Query the mgr map:
[root@ceph2 ~]# ceph mgr dump
{
"epoch": ,
"active_gid": ,
"active_name": "ceph4",
"active_addr": "172.25.250.13:6800/60569",
"available": true,
"standbys": [
{
"gid": ,
"name": "ceph2",
"available_modules": [
"dashboard",
"prometheus",
"restful",
"status",
"zabbix"
]
},
{
"gid": ,
"name": "ceph3",
"available_modules": [
"dashboard",
"prometheus",
"restful",
"status",
"zabbix"
]
}
],
"modules": [
"restful",
"status"
],
"available_modules": [
"dashboard",
"prometheus",
"restful",
"status",
"zabbix"
]
}
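The MGR map drives module management. A sketch; dashboard is one of the modules listed under available_modules above:
ceph mgr services                  # URLs published by enabled modules
ceph mgr module enable dashboard   # enable an available module
ceph mgr module disable dashboard  # disable it again
ceph mgr dump | grep active_name   # check which MGR is currently active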
Query the service map:
[root@ceph2 ~]# ceph service dump
{
"epoch": ,
"modified": "2019-03-18 21:19:18.667275",
"services": {
"rbd-mirror": {
"daemons": {
"summary": "",
"admin": {
"start_epoch": ,
"start_stamp": "2019-03-18 21:19:18.318802",
"gid": ,
"addr": "172.25.250.11:0/4114752834",
"metadata": {
"arch": "x86_64",
"ceph_version": "ceph version 12.2.1-40.el7cp (c6d85fd953226c9e8168c9abe81f499d66cc2716) luminous (stable)",
"cpu": "QEMU Virtual CPU version 1.5.3",
"distro": "rhel",
"distro_description": "Red Hat Enterprise Linux Server 7.4 (Maipo)",
"distro_version": "7.4",
"hostname": "ceph2",
"instance_id": "",
"kernel_description": "#1 SMP Thu Dec 28 14:23:39 EST 2017",
"kernel_version": "3.10.0-693.11.6.el7.x86_64",
"mem_swap_kb": "",
"mem_total_kb": "",
"os": "Linux"
}
}
}
}
}
}
2. Managing the Monitor Map
2.1 Synchronization Among Multiple Monitors
In production, at least three monitor nodes are recommended to ensure high availability and redundancy of the cluster map.
Monitors use the Paxos algorithm as the mechanism for reaching agreement on the cluster state. Paxos is a distributed consensus algorithm: whenever a monitor modifies a map, it sends the update to the other monitors through Paxos, and Ceph commits the new version of the map only after a majority of monitors agree on the update.
Updates to the cluster map must be confirmed through Paxos, but reads do not go through Paxos; they access the local key-value store directly.
2.2 Monitor Election
Multiple monitors must establish a quorum and elect a leader; the remaining monitors act as worker nodes (peons).
After the election completes and the leader is determined, the leader requests the latest map epochs from all other monitors to make sure it has the most up-to-date view of the cluster.
For the monitor cluster to keep functioning, more than half of the monitor nodes must be healthy (for example, at least 2 of 3, or 3 of 5).
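The current quorum, leader and election epoch can be inspected directly. A sketch; mon.ceph2 is one of this cluster's monitors:
ceph quorum_status --format json-pretty   # quorum members, leader name, election epoch
ceph daemon mon.ceph2 mon_status          # one monitor's own view: state, rank, quorum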
2.3 Monitor Leases
After the monitors establish a quorum, the leader starts distributing short-term leases to all monitors, which allow them to distribute the cluster map to OSDs and clients.
Monitor leases are renewed every 3 seconds by default.
If a peon monitor does not acknowledge that it received its lease, the leader assumes the monitor has failed and calls a new election to re-establish quorum.
If a peon monitor's lease expires without a renewal from the leader, it assumes the leader has failed and calls a new election.
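The lease behaviour is governed by monitor configuration options; the 3-second renewal mentioned above corresponds to the default lease length (5 s) multiplied by the renewal-interval factor (0.6). A sketch for inspecting those values on a running monitor (mon.ceph2 is from this cluster):
ceph daemon mon.ceph2 config show | grep mon_lease   # lease length plus renewal and ack-timeout factors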
2.4 Managing the Monitor Map
Export the monitor map to a binary file:
[root@ceph2 ~]# ceph mon getmap -o ./monmap
got monmap epoch
Print the contents of the exported binary file:
[root@ceph2 ~]# monmaptool --print ./monmap
monmaptool: monmap file ./monmap
epoch
fsid 35a91e48--4e96-a7ee-980ab989d20d
last_changed -- ::14.839999
created -- ::14.839999
: 172.25.250.11:/ mon.ceph2
: 172.25.250.12:/ mon.ceph3
: 172.25.250.13:/ mon.ceph4
Edit the binary file: remove a monitor from the monmap (note that the live map shown by ceph mon dump is unchanged):
[root@ceph2 ~]# monmaptool ./monmap --rm ceph2
monmaptool: monmap file ./monmap
monmaptool: removing ceph2
monmaptool: writing epoch to ./monmap ( monitors)
[root@ceph2 ~]# monmaptool --print ./monmap
monmaptool: monmap file ./monmap
epoch
fsid 35a91e48--4e96-a7ee-980ab989d20d
last_changed -- ::14.839999
created -- ::14.839999
: 172.25.250.12:/ mon.ceph3
: 172.25.250.13:/ mon.ceph4
[root@ceph2 ~]# ceph mon dump
dumped monmap epoch
epoch
fsid 35a91e48--4e96-a7ee-980ab989d20d
last_changed -- ::14.839999
created -- ::14.839999
: 172.25.250.11:/ mon.ceph2
: 172.25.250.12:/ mon.ceph3
: 172.25.250.13:/ mon.ceph4
Edit the binary file: add a monitor to the monmap:
[root@ceph2 ~]# monmaptool ./monmap --add ceph2 172.25.254.11:6789
monmaptool: monmap file ./monmap
monmaptool: writing epoch to ./monmap ( monitors)
[root@ceph2 ~]# monmaptool --print ./monmap
monmaptool: monmap file ./monmap
epoch
fsid 35a91e48--4e96-a7ee-980ab989d20d
last_changed -- ::14.839999
created -- ::14.839999
: 172.25.250.12:/ mon.ceph3
: 172.25.250.13:/ mon.ceph4
: 172.25.254.11:/ mon.ceph2
Inject the binary file into a monitor; the monitor must be stopped before injecting:
ceph-mon -i <id> --inject-monmap ./monmap
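A minimal sketch of the complete injection workflow, assuming systemd-managed monitors (the id ceph2 matches this cluster):
systemctl stop ceph-mon@ceph2                # the monitor must be down first
ceph-mon -i ceph2 --inject-monmap ./monmap   # write the edited monmap into its store
systemctl start ceph-mon@ceph2               # bring the monitor back up
ceph mon dump                                # verify that the live map reflects the change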
3. Managing the OSD Map
3.1 OSD Map Lifecycle
Ceph updates the OSD map whenever an OSD joins or leaves the cluster.
OSDs do not rely on a leader to manage the OSD map; they propagate it among themselves. OSDs tag every message they exchange with the OSD map epoch, and when an OSD detects that it has fallen behind, it fetches the map update from a peer OSD.
In large clusters OSD map updates are very frequent, so nodes apply incremental map updates instead of transferring the full map every time.
Ceph also uses the epoch to tag messages between OSDs and clients. When a client connects to an OSD, the OSD checks the epoch; if the epochs do not match, the OSD responds with the correct epoch so that the client can update its OSD map.
OSDs periodically report their own status to the monitors, and they exchange heartbeats with one another to detect peer failures and report them to the monitors.
When the leader monitor learns of an OSD failure, it updates the map, increments the epoch, and uses the Paxos update protocol to notify the other monitors, while revoking the current leases and issuing new ones so that the monitors can distribute the latest OSD map.
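The epoch progression described above can be observed from the command line. A sketch (osd.0 is one of this cluster's OSDs; the noout flag is only set and removed to show that map changes bump the epoch):
ceph osd stat                 # current osdmap epoch plus up/in counts
ceph osd dump | grep ^epoch   # the epoch alone
ceph daemon osd.0 status      # one OSD's own view, including oldest_map/newest_map
ceph osd set noout            # any flag change creates a new osdmap epoch
ceph osd unset noout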
3.2 Managing the OSD Map
[root@ceph2 ~]# ceph osd getmap -o ./osdmap
got osdmap epoch
[root@ceph2 ~]# osdmaptool --print ./osdmap
osdmaptool: osdmap file './osdmap'
epoch
fsid 35a91e48--4e96-a7ee-980ab989d20d
created -- ::22.552356
modified -- ::15.354383
flags sortbitwise,recovery_deletes,purged_snapdirs
crush_version
full_ratio 0.95
backfillfull_ratio 0.9
nearfull_ratio 0.85
require_min_compat_client jewel
min_compat_client jewel
require_osd_release luminous
pool 'testpool' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
snap 'testpool-snap-20190316' -- ::34.150433
snap 'testpool-snap-2' -- ::15.430823
pool 'rbd' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
removed_snaps [~]
pool 'rbdmirror' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
removed_snaps [~]
pool '.rgw.root' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rgw
pool 'default.rgw.control' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rgw
pool 'default.rgw.meta' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rgw
pool 'default.rgw.log' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rgw
pool 'xiantao.rgw.control' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change owner flags hashpspool stripe_width application rgw
pool 'xiantao.rgw.meta' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change owner flags hashpspool stripe_width application rgw
pool 'xiantao.rgw.log' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change owner flags hashpspool stripe_width application rgw
pool 'cephfs_metadata' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application cephfs
pool 'cephfs_data' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application cephfs
pool 'test' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
pool 'ssdpool' replicated size min_size crush_rule object_hash rjenkins pg_num pgp_num last_change flags hashpspool stripe_width application rbd
max_osd
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ exists,up 745dce53-1c63-4c50-b434-d441038dafe4
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ exists,up a7562276-6dfd--b248-a7cbdb64ebec
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ exists,up bbef1a00-3a31-48a0-a065-3a16b9edc3b1
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ exists,up e934a4fb--4e85-895c-f66cc5534ceb
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ exists,up e2c33bb3-02d2-4cce-85e8-25c419351673
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ exists,up d299e33c-0c24-4cd9-a37a-a6fcd420a529
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ 172.25.250.11:/ exists,up debe7f4e-656b-48e2-a0b2-bdd8613afcc4
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ 172.25.250.13:/ exists,up 8c403679--48d0-812b-72050ad43aae
osd. up in weight up_from up_thru down_at last_clean_interval [,) 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ 172.25.250.12:/ exists,up bb73edf8-ca97-40c3-a727-d5fde1a9d1d9
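Beyond --print, osdmaptool can run offline experiments against the exported map. A sketch; the object name testobj and pool id 1 are only examples:
osdmaptool ./osdmap --export-crush ./crush.bin           # extract the CRUSH map embedded in the osdmap
osdmaptool ./osdmap --test-map-object testobj --pool 1   # where a hypothetical object would be placed
osdmaptool ./osdmap --test-map-pgs --pool 1              # PG-to-OSD distribution for one pool, computed offline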
Author's note: the content of this article is mainly based on material from Yan Wei of Yutian Education, and all operations were verified by the author in a lab environment. Readers who wish to repost should contact Yutian Education (http://www.yutianedu.com/) or obtain permission from the original author (https://www.cnblogs.com/breezey/). Thank you!