在分布式集群中,添加或删除机器,以及机器故障后自动脱离集群,是分布式集群管理最基本的功能。如果采用常用的 hash(object) % N 算法,那么在添加或删除机器之后,就需要大范围地移动原有数据;这种大规模的数据迁移在大型分布式集群中是不可接受的,因为迁移过程中造成的“抖动”以及可能出现的数据读写问题,都会大大降低集群的可用性。一致性哈希把节点和数据映射到同一个环上,节点增删时只需迁移相邻区间内的数据,从而避免了上述问题。谷歌在 2017 年 4 月对一致性哈希做了简单改进,即限制每个节点的最大连接数:新来的请求如果发现目标节点已达到上限,就沿顺时针方向寻找下一个连接数尚未达到上限的节点。相关论文如下:
Consistent Hashing with Bounded Loads
The distribution of loads for several values of ε. The load distribution is nearly uniform covering all ranges of loads from 0 to (1+ε) times average, and many bins with load equal to (1+ε) times average.
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <iterator>
#include <list>
#include <map>
#include <random>
#include <string>
#include <vector>

#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

/*
 * ===  FUNCTION  ======================================================================
 *         Name:  add_key
 *  Description:  Find the node an IP address maps to and store the key in that
 *                node's key list.
 * =====================================================================================
 */
/*
 * The key is hashed as inet_addr(ip) % 16384, giving a ring position in
 * [0, 16383]. Its owner is the first node whose position is >= the hash; when
 * no such node exists the key wraps around to the node with the smallest
 * position.
 *
 *   node_info  ring position -> node name (read only)
 *   ip         key, expected to be an IPv4 dotted-quad string
 *   info       ring position -> hashes of the keys stored on that node; the
 *              owner's list is created on first use
 *
 * Nothing is done when ip or node_info is empty. A string that inet_addr()
 * cannot parse is not rejected: it is hashed as INADDR_NONE % 16384.
 */
void add_key(const std::map<uint32_t, std::string> &node_info, std::string ip, std::map<uint32_t, std::vector<uint32_t>> &info)
{
    if (ip.empty() || node_info.empty())
        return;

    /* Hash function for keys: simply the IP address modulo the ring size. */
    const uint32_t hash_value = inet_addr(ip.c_str()) % 16384;

    /* First node clockwise from the key; lower_bound replaces the linear scan. */
    auto owner = node_info.lower_bound(hash_value);

    /* The ring covers 0-16383, so a key past the last node wraps to the first. */
    if (owner == node_info.end())
        owner = node_info.begin();

    /* operator[] creates the owner's list if it does not exist yet. */
    info[owner->first].push_back(hash_value);
}

/*
 * ===  FUNCTION  ======================================================================
 *         Name:  del_key
 *  Description:  Delete a key from the node mapping table.
 * =====================================================================================
 */
/*
 * Remove the hash of an IP address from the key list of the node that holds it.
 *
 * The key is hashed as inet_addr(ip) % 16384. The list searched is the one
 * belonging to the first entry of info whose position is >= the hash, wrapping
 * around to the first entry when there is none.
 *
 *   info  ring position -> hashes of the keys stored on that node
 *   ip    key to remove, expected to be an IPv4 dotted-quad string
 *
 * Returns true when the hash was found and erased (one occurrence is removed
 * per call), false when ip or info is empty or the hash is not stored. The
 * node's entry is kept even if its list becomes empty.
 */
bool del_key(std::map<uint32_t, std::vector<uint32_t>> &info, const std::string &ip)
{
    if (ip.empty() || info.empty())
        return false;

    /* Same hash function as used when the key was added. */
    const uint32_t hash_value = inet_addr(ip.c_str()) % 16384;

    /* First populated node clockwise from the key, wrapping past the end. */
    auto owner = info.lower_bound(hash_value);
    if (owner == info.end())
        owner = info.begin();

    std::vector<uint32_t> &keys = owner->second;
    auto target = std::find(keys.begin(), keys.end(), hash_value);
    if (target == keys.end())
        return false;

    keys.erase(target);
    std::cout << "delete " << ip << " from node " << owner->first << '\n';
    return true;
}

/*
 * ===  FUNCTION  ======================================================================
 *         Name:  add_node
 *  Description:  Add a node to the node list. If the node mapping table already
 *                holds data, the keys that fall in the range in front of the new
 *                node are migrated from the following node's table to the new node.
 * =====================================================================================
 */
/*
 * Place a new node on the ring and take over the keys that now belong to it.
 *
 * The node's position is drawn at random from [0, 16383]; if that position is
 * already occupied the next free position clockwise is used instead. Keys are
 * only ever taken from the node that follows the new one on the ring, because
 * that node owned the whole range before the insertion.
 *
 *   node_info  ring position -> node name; gains one entry on success
 *   info       ring position -> hashes of the keys stored on that node
 *   node_name  name of the node to add
 *
 * Returns false when a node with this name already exists or every ring
 * position is taken; true otherwise. An entry for the new node is created in
 * info only when at least one key is migrated to it.
 */
bool add_node(std::map<uint32_t, std::string> &node_info, std::map<uint32_t, std::vector<uint32_t>> &info, const std::string &node_name)
{
    const uint32_t ring_size = 16384;

    /* Check whether node_name already exists. */
    for (const auto &ele : node_info) {
        if (ele.second == node_name)
            return false;
    }

    /* No free position left; also guarantees the probe loop below terminates. */
    if (node_info.size() >= ring_size)
        return false;

    /* A random number in 0-16383 stands in for a hash of the node. */
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_int_distribution<uint32_t> dis(0, ring_size - 1);
    uint32_t node_key = dis(gen);
    while (node_info.count(node_key) != 0)
        node_key = (node_key + 1) % ring_size;
    node_info.emplace(node_key, node_name);

    /* Nothing stored anywhere, so there is nothing to migrate. */
    if (info.empty())
        return true;

    /* The next node clockwise, wrapping past the end of the ring. */
    auto successor = node_info.upper_bound(node_key);
    if (successor == node_info.end())
        successor = node_info.begin();
    if (successor->first == node_key)
        return true; /* the new node is alone on the ring */

    auto donor = info.find(successor->first);
    if (donor == info.end())
        return true;

    /* A key stays put unless the updated ring maps it to the new node. */
    auto stays_on_successor = [&node_info, node_key](uint32_t key) {
        auto owner = node_info.lower_bound(key);
        if (owner == node_info.end())
            owner = node_info.begin();
        return owner->first != node_key;
    };

    /* Partition in place so the moved keys are really removed from the donor. */
    std::vector<uint32_t> &keys = donor->second;
    auto first_moved = std::stable_partition(keys.begin(), keys.end(), stays_on_successor);
    if (first_moved == keys.end())
        return true;

    std::vector<uint32_t> &cli = info[node_key];
    cli.insert(cli.end(), first_moved, keys.end());
    keys.erase(first_moved, keys.end());
    return true;
}

/*
 * ===  FUNCTION  ======================================================================
 *         Name:  del_node
 *  Description:  Delete a node. If keys are mapped to the node, they are migrated
 *                to the next node in the clockwise direction.
 * =====================================================================================
 */
/*
 * Remove a node from the ring and hand its keys to the next node clockwise.
 *
 *   node_info  ring position -> node name; loses the named node on success
 *   info       ring position -> hashes of the keys stored on that node
 *   node_name  name of the node to remove
 *
 * Returns false when no node has this name, true otherwise. The node is
 * removed from node_info whether or not it has an entry in info. When the node
 * is the last one on the ring there is nowhere to migrate to, so its keys are
 * dropped together with its entry.
 */
bool del_node(std::map<uint32_t, std::string> &node_info, std::map<uint32_t, std::vector<uint32_t>> &info, const std::string &node_name)
{
    /* find_if stops at end(), so a missing name is never dereferenced. */
    auto key_itr = std::find_if(node_info.begin(), node_info.end(),
                                [&node_name](const std::map<uint32_t, std::string>::value_type &ele) {
                                    return ele.second == node_name;
                                });
    if (key_itr == node_info.end())
        return false;

    auto target = info.find(key_itr->first);
    if (target != info.end()) {
        /* If the last node is being deleted, its data moves to the first node. */
        auto successor = std::next(key_itr);
        if (successor == node_info.end())
            successor = node_info.begin();

        /* successor == key_itr means this is the only node left. */
        if (successor != key_itr && !target->second.empty()) {
            /* Inserting into a std::map leaves the iterator `target` valid. */
            std::vector<uint32_t> &ip_arr = info[successor->first];
            ip_arr.insert(ip_arr.end(), target->second.begin(), target->second.end());
        }
        info.erase(target);
    }

    node_info.erase(key_itr);
    return true;
}
} int main(int argc, char *argv[])
std::vector<std::string> addrs = {"", "","","","","","","",""}; std::cout << "convert to long is " << inet_addr("") % 16384 << std::endl; std::map<uint32_t, std::string> nodes;
std::map<uint32_t, std::vector<uint32_t>> info;
for (const auto & ele : addrs) {
add_node(nodes, info, ele);
} std::string ip;
std::ifstream ifs;
ifs.open("conf"); while (!ifs.eof()) {
ifs >> ip;
if (ip.empty())
std::cout << "Get ip addr " << ip << std::endl;
add_key(nodes, ip, info);
} del_node(nodes, info, "");
del_key(info, ""); return EXIT_SUCCESS;
} /* ---------- end of function main ---------- */
