golang consistent hash 菜鸟分析
一直在找适合集群使用的算法,刚好 Golang 里有一个合适的一致性哈希实现。下面以菜鸟的视角来分析一下。
- // Copyright (C) 2012 Numerotron Inc.
- // Use of this source code is governed by an MIT-style license
- // that can be found in the LICENSE file.
- // Package consistent provides a consistent hashing function.
- //
- // Consistent hashing is often used to distribute requests to a changing set of servers. For example,
- // say you have some cache servers cacheA, cacheB, and cacheC. You want to decide which cache server
- // to use to look up information on a user.
- //
- // You could use a typical hash table and hash the user id
- // to one of cacheA, cacheB, or cacheC. But with a typical hash table, if you add or remove a server,
- // almost all keys will get remapped to different results, which basically could bring your service
- // to a grinding halt while the caches get rebuilt.
- //
- // With a consistent hash, adding or removing a server drastically reduces the number of keys that
- // get remapped.
- //
- // Read more about consistent hashing on wikipedia: http://en.wikipedia.org/wiki/Consistent_hashing
- //
- package main
- import (
- "errors"
- "fmt"
- "hash/crc32"
- "log"
- "sort"
- "strconv"
- "sync"
- )
// uints is a slice of 32-bit hashes that implements sort.Interface in
// ascending order.
type uints []uint32

// Len reports how many hashes the slice holds.
func (u uints) Len() int {
	return len(u)
}

// Less reports whether the hash at index i sorts before the hash at index j.
func (u uints) Less(i, j int) bool {
	return u[i] < u[j]
}

// Swap exchanges the hashes at indexes i and j.
func (u uints) Swap(i, j int) {
	u[i], u[j] = u[j], u[i]
}
// ErrEmptyCircle is the error returned by lookups when no element has been
// added to the hash circle.
var ErrEmptyCircle = errors.New("empty circle")
- // Consistent holds the information about the members of the consistent hash circle.
- type Consistent struct {
- circle map[uint32]string
- members map[string]bool
- sortedHashes uints // 已经排好序的hashes slice , 主要有力搜索 (存储的内容是全部虚拟hashes值)
- NumberOfReplicas int
- count int64
- scratch [64]byte
- sync.RWMutex
- }
- // New creates a new Consistent object with a default setting of 20 replicas for each entry.
- //
- // To change the number of replicas, set NumberOfReplicas before adding entries.
- func New() *Consistent {
- c := new(Consistent)
- c.NumberOfReplicas = 20
- c.circle = make(map[uint32]string)
- c.members = make(map[string]bool)
- //log.Printf("%p", c)
- return c
- }
- // eltKey generates a string key for an element with an index.
- func (c *Consistent) eltKey(elt string, idx int) string {
- return elt + "|" + strconv.Itoa(idx)
- }
- // Add inserts a string element in the consistent hash.
- func (c *Consistent) Add(elt string) {
- c.Lock()
- defer c.Unlock()
- for i := 0; i < c.NumberOfReplicas; i++ {
- fmt.Println("i:",i,c.hashKey(c.eltKey(elt, i)))
- c.circle[c.hashKey(c.eltKey(elt, i))] = elt
- }
- //log.Fatal(len(c.circle))
- //log.Println(len(c.members), elt)
- c.members[elt] = true
- c.updateSortedHashes()
- c.count++
- }
- // Remove removes an element from the hash.
- func (c *Consistent) Remove(elt string) {
- c.Lock()
- defer c.Unlock()
- for i := 0; i < c.NumberOfReplicas; i++ {
- delete(c.circle, c.hashKey(c.eltKey(elt, i)))
- }
- delete(c.members, elt)
- c.updateSortedHashes()
- c.count--
- }
- // Set sets all the elements in the hash. If there are existing elements not present in elts, they will be removed.
- func (c *Consistent) Set(elts []string) {
- mems := c.Members()
- for _, k := range mems {
- found := false
- for _, v := range elts {
- if k == v {
- found = true
- break
- }
- }
- if !found {
- c.Remove(k)
- }
- }
- for _, v := range elts {
- c.RLock()
- _, exists := c.members[v]
- c.RUnlock()
- if exists {
- continue
- }
- c.Add(v)
- }
- }
- func (c *Consistent) Members() []string {
- c.RLock()
- defer c.RUnlock()
- var m []string
- for k := range c.members {
- m = append(m, k)
- }
- return m
- }
- // Get returns an element close to where name hashes to in the circle.
- func (c *Consistent) Get(name string) (string, error) {
- c.RLock()
- defer c.RUnlock()
- if len(c.circle) == 0 {
- return "", ErrEmptyCircle
- }
- key := c.hashKey(name)
- log.Println("need search --> key:",key,"servername:",name)
- i := c.search(key)
- fmt.Println(c.sortedHashes[i],c.circle[c.sortedHashes[i]])
- return c.circle[c.sortedHashes[i]], nil
- }
- func (c *Consistent) search(key uint32) (i int) {
- f := func(x int) bool {
- log.Println("i",i)
- // 拿不到相等的
- return c.sortedHashes[x] > key
- }
- i = sort.Search(len(c.sortedHashes), f)
- log.Println("I:",i)
- if i >= len(c.sortedHashes) {
- i = 0
- }
- return
- }
- // GetTwo returns the two closest distinct elements to the name input in the circle.
- func (c *Consistent) GetTwo(name string) (string, string, error) {
- c.RLock()
- defer c.RUnlock()
- if len(c.circle) == 0 {
- return "", "", ErrEmptyCircle
- }
- //得到hashesw 值
- key := c.hashKey(name)
- //搜索hashes
- i := c.search(key)
- //获取值
- a := c.circle[c.sortedHashes[i]]
- //如果节点只有一个时,直接返回
- if c.count == 1 {
- return a, "", nil
- }
- start := i
- var b string
- for i = start + 1; i != start; i++ {
- if i >= len(c.sortedHashes) {
- i = 0
- }
- b = c.circle[c.sortedHashes[i]]
- //两个时候否为相同的节点,不是就返回
- if b != a {
- break
- }
- }
- return a, b, nil
- }
- // GetN returns the N closest distinct elements to the name input in the circle.
- func (c *Consistent) GetN(name string, n int) ([]string, error) {
- c.RLock()
- defer c.RUnlock()
- if len(c.circle) == 0 {
- return nil, ErrEmptyCircle
- }
- if c.count < int64(n) {
- n = int(c.count)
- }
- var (
- key = c.hashKey(name)
- i = c.search(key)
- start = i
- res = make([]string, 0, n)
- elem = c.circle[c.sortedHashes[i]]
- )
- res = append(res, elem)
- if len(res) == n {
- return res, nil
- }
- for i = start + 1; i != start; i++ {
- if i >= len(c.sortedHashes) {
- i = 0
- }
- elem = c.circle[c.sortedHashes[i]]
- if !sliceContainsMember(res, elem) {
- res = append(res, elem)
- }
- if len(res) == n {
- break
- }
- }
- return res, nil
- }
- func (c *Consistent) hashKey(key string) uint32 {
- //
- log.Println("key string:",key)
- if len(key) < 64 {
- var scratch [64]byte
- copy(scratch[:], key)
- //log.Fatal(len(key), scratch)
- return crc32.ChecksumIEEE(scratch[:len(key)])
- }
- return crc32.ChecksumIEEE([]byte(key))
- }
- // 对hash 进行排序
- func (c *Consistent) updateSortedHashes() {
- hashes := c.sortedHashes[:0]
- //reallocate if we're holding on to too much (1/4th)
- //log.Fatal("exit test:",cap(c.sortedHashes))
- if cap(c.sortedHashes)/(c.NumberOfReplicas*4) > len(c.circle) {
- hashes = nil
- }
- for k := range c.circle {
- hashes = append(hashes, k)
- log.Println(k)
- }
- sort.Sort(hashes)
- c.sortedHashes = hashes
- log.Println("tem hashes size :",len(hashes),len(c.sortedHashes))
- }
// sliceContainsMember reports whether member occurs anywhere in set.
func sliceContainsMember(set []string, member string) bool {
	for i := 0; i < len(set); i++ {
		if set[i] == member {
			return true
		}
	}
	return false
}
- func main() {
- c := New()
- //fmt.Printf("%T", D)
- c.Add("redis-1")
- c.Add("redis-2")
- c.Add("redis-3")
- log.Fatal(c.GetN("redis-2",1))
- v, ok := c.Get("redis-one")
- if ok == nil {
- for i, vv := range v {
- fmt.Println(i, vv)
- }
- }
- log.Println("members size:",len(c.members),"\tcircle size :",len(c.circle),"sortHashes:",len(c.sortedHashes),"scratch:",c.scratch)
- log.Println("sortHashes value:",c.sortedHashes)
- //log.Fatal("...")
- }
其中有几点不是很理解:一是 scratch 这个字段好像没有用到;二是在查找虚拟节点时,它使用的是 '>' 比较。假设我们添加一个节点 redis,那么默认会生成 redis|0、redis|1、redis|2……这些虚拟节点并分布到环上;如果查询时用 redis|1 作为 key 进行搜索,搜索出来的就不是 redis|1 这个虚拟节点本身,而是它之后的下一个虚拟节点,可能属于其他节点。三是在求最近节点时,它只按升序方向向后搜索,而不会同时比较左右两侧来找距离最近的节点。
- 1 type Consistent struct {
- 2 »···circle map[uint32]string // 用来存储node(string) 和 vnode的对应关系, vnode 是一个hash出来的uint32的整数,也就是最大分区数为4294967296
- 3 »···members map[string]bool // string 为 node;bool 恒为 true,这个 map 实际上是当前已加入节点的集合(移除节点时直接 delete)
- 4 »···sortedHashes uints // 已经排好序的hashes slice , 主要用于搜索 (存储的内容是全部vnode hashes值)
- 5 »···NumberOfReplicas int // node 的权重, 也就是node对应的vnode的个数
- 6 »···count int64 // 物理节点的个数
- 7 »···scratch [64]byte
- 8 »···sync.RWMutex
- 9 }
这种一致性hash和 Dynamo算法的一致性hash是有很大区别的,这种hash排序不是全有序的;
测试例子:
- func main() {
- c := New()
- c.Set([]string{"redisA", "redisB"})
- fmt.Println(c.NumberOfReplicas)
- fmt.Println(c.Members())
- for k, v := range c.sortedHashes {
- fmt.Println(k, c.circle[v])
- }
- }
输出:
- ▶ go run consistent.go
- 20
- [redisB redisA]
- 0 redisA
- 1 redisB
- 2 redisA
- 3 redisB
- 4 redisA
- 5 redisB
- 6 redisA
- 7 redisB
- 8 redisA
- 9 redisA
- 10 redisB
- 11 redisA
- 12 redisA
- 13 redisB
- 14 redisA
- 15 redisB
- 16 redisB
- 17 redisA
- 18 redisB
- 19 redisB
- 20 redisA
- 21 redisB
- 22 redisA
- 23 redisB
- 24 redisA
- 25 redisB
- 26 redisA
- 27 redisB
- 28 redisA
- 29 redisB
- 30 redisB
- 31 redisA
- 32 redisB
- 33 redisB
- 34 redisA
- 35 redisA
- 36 redisB
- 37 redisA
- 38 redisA
- 39 redisB
31A -> 32B -> 33B(环上相邻的虚拟节点可能属于同一个物理节点);如果是 Dynamo,那么应该是 31A -> 32B -> 33A 这样交替循环下去,所以如果想使用这种一致性 hash 算法来做备份容灾,是不行的。
golang consistent hash 菜鸟分析的更多相关文章
- consistent hash(一致性哈希算法)
一.产生背景 今天咱不去长篇大论特别详细地讲解consistent hash,我争取用最轻松的方式告诉你consistent hash算法是什么,如果需要深入,Google一下~. 举个栗子吧: 比如 ...
- 一文了解 Consistent Hash
本文首发于 vivo互联网技术 微信公众号 链接:https://mp.weixin.qq.com/s/LGLqEOlGExKob8xEXXWckQ作者:钱幸川 在分布式环境下面,我们经常会通过一定的 ...
- golang 性能调优分析工具 pprof(下)
golang 性能调优分析工具 pprof(上)篇, 这是下篇. 四.net/http/pprof 4.1 代码例子 1 go version go1.13.9 把上面的程序例子稍微改动下,命名为 d ...
- Nginx的负载均衡 - 一致性哈希 (Consistent Hash)
Nginx版本:1.9.1 我的博客:http://blog.csdn.net/zhangskd 算法介绍 当后端是缓存服务器时,经常使用一致性哈希算法来进行负载均衡. 使用一致性哈希的好处在于,增减 ...
- 一致性Hash算法(Consistent Hash)
分布式算法 在做服务器负载均衡时候可供选择的负载均衡的算法有很多,包括: 轮循算法(Round Robin).哈希算法(HASH).最少连接算法(Least Connection).响应速度算法(Re ...
- golang thrift 源码分析,服务器和客户端究竟是如何工作的
首先编写thrift文件(rpcserver.thrift),运行thrift --gen go rpcserver.thrift,生成代码 namespace go rpc service RpcS ...
- golang (5) http 请求分析
http 分析包分析 fmt.Println("get Content-Type: ", r.Header.Get("Content-Type")) var r ...
- oralce之 10046对Hash Join分析
前两天解决了一个优化SQL的case,SQL语句如下,big_table为150G大小,small_table很小,9000多条记录,不到1M大小,hash_area_size, sort_area_ ...
- 【go】继续go go go,ubuntu环境搭建及golang的依赖关系分析
这次是在ubuntu14.04 amd64上搭建go的编译环境,使用的IDE换成了sublime text,具体步骤参照的是 http://blog.csdn.net/aqiang912/articl ...
随机推荐
- Python-学习-import语句导入模块
简单的学习一下调用外部的模块文件. 在Python中,模块是一种组织形式,它将彼此有关系的Pyrhon 代码组织到一个个独立的文件当中,模块可以包含可执行代码,函数,和类或者是这些东西的组合. 当我们 ...
- C++学习008-delete与delete[]的差别
对于简单的数据类型,delete与delete[]是没啥差别的,就是等价的 例如 int main() { int *pdata = new int[20]; delete pdata; //dele ...
- 孤荷凌寒自学python第七十五天开始写Python的第一个爬虫5
孤荷凌寒自学python第七十五天开始写Python的第一个爬虫5 (完整学习过程屏幕记录视频地址在文末) 今天在上一天的基础上继续完成对我的第一个代码程序的书写. 直接上代码.详细过程见文末屏幕录像 ...
- ubuntu中执行truffle build出现问题
进行build之前,采用默认构建器方式创建客户端,先安装默认构建器: npm install truffle-default-builder --save 然后需要修改truffle.js配置文件如下 ...
- Introduction to TCP/IP
目录 First Week DHCP 子网掩码 ip路由表 Second Week ipv4 ipv6 TCP和UDP Third Week NAT RPC FTP E-mail Fouth Week ...
- java计算两个日期之间的相隔天数
[原创] 之前在很多竞赛的题目中有这样饿一类题,计算给定两个日期之间的相隔天数,或者很多类似的题目,都是需要转化到这个问题上时,之前用c++写的时候思想是这样的,一个结构体,包括年月日,还有一个计算下 ...
- const在c/c++中的区别
#include <iostream> using namespace std; int main() { ; ; }; ; i < sizeof array / sizeof *a ...
- Hash表 算法的详细解析
http://xingyunbaijunwei.blog.163.com/blog/static/76538067201111494524190/ 什么是HashHash,一般翻译做“散列”,也有直接 ...
- 正则awk和查看文件行数
[root@WebServer aa]# cat oldboy.txt I am oldboy myqq is 49000448[root@WebServer aa]# cat oldboy.txt ...
- 使用emit发出信号
1. 信号声明 在发送信号的模块类头文件中声明信号函数 signals: void sendRate(QString rate); 2. 在发送模块的成员函数中发出信号 emit sendRate(u ...