redis 字典

前言

借鉴了 黄健宏 的 <<Redis 设计与实现>> 一书, 对 redis 源码进行学习

欢迎大家给予意见, 互相沟通学习

概述

字典是一种用于存储键值对的抽象数据结构

redis 字典使用哈希表作为底层实现

字典结构

定义位置 (src/dict.h)

dict 结构

// Dictionary: the top-level handle. It carries two hash tables so that
// entries can be moved from ht[0] to ht[1] step by step during a rehash.

typedef struct dict {

    // Set of operation callbacks used by this dictionary's type

    dictType *type;

    // Private data
    // NOTE(review): presumably forwarded as the privdata argument of the
    // dictType callbacks — confirm against the dict macros.

    void *privdata;

    // The two hash tables (ht[0] and ht[1])

    dictht ht[2];

    // Rehash index

    // Holds -1 while no rehash is in progress

    int rehashidx; /* rehashing not in progress if rehashidx == -1 */

    // Number of safe iterators currently running

    int iterators; /* number of iterators currently running */

} dict;

// Set of operation callbacks used by a dictionary type

typedef struct dictType {

    // Computes the hash value of a key

    unsigned int (*hashFunction)(const void *key);

    // Duplicates a key

    void *(*keyDup)(void *privdata, const void *key);

    // Duplicates a value

    void *(*valDup)(void *privdata, const void *obj);

    // Compares two keys

    int (*keyCompare)(void *privdata, const void *key1, const void *key2);

    // Destroys a key

    void (*keyDestructor)(void *privdata, void *key);

    // Destroys a value

    void (*valDestructor)(void *privdata, void *obj);

} dictType;

dictht 结构

/*
 * Hash table.
 *
 * Every dictionary uses two hash tables, which is what makes
 * incremental (progressive) rehashing possible.
 */

typedef struct dictht {

    // Hash table array (one slot per bucket)

    dictEntry **table;

    // Hash table size (number of buckets)

    unsigned long size;

    // Size mask, used to turn a hash value into a bucket index

    // Always equal to size - 1

    unsigned long sizemask;

    // Number of entries (key-value pairs) currently stored in this table

    unsigned long used;

} dictht;

dictht 在 dict 结构中存在着2个 (dict 的 ht 属性)
ht[0] 是旧表, ht[1] 是新表
ht[1] 新表只在 rehash 的时候使用

dictEntry 结构

// Hash table entry (one key-value pair)

typedef struct dictEntry {

    // Key

    void *key;

    // Value: stored either as a pointer or as a 64-bit unsigned/signed integer

    union {

        void *val;

        uint64_t u64;

        int64_t s64;

    } v;

    // Next entry in the same bucket; entries form a linked list so that
    // key collisions are resolved by chaining

    struct dictEntry *next;

} dictEntry;

哈希算法

定义

存储键值对时, 根据键计算出哈希值, 进而计算出索引位置, 将键值对存储到索引位置上

hash = dict->type->hashFunction(key)
index = hash & dict->ht[x].sizemask

redis 使用 MurmurHash2 哈希算法

键冲突

不同的 key 用同一哈希算法时, 索引位置可能会相同, 造成键冲突

redis 使用 链接法 解决键冲突, 即索引位置相同的时候, 该位置存储为一个链表, 冲突的节点作为链表节点

注意: 插入冲突节点链表的顺序为, 从链表头部插入

hash seed

为了保证哈希算法计算出的散列值均匀分布, 加入的参数

// Hash seed used by the dictionary hash functions (initial value 5381)
static uint32_t dict_hash_function_seed = 5381;

字典哈希结构中不存在相同 key 的键值对

// Excerpt of dictAddRaw: only the duplicate-key check is shown here, the
// rest of the function body is omitted.
dictEntry *dictAddRaw(dict *d, void *key)

{

	// (omitted)

	// If the given key already exists in the dictionary, the add operation
	// returns NULL right away (_dictKeyIndex reports this case as -1)

    if ((index = _dictKeyIndex(d, key)) == -1)

        return NULL;

	// (omitted)

}

rehash

定义

哈希表的索引位置是有限的, 随着操作的不断进行, 键冲突的情况会越来越多, 查询效率会逐渐降低, 为了让哈希表的负载因子维持在一个合理的范围内, 当哈希表保存的键值对太多或太少时, 会对哈希表进行扩展和收缩, 这个过程称为 rehash

负载因子
- 哈希表存储的节点总数 / 哈希桶数量
- 阈值为5: static unsigned int dict_force_resize_ratio = 5;

rehash 开关

// Flag indicating whether rehashing is enabled for dictionaries;
// dictResize refuses to run while it is 0

static int dict_can_resize = 1;

字典在 rehash 期间, 不能调整大小

字典在 rehash 开关关闭时, 不能调整大小

// Resize the dictionary to the smallest table that still holds all of its
// entries, i.e. a used/buckets ratio as close to 1:1 as possible.
//
// Returns DICT_ERR when resizing is disabled (dict_can_resize == 0) or a
// rehash is already in progress; otherwise returns whatever dictExpand()
// returns.
int dictResize(dict *d)
{
    // Same type as dictht.used: an int here would truncate the entry count
    // of a very large table before it is handed to dictExpand().
    unsigned long minimal;

    // Must not run while rehashing is disabled or already in progress
    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;

    // Target size: the number of stored entries, but never below the
    // initial table size
    minimal = d->ht[0].used;
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;

    // Perform the actual resize
    return dictExpand(d, minimal);
}

DICT_HT_INITIAL_SIZE: 哈希表初始大小
- ```
/* Initial hash table size (bucket count); also the lower bound applied by dictResize */
#define DICT_HT_INITIAL_SIZE     4
```

字典扩展 (设置 rehashidx = 0, 可以开始 rehash)

// Expand (or create) the hash table of dictionary d.
//
// The requested size is rounded up by _dictNextPower(). When ht[0] has no
// table yet, the new table simply becomes ht[0]. Otherwise it is installed
// as ht[1] and rehashidx is set to 0 so that rehashing can begin.
//
// Returns DICT_ERR if a rehash is in progress or if size is smaller than
// the number of entries already stored in ht[0]; DICT_OK otherwise.
int dictExpand(dict *d, unsigned long size)
{
    dictht fresh;                                  // the new hash table
    unsigned long buckets = _dictNextPower(size);  // actual size to allocate

    // The dictionary cannot be resized while it is being rehashed
    if (dictIsRehashing(d))
        return DICT_ERR;

    // The target size must not be smaller than the entries already in ht[0]
    if (d->ht[0].used > size)
        return DICT_ERR;

    // Initialise the new, empty table
    fresh.used = 0;
    fresh.size = buckets;
    fresh.sizemask = buckets - 1;
    fresh.table = zcalloc(buckets * sizeof(dictEntry*));

    if (d->ht[0].table != NULL) {
        // ht[0] already holds a table: install the new one as ht[1] and
        // mark the dictionary as rehashing, starting from bucket 0
        d->ht[1] = fresh;
        d->rehashidx = 0;
    } else {
        // ht[0] has no table yet: the new table becomes ht[0] directly
        d->ht[0] = fresh;
    }

    return DICT_OK;
}

计算 rehash 的表的大小

// Return the first power of two that is >= size (never less than
// DICT_HT_INITIAL_SIZE); the result is used as the hash table size.
//
// For size >= LONG_MAX the result is LONG_MAX + 1 computed as unsigned
// long. The caller derives sizemask as size - 1, which is only a valid
// index mask when the size is a power of two; LONG_MAX itself is not one.
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long i = DICT_HT_INITIAL_SIZE;

    if (size >= LONG_MAX) return LONG_MAX + 1LU;

    // size < LONG_MAX here, so doubling reaches a value >= size before
    // the unsigned multiplication could wrap around
    while (i < size)
        i *= 2;

    return i;
}

字典 rehash 操作

// Perform up to n steps of incremental rehashing on dictionary d.
// One step moves every entry of one non-empty ht[0] bucket into ht[1].
//
// Returns 0 when d is not rehashing, or when ht[0] is found empty at the
// start of a step (the tables are then swapped and rehashing is switched
// off). Returns 1 after n steps have been carried out. Note that this is
// also the result when the last step happened to empty ht[0]: the swap is
// only done by the next call.
int dictRehash(dict *d, int n) {
    dictht *src = &d->ht[0];   // old table, being drained
    dictht *dst = &d->ht[1];   // new table, being filled

    // Only meaningful while a rehash is in progress
    if (!dictIsRehashing(d)) return 0;

    for (; n != 0; n--) {
        dictEntry *entry, *following;

        // No entries left in the old table: everything has been moved.
        // Free the old bucket array, promote the new table to ht[0],
        // reset ht[1] and turn rehashing off (rehashidx = -1).
        if (src->used == 0) {
            zfree(src->table);
            *src = *dst;
            _dictReset(dst);
            d->rehashidx = -1;
            return 0;
        }

        // rehashidx must still be inside the old table
        assert(d->ht[0].size > (unsigned)d->rehashidx);

        // Skip empty buckets. src->used != 0 was checked above, so some
        // bucket at or after rehashidx still holds entries.
        while (!src->table[d->rehashidx]) ++d->rehashidx;

        // Walk the bucket's chain and move every entry to the new table
        for (entry = src->table[d->rehashidx]; entry != NULL; entry = following) {
            unsigned int slot;

            // Remember the successor before the link is overwritten
            following = entry->next;

            // Bucket index of this key in the new table
            slot = dictHashKey(d, entry->key) & dst->sizemask;

            // Push the entry onto the head of the destination chain
            entry->next = dst->table[slot];
            dst->table[slot] = entry;

            // Keep both entry counters in sync
            src->used--;
            dst->used++;
        }

        // The bucket is now empty in the old table; advance to the next one
        src->table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    return 1;
}

所谓 rehash 操作, 就是将旧表中的数据依据新表的大小重新进行 hash 计算, 放入新表中, 最终全部数据迁移完毕后, 将新表作为旧表, rehash 结束

渐进式 rehash

redis 的 rehash 操作不是集中式的, 一次性完成的, 而是分散到了多个操作当中
之所以用渐进式的方式, 是考虑到若字典中的数据过多, rehash 耗费时间过多, 会造成此期间 redis 不可用
渐进式的操作 (增删改查均会触发)
dictAddRaw (dictAdd, dictReplace均会调用)

dictGenericDelete

dictFind

dictGetRandomKey

迭代器

redis 的迭代器用于遍历字典, 分为安全迭代器与非安全迭代器
指纹生成
dictFingerprint 函数用于生成指纹
安全迭代器与非安全迭代器的区别
非安全迭代器在创建时会记录字典的指纹, 在迭代器释放的时候, 会检测指纹是否发生变化, 若发生变化, 则会程序报错

这就决定了非安全迭代器只能对哈希表进行查操作, 否则数据发生变化, 指纹就会改变

dict api (src/dict.c)

函数	作用	备注
dictIntHashFunction	哈希算法, 计算哈希值	unsigned int dictIntHashFunction(unsigned int key)
dictIdentityHashFunction	直接使用 key 作为哈希值	unsigned int dictIdentityHashFunction(unsigned int key)
dictSetHashFunctionSeed	设置哈希种子 hash seed	void dictSetHashFunctionSeed(uint32_t seed)
dictGetHashFunctionSeed	获取哈希种子 hash seed	uint32_t dictGetHashFunctionSeed(void)
dictGenHashFunction	MurmurHash2 哈希算法	unsigned int dictGenHashFunction(const void *key, int len)
dictGenCaseHashFunction	哈希算法	unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len)
_dictReset	重置哈希表	static void _dictReset(dictht *ht)
dictCreate	创建一个新字典	dict *dictCreate(dictType *type, void *privDataPtr)
_dictInit	初始化字典数据	int _dictInit(dict *d, dictType *type, void *privDataPtr)
dictResize	调整字典大小	int dictResize(dict *d)
dictExpand	根据 size 调整字典大小	int dictExpand(dict *d, unsigned long size)
dictRehash	对指定的字典 d, 进行 n 步 rehash	int dictRehash(dict *d, int n)
timeInMilliseconds	返回毫秒为单位的 unix 时间戳	long long timeInMilliseconds(void)
dictRehashMilliseconds	在给定的毫秒内, 以100 步为单位, 进行rehash	int dictRehashMilliseconds(dict *d, int ms)
_dictRehashStep	单步 rehash	static void _dictRehashStep(dict *d)
dictAdd	将给定的键值对添加到字典中	int dictAdd(dict *d, void *key, void *val)
dictAddRaw	根据指定的 key, 创建新的哈希节点	dictEntry *dictAddRaw(dict *d, void *key)
dictReplace	将给定的键值对存入字典中, 若 key 不存在, 则新增; 若 key 存在, 则更新数据	int dictReplace(dict *d, void *key, void *val)
dictReplaceRaw	创建给定 key 的哈希节点, 若 key 不存在, 则新增; 若 key 存在, 则直接返回	dictEntry *dictReplaceRaw(dict *d, void *key)
dictGenericDelete	删除字典中指定 key 的节点, nofree 参数为0时, 代表同时调用键和值的 free 函数	static int dictGenericDelete(dict *d, const void *key, int nofree)
dictDelete	删除字典中指定 key 的节点, 同时释放键和值	int dictDelete(dict *ht, const void *key)
dictDeleteNoFree	删除字典中指定 key 的节点, 不释放键和值	int dictDeleteNoFree(dict *ht, const void *key)
_dictClear	删除指定字典的指定哈希表 ht 的所有节点, 并重置哈希表属性	int _dictClear(dict *d, dictht *ht, void(callback)(void *))
dictRelease	删除并释放指定字典	void dictRelease(dict *d)
dictFind	返回字典中指定 key 的节点	dictEntry *dictFind(dict *d, const void *key)
dictFetchValue	获取字典中指定 key 的值	void *dictFetchValue(dict *d, const void *key)
dictFingerprint	指纹生成	long long dictFingerprint(dict *d)
dictGetIterator	创建并返回指定字典的非安全迭代器	dictIterator *dictGetIterator(dict *d)
dictGetSafeIterator	创建并返回给定字典的安全迭代器	dictIterator *dictGetSafeIterator(dict *d)
dictNext	返回迭代器指向的当前节点	dictEntry *dictNext(dictIterator *iter)
dictReleaseIterator	释放迭代器	void dictReleaseIterator(dictIterator *iter)
dictGetRandomKey	随机返回字典中的任意节点	dictEntry *dictGetRandomKey(dict *d)
dictGetRandomKeys	随机获取字典中指定 count 个数的节点	int dictGetRandomKeys(dict *d, dictEntry **des, int count)
rev	翻转 bit 位	static unsigned long rev(unsigned long v)
dictScan	字典扫描函数	unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, void *privdata)
_dictExpandIfNeeded	根据需要, 对字典进行扩展	static int _dictExpandIfNeeded(dict *d)
_dictNextPower	计算一个大于等于给定 size 的2的n次方的值, 作为哈希表的大小	static unsigned long _dictNextPower(unsigned long size)
_dictKeyIndex
dictEmpty
dictEnableResize
dictDisableResize

redis 字典的更多相关文章

Redis 字典的实现
[Redis 字典的实现] 注意 dict 类型使用了两个指针,分别指向两个哈希表. 其中, 0 号哈希表(ht[0])是字典主要使用的哈希表, 而 1 号哈希表(ht[1])则只有在程序对 0 号哈 ...
阿里面试官：HashMap 熟悉吧？好的，那就来聊聊 Redis 字典吧！
最近,小黑哥的一个朋友出去面试,回来跟小黑哥抱怨,面试官不按套路出牌,直接打乱了他的节奏. 事情是这样的,前面面试问了几个 Java 的相关问题,我朋友回答还不错,接下来面试官就问了一句:看来 Jav ...
Redis 字典结构细谈
Redis 字典底层基于哈希表实现. 一.哈希表结构 1.dictht: typedef struct dictht { dictEntry **table; //哈希表数组,存储具体的键值对元素,对 ...
REDIS 字典数据结构
对于REDIS来讲其实就是一个字典结构,key ---->value 就是一个典型的字典结构 [当然对于vaule来讲的话,有不同的内存组织结构这是后话] 试想一个这样的存储场景: ...
redis字典的底层实现hashTable
Redis的字典使用哈希表作为底层实现.一个哈希表里面可以有多个哈希表节点,而每个哈希表节点就保存了字典中的一个键值对哈希表的数据结构为 table属性是一个数组,数组中的每个元素都是指向dictE ...
《闲扯Redis七》Redis字典结构的底层实现
一.前言上节<闲扯Redis六>Redis五种数据类型之Hash型中说到 Hash(哈希对象)的底层实现有: 1.ziplist 编码的哈希对象使用压缩列表作为底层实现 2.hasht ...
《闲扯Redis八》Redis字典的哈希表执行Rehash过程分析
一.前言随着操作的不断执行, 哈希表保存的键值对会逐渐地增多或者减少, 为了让哈希表的负载因子(load factor)维持在一个合理的范围之内, 当哈希表保存的键值对数量太多或者太少时, 程序需要 ...
redis字典
字典作为一种保存键值对的数据结构,在redis中使用十分广泛,redis作为数据库本身底层就是通过字典实现的,对redis的增删改查实际上也是构建在字典之上. 一.字典的结构
redis字典快速映射+hash釜底抽薪+渐进式rehash | redis为什么那么快
前言相信你一定使用过新华字典吧!小时候不会读的字都是通过字典去查找的.在Redis中也存在相同功能叫做字典又称为符号表!是一种保存键值对的抽象数据结构本篇仍然定位在[redis前传]系列中,因为本 ...

随机推荐

Javaweb之Servlet入门
1. 什么是Servlet? Java Servlet 是运行在 Web 服务器或应用服务器上的程序:他是浏览器(HTTP客户端)请求和HTTP服务器上资源(访问数据库)之间的中间层. 2. 什么是S ...
1635: [Usaco2007 Jan]Tallest Cow 最高的牛
1635: [Usaco2007 Jan]Tallest Cow 最高的牛 Time Limit: 5 Sec Memory Limit: 64 MBSubmit: 383 Solved: 211 ...
python 语法笔记（一）
#python3里面input默认接收到的事str类型,而python2里面却认为是int类型n=int(input('请输入想要第几个数')) #如果将int省去,python3中该程序将会报错a, ...
去除IOS浏览器下面的工具栏
在head标签里添加下面的元素即可 <meta id="viewport" name="viewport" content="width=de ...
前端学PHP之自定义模板引擎
前面的话在大多数的项目组中,开发一个Web程序都会出现这样的流程:计划文档提交之后,前端工程师制作了网站的外观模型,然后把它交给后端工程师,它们使用后端代码实现程序逻辑,同时使用外观模型做成基本架构 ...
[cookie篇]从cookie-parser中间件说起
当我们在写web的时候,难免会要使用到cookie,由于node.js有了express这个web框架,我们就可以方便地去建站.在使用express时,经常会使用到cookie-parser这个插件. ...
python遍历一个目录，输出所有文件名
python遍历一个目录,输出所有文件名 python os模块 os import os def GetFileList(dir, fileList): newDir = dir if os. ...
For循环及例题
For循环 (1)循环操作某一个功能(执行某段代码) (2)四要素循环初始值循环条件 ...
高可用系列之Nginx
1.1Keepalived高可用软件 Keepalived起初是专为LVS设计的,专门用来监控LVS集群系统中各个服务节点的状态,后来又加入了VRRP的功能,因此除了配合LVS服务外,也可以作为其他服 ...
XJOI1689相连的城市
相连的城市 n个城市中,某些城市间有道路互相连接.给出与每个城市相邻的城市有多少个,请输出城市间的邻接矩阵. 输入格式: 第一行输入一个正整数n,表示城市的个数. 第二行输入n个用空格隔开的非负整数, ...

redis 字典

redis 字典

前言

概述

字典结构

哈希算法

rehash

迭代器

dict api (src/dict.c)

redis 字典的更多相关文章

随机推荐

热门专题