Python 2.7的字典实现简化版(C语言)
这是一个能自动调整大小的哈希字典,外部接口实现了下列功能.
1.字典级别:
创建字典 dict_new
归零字典 dict_clear
2.键值级别:
查找 dict_search
强制查找 dict_force_search
更新 dict_update
添加 dict_add
删除 dict_del
所谓强制查找就是假如key不存在,那么它将先在字典中添加这个key,值设置为默认值,再返回这个值的指针.
由于键和值都是以void指针(void *)定义的,所以在处理一些简单的值类型时(如int),显得繁琐了些(比如_valcmp),但好处是更加灵活了,比如稍作修改(valuedup和get_default_value)就可以处理值为字符串的情况.
C确实很快,但繁重的内存管理果然名不虚传.这个简单的字典要求:
1.键(me_key)和值(me_value)的指针所指向的堆内存区域能够直接用free释放,如果这些区域还包含另一些堆指针,那么可能会出问题.
2.只需传递缓冲数据(main中的keybuf和valuebuf)给键值函数,函数内部会根据情况申请或释放内存,或不做任何处理.
为方便处理,words文本格式要求每行一个词语.
/* Pure C simple version of python 2.7.8 hash table */
/* Sample usage: see main() */
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#define PyDict_MINSIZE 8
#define PERTURB_SHIFT 5
/* True once the table is at least two-thirds full (active + dummy slots). */
#define NEED_RESIZE(mp) ((mp)->ma_fill * 3 >= ((mp)->ma_mask + 1) * 2)

/* Keys and values are opaque heap pointers owned by the dictionary. */
typedef void PyObject;

/* One slot of the open-addressing table. */
typedef struct {
    size_t me_hash;      /* cached hash of me_key */
    PyObject *me_key;    /* NULL = never used, dummy = deleted, else active */
    PyObject *me_value;  /* NULL unless the slot is active */
} PyDictEntry;

typedef struct _dictobject PyDictObject;
struct _dictobject {
    size_t ma_fill; /* # Active + # Dummy */
    size_t ma_used; /* # Active */
    size_t ma_mask; /* table size - 1; the table size is a power of two */
    PyDictEntry *ma_table;
    size_t(*ma_keyhash)(PyObject *key);
    int(*ma_keycmp)(PyObject *key1, PyObject *key2);
    PyObject *(*ma_keydup)(PyObject *key);
    PyObject *(*ma_valuedup)(PyObject *value);
    PyObject *(*ma_default)(void);
};

/* Object used as dummy key to fill deleted entries */
static PyDictEntry _dummy_struct;
#define dummy (&_dummy_struct)

/*
 * Default key hash: treats the key as a NUL-terminated string and
 * computes hash = hash * 33 + c over its bytes, starting from 5381.
 */
static size_t
keyhash(PyObject *_key)
{
    char *key = (char *)_key;
    size_t hash = 5381;

    for (; *key; key++)
        hash = ((hash << 5) + hash) + *key; /* hash * 33 + c */
    return hash;
}

/*
 * Default key comparison: compares two NUL-terminated strings.
 * Returns 0 when they are equal, otherwise the difference of the first
 * pair of characters that differ.
 */
static int
keycmp(PyObject *_key1, PyObject *_key2)
{
    char *key1 = (char *)_key1;
    char *key2 = (char *)_key2;

    for (; *key1 == *key2; key1++, key2++)
        if (*key1 == '\0')
            return 0;
    return *key1 - *key2;
}

/*
 * Default key duplicator: returns a malloc'ed copy of the string key,
 * or NULL when the allocation fails. The caller frees it with free().
 */
static PyObject *
keydup(PyObject *key)
{
    size_t len = strlen((char *)key) + 1; /* include the terminator */
    char *copy = malloc(len);

    if (copy != NULL)
        memcpy(copy, key, len);
    return (PyObject *)copy;
}

/*
 * Default value duplicator: the value is a single size_t.
 * Returns a malloc'ed copy, or NULL when the allocation fails.
 */
static PyObject *
valuedup(PyObject *_value)
{
    size_t *value = malloc(sizeof *value);

    if (value != NULL)
        *value = *(size_t *)_value;
    return (PyObject *)value;
}

/*
 * Default value factory: returns a malloc'ed size_t initialised to 0,
 * or NULL when the allocation fails.
 */
static PyObject *
get_default_value(void)
{
    size_t *value = malloc(sizeof *value);

    if (value != NULL)
        *value = 0;
    return (PyObject *)value;
}

/*
 * Create an empty dictionary.
 *
 * ma_size is the minimum table size wanted; the table is sized to the
 * smallest power of two that is >= PyDict_MINSIZE and >= ma_size.
 * Each callback may be NULL, in which case the default defined above
 * (string keys, size_t values) is installed.
 *
 * Returns the new dictionary, or NULL when the size computation
 * overflows or an allocation fails (nothing is leaked in that case).
 */
PyDictObject *
dict_new_custom(size_t ma_size,
                size_t(*ma_keyhash)(PyObject *key),
                int(*ma_keycmp)(PyObject *key1, PyObject *key2),
                PyObject * (*ma_keydup)(PyObject *key),
                PyObject * (*ma_valuedup)(PyObject *value),
                PyObject * (*ma_default)(void))
{
    PyDictObject *mp;
    PyDictEntry *newtable;
    size_t newsize;

    for (newsize = PyDict_MINSIZE;
         newsize < ma_size && newsize > 0;
         newsize <<= 1)
        ;
    if (newsize == 0)
        return NULL; /* the shift wrapped around: request too large */

    mp = malloc(sizeof *mp);
    if (mp == NULL)
        return NULL;
    /* calloc zero-fills the table, so every slot starts out unused. */
    newtable = calloc(newsize, sizeof *newtable);
    if (newtable == NULL) {
        free(mp);
        return NULL;
    }
    mp->ma_table = newtable;
    mp->ma_mask = newsize - 1;
    mp->ma_fill = mp->ma_used = 0;
    mp->ma_keyhash = ma_keyhash ? ma_keyhash : keyhash;
    mp->ma_keycmp = ma_keycmp ? ma_keycmp : keycmp;
    mp->ma_keydup = ma_keydup ? ma_keydup : keydup;
    mp->ma_valuedup = ma_valuedup ? ma_valuedup : valuedup;
    mp->ma_default = ma_default ? ma_default : get_default_value;
    return mp;
}

/* Create an empty dictionary of minimum size with all default callbacks. */
PyDictObject *
dict_new(void)
{
    return dict_new_custom(0, NULL, NULL, NULL, NULL, NULL);
}

/* internal basic search method, used by other functions */
/*
 * Locate the slot for key in mp's table.
 *
 * hash must be the hash of key. The probe starts at hash & mask and
 * continues with i = 5*i + perturb + 1, shifting perturb right by
 * PERTURB_SHIFT each round.
 *
 * Returns the active entry holding key when one is found (pointer
 * identity, or equal hash plus ma_keycmp() == 0). Otherwise returns the
 * slot where key should be stored: the first dummy slot met during the
 * probe if there was one, else the unused slot that ended the probe.
 * The loop only terminates if the table has at least one unused slot.
 */
static PyDictEntry *
lookdict(PyDictObject *mp, PyObject *key, size_t hash)
{
    size_t i;
    size_t perturb;
    PyDictEntry *freeslot;
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    PyDictEntry *ep;

    i = (size_t)hash & mask;
    ep = &ep0[i];
    if (ep->me_key == NULL || ep->me_key == key)
        return ep;
    if (ep->me_key == dummy)
        freeslot = ep;
    else if (ep->me_hash == hash
             && mp->ma_keycmp(ep->me_key, key) == 0)
        return ep;
    else
        freeslot = NULL;

    for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL)
            return freeslot == NULL ? ep : freeslot;
        /* Never hand a dummy key to ma_keycmp: it is not a real key. */
        if (ep->me_key == key
            || (ep->me_hash == hash
                && ep->me_key != dummy
                && mp->ma_keycmp(ep->me_key, key) == 0))
            return ep;
        if (ep->me_key == dummy && freeslot == NULL)
            freeslot = ep;
    }
    assert(0); /* NOT REACHED */
    return NULL;
}

/* faster method used when no dummy key exists in table */
/*
 * Same probe sequence as lookdict(), but without any dummy handling.
 *
 * Must only be used on a table that contains no dummy (deleted) slots,
 * because every non-NULL key with a matching hash is passed to
 * ma_keycmp(). Returns the entry holding key, or the unused slot that
 * ended the probe when key is absent.
 */
static PyDictEntry *
lookdict_nodummy(PyDictObject *mp, PyObject *key, size_t hash)
{
    size_t i;
    size_t perturb;
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    PyDictEntry *ep;

    i = (size_t)hash & mask;
    ep = &ep0[i];
    if (ep->me_key == NULL
        || ep->me_key == key
        || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0))
        return ep;

    for (perturb = hash;; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
        if (ep->me_key == NULL
            || ep->me_key == key
            || (ep->me_hash == hash && mp->ma_keycmp(ep->me_key, key) == 0))
            return ep;
    }
    assert(0); /* NOT REACHED */
    return NULL;
}

/* internal fast function to insert item when no dummy key exists in table */
/*
 * Store key/value (with its precomputed hash) in the first unused slot
 * of the probe sequence and bump ma_fill and ma_used.
 *
 * No key comparison is done and the pointers are stored as given (not
 * duplicated), so the caller must guarantee that key is not already
 * present and that the table has a free slot.
 */
static void
insertdict_clean(PyDictObject *mp, PyObject *key, size_t hash, PyObject *value)
{
    size_t i;
    size_t perturb;
    size_t mask = mp->ma_mask;
    PyDictEntry *ep0 = mp->ma_table;
    PyDictEntry *ep;

    i = (size_t)hash & mask;
    ep = &ep0[i];
    for (perturb = hash; ep->me_key != NULL; perturb >>= PERTURB_SHIFT) {
        i = (i << 2) + i + perturb + 1;
        ep = &ep0[i & mask];
    }
    mp->ma_fill++;
    mp->ma_used++;
    ep->me_key = key;
    ep->me_hash = hash;
    ep->me_value = value;
}

/*
Restructure the table by allocating a new table and reinserting all
items again. When entries have been deleted, the new table may
actually be smaller than the old one.
*/
/*
 * Move every active entry into a freshly allocated table whose size is
 * the smallest power of two > minused (and >= PyDict_MINSIZE). Dummy
 * slots are dropped, so ma_fill equals ma_used afterwards.
 *
 * Returns 0 on success. Returns -1, leaving the dictionary untouched,
 * when the size computation overflows or the allocation fails.
 */
static int
dict_resize(PyDictObject *mp, size_t minused)
{
    size_t newsize;
    PyDictEntry *oldtable, *newtable, *ep;

    oldtable = mp->ma_table;
    /* Find the smallest table size > minused. */
    for (newsize = PyDict_MINSIZE;
         newsize <= minused && newsize > 0;
         newsize <<= 1)
        ;
    if (newsize == 0)
        return -1; /* the shift wrapped around */

    /* Get space for a new table; calloc leaves every slot unused. */
    newtable = calloc(newsize, sizeof *newtable);
    if (newtable == NULL)
        return -1;

    mp->ma_table = newtable;
    mp->ma_mask = newsize - 1;
    size_t used = mp->ma_used;
    mp->ma_used = 0;
    mp->ma_fill = 0;
    /* Stop as soon as all active entries have been moved. */
    for (ep = oldtable; used > 0; ep++) {
        /* only active entry */
        if (ep->me_value != NULL) {
            used--;
            insertdict_clean(mp, ep->me_key, ep->me_hash, ep->me_value);
        }
    }
    free(oldtable);
    return 0;
}

/*
 * Return the value stored for key, or NULL when key is absent.
 * The pointer refers to the dictionary's own copy; do not free it.
 */
PyObject *
dict_search(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    return ep->me_value;
}

/* Return 1 when key is present in the dictionary, 0 otherwise. */
int
dict_contain(PyDictObject *mp, PyObject *key)
{
    return dict_search(mp, key) ? 1 : 0;
}

/*
 * Insert a new key with a copy of value. key must not already be
 * present (checked with assert). The dictionary stores its own copies
 * made with ma_keydup/ma_valuedup.
 *
 * Returns 0 on success. Returns -1 when a copy cannot be allocated (the
 * dictionary is then unchanged), or when the entry was stored but the
 * follow-up resize failed.
 */
int
dict_add(PyDictObject *mp, PyObject *key, PyObject *value)
{
    assert(key);
    assert(value);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for non-existing keys*/
    assert(ep->me_value == NULL);

    /* Build both copies first so a failure cannot corrupt the slot. */
    PyObject *new_key = mp->ma_keydup(key);
    if (new_key == NULL)
        return -1;
    PyObject *new_value = mp->ma_valuedup(value);
    if (new_value == NULL) {
        free(new_key);
        return -1;
    }

    /* Reusing a dummy slot does not change the fill count. */
    if (ep->me_key == NULL)
        mp->ma_fill++;
    mp->ma_used++;
    ep->me_key = new_key;
    ep->me_value = new_value;
    ep->me_hash = hash;
    if (NEED_RESIZE(mp))
        return dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used);
    return 0;
}

/*
 * Replace the value of an existing key with a copy of value. key must
 * be present (checked with assert). The previous value is freed.
 *
 * Returns 0 on success, or -1 when the copy cannot be allocated, in
 * which case the old value is kept.
 */
int
dict_update(PyDictObject *mp, PyObject *key, PyObject *value)
{
    assert(key);
    assert(value);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for existing keys*/
    assert(ep->me_value != NULL);

    PyObject *new_value = mp->ma_valuedup(value);
    if (new_value == NULL)
        return -1;
    free(ep->me_value);
    ep->me_value = new_value;
    return 0;
}

/*
 * Remove an existing key. key must be present (checked with assert).
 * The stored key and value are freed and the slot is marked with the
 * dummy key so that probe sequences passing through it stay intact.
 * Always returns 0.
 */
int
dict_del(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);
    /*only for existing keys*/
    assert(ep->me_value != NULL);
    free(ep->me_key);
    free(ep->me_value);
    ep->me_key = dummy;
    ep->me_value = NULL;
    mp->ma_used--;
    return 0;
}

/*
 * Return the value stored for key, inserting key with a value obtained
 * from ma_default() first when it is absent.
 *
 * Returns NULL only when the key had to be inserted and an allocation
 * failed; the dictionary is unchanged in that case. The returned
 * pointer refers to the dictionary's own value and may be written
 * through, but must not be freed.
 */
PyObject *
dict_force_search(PyDictObject *mp, PyObject *key)
{
    assert(key);
    size_t hash = mp->ma_keyhash(key);
    PyDictEntry *ep = lookdict(mp, key, hash);

    if (ep->me_value == NULL) {
        /* Build both objects first so a failure cannot corrupt the slot. */
        PyObject *new_key = mp->ma_keydup(key);
        if (new_key == NULL)
            return NULL;
        PyObject *new_value = mp->ma_default();
        if (new_value == NULL) {
            free(new_key);
            return NULL;
        }

        if (ep->me_key == NULL)
            mp->ma_fill++;
        mp->ma_used++;
        ep->me_key = new_key;
        ep->me_value = new_value;
        ep->me_hash = hash;
        /*
         * A successful resize moves the entry, so look it up again in
         * the new table (which has no dummies). If the resize failed,
         * the old table is still in place and ep is still valid.
         */
        if (NEED_RESIZE(mp)
            && dict_resize(mp, (mp->ma_used > 50000 ? 2 : 4) * mp->ma_used) == 0)
            ep = lookdict_nodummy(mp, key, hash);
    }
    return ep->me_value;
}

/*
 * Remove every entry: free the keys and values of all active slots,
 * zero the table and reset the counters. The table itself keeps its
 * current size and stays allocated.
 */
void
dict_clear(PyDictObject *mp)
{
    PyDictEntry *table = mp->ma_table;
    assert(table != NULL);
    size_t used = mp->ma_used;

    if (mp->ma_fill == 0)
        return;

    PyDictEntry *ep;
    for (ep = table; used > 0; ep++) {
        /*only free active entry, this is different from Python 2.7*/
        if (ep->me_value != NULL) {
            used--;
            free(ep->me_key);
            free(ep->me_value);
        }
    }
    memset(table, 0, sizeof(PyDictEntry) * (mp->ma_mask + 1));
    /* Keep the counters in step with the now-empty table. */
    mp->ma_used = 0;
    mp->ma_fill = 0;
}

/* Return the number of active entries. */
size_t
dict_len(PyDictObject *mp)
{
    return mp->ma_used;
}

/* helper function for sorting a PyDictEntry by its value */
static int
_valcmp(const void *a, const void *b)
{
return *(size_t *)(*(PyDictEntry *)a).me_value > *(size_t *)(*
(PyDictEntry *)b).me_value ? - : ;
} /*print key value pair by value DESC order*/
/*
 * Print every "key<TAB>value" pair to stdout, sorted by value in
 * descending order. Keys are printed as strings and values as size_t.
 * Works on a temporary copy of the active entries; the dictionary is
 * not modified. Prints nothing when the dictionary is empty or the
 * temporary copy cannot be allocated.
 */
static void
print_all_by_value_desc(PyDictObject *mp)
{
    size_t used = mp->ma_used;
    size_t i = 0;
    PyDictEntry *ep;
    PyDictEntry *temp_table;

    if (used == 0)
        return;
    temp_table = malloc(sizeof *temp_table * used);
    if (temp_table == NULL)
        return;

    /* Collect the active entries; stop once all have been seen. */
    for (ep = mp->ma_table; used > 0; ep++) {
        if (ep->me_value != NULL) {
            used--;
            temp_table[i++] = *ep;
        }
    }
    used = mp->ma_used;
    qsort(temp_table, used, sizeof(temp_table[0]), _valcmp);
    for (i = 0; i < used; i++)
        fprintf(stdout, "%s\t%zu\n", (char *)temp_table[i].me_key,
                *(size_t *)temp_table[i].me_value);
    free(temp_table);
}

/*
 * Debug dump: walk the table in slot order and print
 * "key<TAB>value<TAB>hash" for each active entry, plus a note for each
 * dummy slot met before the last active entry.
 */
void printd(PyDictObject *mp)
{
    PyDictEntry *ep;
    size_t used = mp->ma_used;

    for (ep = mp->ma_table; used > 0; ep++) {
        if (ep->me_value) {
            used--;
            fprintf(stdout, "%s\t%zu\t%zu\n", (char *)ep->me_key,
                    *(size_t *)ep->me_value, ep->me_hash);
        } else if (ep->me_key == dummy) {
            fprintf(stdout, "it is a dummy key! it's hash is %zu\n", ep->me_hash);
        }
    }
}

/* scan words from the file "words", print total amount for each word by DESC order */
int main(void)
{
//PyDictObject *mp = dict_new_custom(32, 0, 0, 0, 0, 0);
PyDictObject *mp = dict_new();
FILE *fp;
fp = fopen("words", "r");
char keybuf[];
size_t valuebuf[] = { };
size_t *vp;
/* while (fscanf(stdin, "%s", keybuf) == 1) {
if (dict_contain(mp, keybuf)) {
vp = dict_search(mp, keybuf);
*vp += 1;
} else
dict_add(mp, keybuf, valuebuf);
}*/
while (fscanf(fp, "%s", keybuf) == ) {
vp = dict_force_search(mp, keybuf);
*vp += ;
} print_all_by_value_desc(mp);
//printd(mp);
dict_clear(mp);
fclose(fp);
free(mp);
return ;
}
Python 2.7的字典实现简化版(C语言)的更多相关文章
- 『Python基础-10』字典
# 『Python基础-10』字典 目录: 1.字典基本概念 2.字典键(key)的特性 3.字典的创建 4-7.字典的增删改查 8.遍历字典 1. 字典的基本概念 字典一种key - value 的 ...
- Python 优雅的操作字典【转】
Python 中的字典是Python中一个键值映射的数据结构,下面介绍一下如何优雅的操作字典. 1.1 创建字典 Python有两种方法可以创建字典,第一种是使用花括号,另一种是使用内建 函数dict ...
- 初学Python(三)——字典
初学Python(三)——字典 初学Python,主要整理一些学习到的知识点,这次是字典. #-*- coding:utf-8 -*- d = {1:"name",2:" ...
- python编程基础知识—字典
字典 在python中,字典是一系列键-值对,每个键都与一个值相关联,可使用键来访问相关联的值.与键相关联的值可以是数字.字符串.列表乃至字典,即可将任何python对象用在字典中的值. 在pytho ...
- python调用数据返回字典dict数据的现象2
python调用数据返回字典dict数据的现象2 思考: 话题1连接:https://www.cnblogs.com/zwgbk/p/10248479.html在打印和添加时候加上内存地址id(),可 ...
- python调用数据返回字典dict数据的现象1
python调用数据返回字典dict数据的现象1 思考: 可以看到这两种情况,区别在于构造函数make()里赋值给字典dict的方式不同.使用相同的调用方式,而结果却完全不同.可以看到第二种情况才是我 ...
- python基本数据类型之字典
python基本数据类型之字典 python中的字典是以键(key)值(value)对的形式储存数据,基本形式如下: d = {'Bart': 95, 'Michael': 34, 'Lisa': 5 ...
- Python 优雅的操作字典
Python 中的字典是Python中一个键值映射的数据结构,下面介绍一下如何优雅的操作字典. 来源:https://www.linuxzen.com/python-you-ya-de-cao-zuo ...
- Python数据类型详解——字典
Python数据类型详解--字典 引子 已经学习了列表,现在有个需求--把公司每个员工的姓名.年龄.职务.工资存到列表里,你怎么存? staff_list = [ ["Kwan", ...
随机推荐
- kafka知识体系-kafka设计和原理分析
kafka设计和原理分析 kafka在1.0版本以前,官方主要定义为分布式多分区多副本的消息队列,而1.0后定义为分布式流处理平台,就是说处理传递消息外,kafka还能进行流式计算,类似Strom和S ...
- Mlecms 反射型xss && 后台任意文件下载
应该算0day吧,自己分析出来的,有点鸡肋,不过小cms分析确实比较简单. xss地址:search.php?word=a><img+src=1+onerror=alert`1`>a ...
- Fetching data with Ajax小例子
ajax获取数据示例: 示例1 通过ajax获取txt文件里面的内容示例: <html> <head> <title>Ajax at work</title& ...
- [BJOI 2010]次小生成树Tree
Description 小 C 最近学了很多最小生成树的算法,Prim 算法.Kurskal 算法.消圈算法等等. 正当小 C 洋洋得意之时,小 P 又来泼小 C 冷水了.小 P 说,让小 C 求出一 ...
- [测试题]幸运序列(lucky)
Description Ly喜欢幸运数字,众所周知,幸运数字就是数字位上只有4和7的数字. 但是本题的幸运序列和幸运数字完全没关系,就是一个非常非常普通的序列.哈哈,是不是感觉被耍了,没错,你就是被耍 ...
- [HNOI2012]双十字
题目描述 在C 部落,双十字是非常重要的一个部落标志.所谓双十字,如下面两个例子,由两条水平的和一条竖直的”1“线段组成,要求满足以下几个限制: ![] 我们可以找到 5 个满足条件的双十字,分别如下 ...
- hdu 3433 A Task Process 二分+dp
A Task Process Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 32768/32768 K (Java/Others) T ...
- bzoj4011[HNOI2015]落忆枫音 dp+容斥(?)
4011: [HNOI2015]落忆枫音 Time Limit: 10 Sec Memory Limit: 512 MBSubmit: 1125 Solved: 603[Submit][Statu ...
- 根据构建类型自动修改依赖库的BuildConfig.DEBUG的值
app模块引用了library,在library模块中控制日志输出使用的是 if (BuildConfig.DEBUG) { logger.d("print %s", msg); ...
- Python的IO编程
原文传送门:请点击 原文传送门:请点击