Squid--hash代码分析

#ifndef SQUID_HASH_H

#define SQUID_HASH_H

// Aliases for the callback function types and the two structures of the hash API.

// Item destructor; hashFreeItems() calls it once for every link it collected.
typedef void HASHFREE(void *);

// Key comparator; hash_lookup() treats a return value of 0 as "keys match".
typedef int HASHCMP(const void *, const void *);

// Hash function: receives a key and the table size; the result is used as a bucket index.
typedef unsigned int HASHHASH(const void *, unsigned int);

typedef struct _hash_link hash_link;

typedef struct _hash_table hash_table;

// One entry of the hash table. Entries that land in the same bucket are chained via 'next'.

struct _hash_link {

    void *key;          // key passed to the table's hash and compare callbacks

    hash_link *next;    // next entry in the same bucket chain (NULL at the end)

};

// The hash table: an array of bucket chains plus the state of the built-in traversal cursor.

struct _hash_table {

    hash_link **buckets;    // array of 'size' chain heads; each is a hash_link list or NULL

    HASHCMP *cmp;           // key comparison function

    HASHHASH *hash;         // function mapping a key to a bucket index

    unsigned int size;      // number of buckets in 'buckets'

    unsigned int current_slot;  // bucket index the traversal (hash_first/hash_next) has reached

    hash_link *next;        // entry the next hash_next() call returns; NULL when no traversal is pending

    int count;      // number of hash_link entries currently linked into the table

};

// Create a table; a size argument of 0 selects DEFAULT_HASH_SIZE buckets.
SQUIDCEXTERN hash_table *hash_create(HASHCMP *, int, HASHHASH *);

// Link an entry at the head of the bucket chosen by hashing its key.
SQUIDCEXTERN void hash_join(hash_table *, hash_link *);

// Unlink an entry from its bucket; the entry itself is not freed.
SQUIDCEXTERN void hash_remove_link(hash_table *, hash_link *);

// Pick the entry of the built-in prime table closest to n on a logarithmic scale.
SQUIDCEXTERN int hashPrime(int n);

// Find the first entry whose key compares equal to the given key, or NULL.
SQUIDCEXTERN hash_link *hash_lookup(hash_table *, const void *);

// Start a traversal; must be called before hash_next().
SQUIDCEXTERN void hash_first(hash_table *);

// Return the next entry of the traversal, or NULL when it is finished.
SQUIDCEXTERN hash_link *hash_next(hash_table *);

// Reset the traversal state.
SQUIDCEXTERN void hash_last(hash_table *);

// Return the chain head of the given bucket, or NULL if the index is out of range.
SQUIDCEXTERN hash_link *hash_get_bucket(hash_table *, unsigned int);

// Free the bucket array and the table structure (not the entries).
SQUIDCEXTERN void hashFreeMemory(hash_table *);

// Call the given destructor on every entry of the table.
SQUIDCEXTERN void hashFreeItems(hash_table *, HASHFREE *);

// Built-in hash functions for NUL-terminated keys.
SQUIDCEXTERN HASHHASH hash_string;

SQUIDCEXTERN HASHHASH hash4;

// Return an entry's key viewed as a C string.
SQUIDCEXTERN const char *hashKeyStr(hash_link *);

/*  Prime numbers suggested by Squid as hash table sizes

 *  Here are some good prime number choices.  It's important not to

 *  choose a prime number that is too close to exact powers of 2.

 *

 *  HASH_SIZE 103               // prime number < 128

 *  HASH_SIZE 229               // prime number < 256

 *  HASH_SIZE 467               // prime number < 512

 *  HASH_SIZE 977               // prime number < 1024

 *  HASH_SIZE 1979              // prime number < 2048

 *  HASH_SIZE 4019              // prime number < 4096

 *  HASH_SIZE 6037              // prime number < 6144

 *  HASH_SIZE 7951              // prime number < 8192

 *  HASH_SIZE 12149             // prime number < 12288

 *  HASH_SIZE 16231             // prime number < 16384

 *  HASH_SIZE 33493             // prime number slightly above 32768

 *  HASH_SIZE 65357             // prime number < 65536

 */

// Default number of buckets; hash_create() uses it when called with a size of 0.

#define  DEFAULT_HASH_SIZE 7951 /* prime number < 8192 */

#endif /* SQUID_HASH_H */

/*

 * DEBUG: section 00    Hash Tables

 * AUTHOR: Harvest Derived

 *

 * SQUID Web Proxy Cache          http://www.squid-cache.org/

 * ----------------------------------------------------------

 *

 *  Squid is the result of efforts by numerous individuals from

 *  the Internet community; see the CONTRIBUTORS file for full

 *  details.   Many organizations have provided support for Squid's

 *  development; see the SPONSORS file for full details.  Squid is

 *  Copyrighted (C) 2001 by the Regents of the University of

 *  California; see the COPYRIGHT file for full details.  Squid

 *  incorporates software developed and/or copyrighted by other

 *  sources; see the CREDITS file for full details.

 *

 *  This program is free software; you can redistribute it and/or modify

 *  it under the terms of the GNU General Public License as published by

 *  the Free Software Foundation; either version 2 of the License, or

 *  (at your option) any later version.

 *

 *  This program is distributed in the hope that it will be useful,

 *  but WITHOUT ANY WARRANTY; without even the implied warranty of

 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the

 *  GNU General Public License for more details.

 *

 *  You should have received a copy of the GNU General Public License

 *  along with this program; if not, write to the Free Software

 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.

 *

 */

#include "squid.h"

#include "hash.h"

#include "profiler/Profiler.h"

#if HAVE_STDIO_H

#include <stdio.h>

#endif

#if HAVE_STDLIB_H

#include <stdlib.h>

#endif

#if HAVE_STRING_H

#include <string.h>

#endif

#if HAVE_UNISTD_H

#include <unistd.h>

#endif

#if HAVE_GNUMALLLOC_H

#include <gnumalloc.h>

#elif HAVE_MALLOC_H

#include <malloc.h>

#endif

#if HAVE_ASSERT_H

#include <assert.h>

#endif

#if HAVE_MATH_H

#include <math.h>

#endif

/* Forward declaration of the file-local traversal helper defined further down. */
static void hash_next_bucket(hash_table * hid);

/*
 * hash_string - XOR-based hash of a NUL-terminated byte string.
 *
 * Usable as the HASHHASH callback (third argument) of hash_create().
 * 'data' is read byte by byte until the first 0 byte, so keys containing
 * embedded NUL bytes are only hashed up to that point.
 *
 * Every byte is multiplied by 271 and XOR-ed into an accumulator; the
 * accumulator is finally XOR-ed with (length * 271).  Because XOR is
 * commutative, permutations of the same bytes hash to the same value.
 *
 * Returns the hash reduced modulo 'size'; 'size' must be non-zero.
 */
unsigned int
hash_string(const void *data, unsigned int size)
{
    unsigned int mix = 0;
    unsigned int length = 0;

    for (const unsigned char *p = static_cast<const unsigned char *>(data); *p != 0; ++p) {
        mix ^= 271 * *p;    /* bitwise XOR of the scaled byte */
        ++length;
    }

    return (mix ^ (length * 271)) % size;
}

/* the following function(s) were adapted from
 *    usr/src/lib/libc/db/hash_func.c, 4.4 BSD lite */

/*
 * hash4 - hash function from Chris Torek.
 *
 * Treats 'data' as a NUL-terminated string and folds every character, in
 * order, into the accumulator with h = (h << 5) + h + c (i.e. h * 33 + c,
 * wrapping as unsigned arithmetic).  Characters are read through a plain
 * 'char' pointer, so their signedness follows the platform's 'char'.
 *
 * Returns the accumulator reduced modulo 'size'; 'size' must be non-zero.
 */
unsigned int
hash4(const void *data, unsigned int size)
{
    const char *key = static_cast<const char *>(data);
    unsigned int h = 0;

    /* One mixing step each; both consume one character from 'key'.
     * HASH4 selects the variant in use (the "+ h" form). */
#define HASH4a   h = (h << 5) - h + *key++;
#define HASH4b   h = (h << 5) + h + *key++;
#define HASH4 HASH4b

    /* Apply one step per character of the string. */
    for (size_t remaining = strlen(key); remaining != 0; --remaining) {
        HASH4;
    }

    return h % size;
}

/**
 *  hash_create - allocates and initializes a new, empty hash table.
 *
 *  cmp_func   key comparator stored in the table
 *  hash_sz    number of buckets; 0 selects DEFAULT_HASH_SIZE
 *  hash_func  hash function stored in the table
 *
 *  Returns a pointer to the new table.  No error value is produced here;
 *  both allocations go through xcalloc(), which is expected to hand back
 *  zero-filled memory (the entry count is never set explicitly).
 */
hash_table *
hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
{
    hash_table *table = (hash_table *)xcalloc(1, sizeof(*table));

    table->cmp = cmp_func;
    table->hash = hash_func;
    table->size = hash_sz ? (unsigned int) hash_sz : (unsigned int) DEFAULT_HASH_SIZE;

    /* one chain head per bucket, all initially empty */
    table->buckets = (hash_link **)xcalloc(table->size, sizeof(*table->buckets));

    /* no traversal in progress */
    table->current_slot = 0;
    table->next = NULL;

    return table;
}

/**
 *  hash_join - links 'lnk' into the hash table 'hid' under its key
 *  lnk->key.
 *
 *  Nothing is copied: the link is pushed onto the front of the chain of
 *  the bucket selected by the table's hash function, and the table's
 *  entry count is incremented.  No check for duplicate keys is made.
 */
void
hash_join(hash_table * hid, hash_link * lnk)
{
    const int slot = hid->hash(lnk->key, hid->size);
    hash_link **head = &hid->buckets[slot];

    lnk->next = *head;
    *head = lnk;

    ++hid->count;
}

/**
 *  hash_lookup - searches the hash table 'hid' for the key 'k'.
 *
 *  'k' is the key itself (it must not be NULL); it is hashed to select a
 *  bucket and then compared against each entry of that bucket's chain
 *  with the table's compare function.
 *
 *  Returns the first entry whose key compares equal (compare result 0),
 *  or NULL when the chain holds no such entry.
 */
hash_link *
hash_lookup(hash_table * hid, const void *k)
{
    PROF_start(hash_lookup);
    assert(k != NULL);

    hash_link *match = NULL;
    const int slot = hid->hash(k, hid->size);
    hash_link *node = hid->buckets[slot];

    while (node != NULL) {
        if ((hid->cmp) (k, node->key) == 0) {
            match = node;
            break;
        }

        /* a node pointing at itself would make this loop endless */
        assert(node != node->next);
        node = node->next;
    }

    PROF_stop(hash_lookup);
    return match;
}

/*
 * Advance the traversal cursor to the head of the next non-empty bucket.
 *
 * Does nothing when hid->next is already set.  Otherwise current_slot is
 * incremented until a non-empty bucket is found or the end of the bucket
 * array is reached (hid->next then stays NULL).
 */
static void
hash_next_bucket(hash_table * hid)
{
    while (hid->next == NULL) {
        ++hid->current_slot;

        if (hid->current_slot >= hid->size)
            break;

        hid->next = hid->buckets[hid->current_slot];
    }
}

/**
 *  hash_first - prepares the hash table for a traversal with hash_next().
 *
 *  Asserts that no traversal is pending (hid->next must be NULL), then
 *  positions the cursor on the head of the first non-empty bucket.  If the
 *  table is empty, hid->next remains NULL.
 */
void
hash_first(hash_table * hid)
{
    assert(NULL == hid->next);

    hid->current_slot = 0;
    hid->next = hid->buckets[0];

    if (hid->next != NULL)
        return;

    /* bucket 0 is empty: skip forward to the first bucket that is not */
    hash_next_bucket(hid);
}

/**
 *  hash_next - returns the next entry of the traversal over 'hid', or
 *  NULL once every entry has been returned.
 *
 *  MUST call hash_first() before hash_next().
 *
 *  Before returning, the cursor is moved to the entry that follows in the
 *  same chain, or to the head of the next non-empty bucket when the chain
 *  is exhausted.
 */
hash_link *
hash_next(hash_table * hid)
{
    hash_link *current = hid->next;

    if (current != NULL) {
        hid->next = current->next;

        if (hid->next == NULL)
            hash_next_bucket(hid);
    }

    return current;
}

/**
 *  hash_last - abandons any traversal in progress by clearing the cursor
 *  (hid->next) and rewinding current_slot to 0.
 */
void
hash_last(hash_table * hid)
{
    assert(hid != NULL);

    hid->current_slot = 0;
    hid->next = NULL;
}

/**
 *  hash_remove_link - unlinks the entry 'hl' from the hash table 'hid'.
 *  The entry is only removed from its bucket chain; it is not freed.
 *
 *  If the traversal cursor currently points at 'hl', the cursor is moved
 *  on to the following entry so that a running hash_next() loop stays
 *  valid.
 *
 *  An assertion is triggered if 'hl' is NULL or is not found in the
 *  bucket selected by its key.
 */
void
hash_remove_link(hash_table * hid, hash_link * hl)
{
    assert(hl != NULL);
    const int slot = hid->hash(hl->key, hid->size);

    /* 'ref' addresses the pointer that leads to the node under inspection:
     * first the bucket head, then each node's 'next' field. */
    hash_link **ref = &hid->buckets[slot];

    while (*ref != NULL && *ref != hl)
        ref = &(*ref)->next;

    if (*ref == NULL) {
        /* hl is not part of this chain */
        assert(0);
        return;
    }

    /* bypass hl; works the same for head and interior nodes */
    *ref = hl->next;

    if (hid->next == hl) {
        hid->next = hl->next;

        if (hid->next == NULL)
            hash_next_bucket(hid);
    }

    --hid->count;
}

/**
 *  hash_get_bucket - returns the first entry of bucket number 'bucket' in
 *  the hash table 'hid' (NULL for an empty bucket).  An index outside the
 *  bucket array also yields NULL.
 */
hash_link *
hash_get_bucket(hash_table * hid, unsigned int bucket)
{
    return (bucket < hid->size) ? hid->buckets[bucket] : NULL;
}

/*
 * hashFreeItems - hands every entry of 'hid' to 'free_func'.
 *
 * The entry pointers are first gathered into a temporary array (at most
 * hid->count of them) by a full hash_first()/hash_next() traversal; only
 * afterwards is free_func called on each collected pointer, so no entry is
 * released while the traversal is still walking the chains.  The entries
 * are not unlinked here and the table itself is not freed.
 */
void
hashFreeItems(hash_table * hid, HASHFREE * free_func)
{
    hash_link **collected = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
    int n = 0;

    hash_first(hid);

    for (;;) {
        hash_link *item = hash_next(hid);

        if (item == NULL || n >= hid->count)
            break;

        collected[n++] = item;
    }

    for (hash_link **it = collected; it != collected + n; ++it)
        free_func(*it);

    xfree(collected);
}

/*
 * hashFreeMemory - releases the bucket array and the table structure of
 * 'hid'.  Entries still linked into the table are not touched.  A NULL
 * 'hid' is accepted and ignored.
 */
void
hashFreeMemory(hash_table * hid)
{
    if (hid != NULL) {
        if (hid->buckets)
            xfree(hid->buckets);

        xfree(hid);
    }
}

/* Candidate table sizes, in ascending order (see the list in hash.h). */
static int hash_primes[] = {
    103, 229, 467, 977, 1979, 4019,
    6037, 7951, 12149, 16231, 33493, 65357
};

/*
 * hashPrime - returns the entry of hash_primes that is closest to 'n' on a
 * logarithmic scale, i.e. the one minimizing |log(n) - log(prime)|.
 *
 * On an exact tie the later (larger) table entry wins.  For n <= 0 the
 * distance is infinite or NaN for every entry, no entry compares as
 * strictly worse, and the last table entry is returned.
 */
int
hashPrime(int n)
{
    const int table_len = sizeof(hash_primes) / sizeof(hash_primes[0]);
    const double log_n = log((double) n);

    int best_prime = hash_primes[0];
    double best_dist = fabs(log_n - log((double) hash_primes[0]));

    for (int idx = 0; idx < table_len; ++idx) {
        const double dist = fabs(log_n - log((double) hash_primes[idx]));

        /* written as "not worse" so that ties and NaN take this branch */
        if (!(dist > best_dist)) {
            best_dist = dist;
            best_prime = hash_primes[idx];
        }
    }

    return best_prime;
}

/**
 * hashKeyStr - returns the key of the hash_link 'hl' viewed as a constant
 * C string.  The key pointer is only reinterpreted, never copied; this is
 * meaningful only for tables whose keys actually are strings.
 */
const char *
hashKeyStr(hash_link * hl)
{
    const void *raw_key = hl->key;

    return static_cast<const char *>(raw_key);
}

#if USE_HASH_DRIVER

/**

 *  hash-driver - Run with a big file as stdin to insert each line into the

 *  hash table, then prints the whole hash table, then deletes a random item,

 *  and prints the table again...

 */

int

main(void)

{

    hash_table *hid;

    LOCAL_ARRAY(char, buf, BUFSIZ);

    LOCAL_ARRAY(char, todelete, BUFSIZ);

    hash_link *walker = NULL;

    todelete[0] = '\0';

    printf("init\n");

    printf("creating hash table\n");

    if ((hid = hash_create((HASHCMP *) strcmp, 229, hash4)) < 0) {

        printf("hash_create error.\n");

        exit(1);

    }

    printf("done creating hash table: %d\n", hid);

    while (fgets(buf, BUFSIZ, stdin)) {

        buf[strlen(buf) - 1] = '\0';

        printf("Inserting '%s' for item %p to hash table: %d\n",

               buf, buf, hid);

        hash_insert(hid, xstrdup(buf), (void *) 0x12345678);

        if (random() % 17 == 0)

            strcpy(todelete, buf);

    }

    printf("walking hash table...\n");

    for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {

        printf("item %5d: key: '%s' item: %p\n", i++, walker->key,

               walker->item);

    }

    printf("done walking hash table...\n");

    if (todelete[0]) {

        printf("deleting %s from %d\n", todelete, hid);

        if (hash_delete(hid, todelete))

            printf("hash_delete error\n");

    }

    printf("walking hash table...\n");

    for (int i = 0, walker = hash_first(hid); walker; walker = hash_next(hid)) {

        printf("item %5d: key: '%s' item: %p\n", i++, walker->key,

               walker->item);

    }

    printf("done walking hash table...\n");

    printf("driver finished.\n");

    exit(0);

}

#endif

下面具体分析：

hash表整体结构：

1、hash_create

/**
 *  hash_create - allocates and initializes a new, empty hash table.
 *
 *  cmp_func   key comparator stored in the table
 *  hash_sz    number of buckets; 0 selects DEFAULT_HASH_SIZE
 *  hash_func  hash function stored in the table
 *
 *  Returns a pointer to the new table.  No error value is produced here;
 *  both allocations go through xcalloc(), which is expected to hand back
 *  zero-filled memory (the entry count is never set explicitly).
 */
hash_table *
hash_create(HASHCMP * cmp_func, int hash_sz, HASHHASH * hash_func)
{
    hash_table *table = (hash_table *)xcalloc(1, sizeof(*table));

    table->cmp = cmp_func;
    table->hash = hash_func;
    table->size = hash_sz ? (unsigned int) hash_sz : (unsigned int) DEFAULT_HASH_SIZE;

    /* one chain head per bucket, all initially empty */
    table->buckets = (hash_link **)xcalloc(table->size, sizeof(*table->buckets));

    /* no traversal in progress */
    table->current_slot = 0;
    table->next = NULL;

    return table;
}

创建hash表。需要三个参数：cmp_func、hash_sz、hash_func，其中hash_sz用来表示创建的hash表的桶链表的大小，如果为0，则使用默认的大小DEFAULT_HASH_SIZE.

桶链表储存的数据类型为：hash_link * ，即它只存储hash_link节点的地址。初始化后的桶链表没有存储任何地址，全部为0。

current_slot和hid->next是遍历（hash_first/hash_next）使用的游标：current_slot记录当前遍历到的桶号，初始为0；hid->next指向下一次调用hash_next时将要返回的hash_link节点（而不是“下一个桶”），初始为NULL，表示当前没有正在进行的遍历。

2、hash_join

/**
 *  hash_join - links 'lnk' into the hash table 'hid' under its key
 *  lnk->key.
 *
 *  Nothing is copied: the link is pushed onto the front of the chain of
 *  the bucket selected by the table's hash function, and the table's
 *  entry count is incremented.  No check for duplicate keys is made.
 */
void
hash_join(hash_table * hid, hash_link * lnk)
{
    const int slot = hid->hash(lnk->key, hid->size);
    hash_link **head = &hid->buckets[slot];

    lnk->next = *head;
    *head = lnk;

    ++hid->count;
}

首先利用hash函数计算出节点lnk应该插入的桶号i，将lnk的next指针指向桶i中链表原来的首节点，再将lnk的地址存入桶i，于是lnk成为桶i所存链表的新首节点（头插法）；最后将hid->count加1。

3、hash_lookup

/**
 *  hash_lookup - searches the hash table 'hid' for the key 'k'.
 *
 *  'k' is the key itself (it must not be NULL); it is hashed to select a
 *  bucket and then compared against each entry of that bucket's chain
 *  with the table's compare function.
 *
 *  Returns the first entry whose key compares equal (compare result 0),
 *  or NULL when the chain holds no such entry.
 */
hash_link *
hash_lookup(hash_table * hid, const void *k)
{
    PROF_start(hash_lookup);
    assert(k != NULL);

    hash_link *match = NULL;
    const int slot = hid->hash(k, hid->size);
    hash_link *node = hid->buckets[slot];

    while (node != NULL) {
        if ((hid->cmp) (k, node->key) == 0) {
            match = node;
            break;
        }

        /* a node pointing at itself would make this loop endless */
        assert(node != node->next);
        node = node->next;
    }

    PROF_stop(hash_lookup);
    return match;
}

首先用hash函数对键k计算出桶号b（注意k是键本身，而不是哈希值），walker从桶b所存链表的首节点开始逐个向后遍历，并用cmp函数比较k与walker->key：返回0表示匹配，立即返回该节点；遍历完整条链表仍未找到匹配节点时返回NULL。

4、hash_remove_link

/**
 *  hash_remove_link - unlinks the entry 'hl' from the hash table 'hid'.
 *  The entry is only removed from its bucket chain; it is not freed.
 *
 *  If the traversal cursor currently points at 'hl', the cursor is moved
 *  on to the following entry so that a running hash_next() loop stays
 *  valid.
 *
 *  An assertion is triggered if 'hl' is NULL or is not found in the
 *  bucket selected by its key.
 */
void
hash_remove_link(hash_table * hid, hash_link * hl)
{
    assert(hl != NULL);
    const int slot = hid->hash(hl->key, hid->size);

    /* 'ref' addresses the pointer that leads to the node under inspection:
     * first the bucket head, then each node's 'next' field. */
    hash_link **ref = &hid->buckets[slot];

    while (*ref != NULL && *ref != hl)
        ref = &(*ref)->next;

    if (*ref == NULL) {
        /* hl is not part of this chain */
        assert(0);
        return;
    }

    /* bypass hl; works the same for head and interior nodes */
    *ref = hl->next;

    if (hid->next == hl) {
        hid->next = hl->next;

        if (hid->next == NULL)
            hash_next_bucket(hid);
    }

    --hid->count;
}

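移除过程分两步：

1、用二级指针P遍历hl所在的桶链表：P先指向桶头指针hid->buckets[i]，之后依次指向每个节点的next指针。找到*P == hl后执行*P = hl->next，无论hl是首节点还是中间节点，都由这同一条语句把它从链表中摘除。

2、如果遍历游标hid->next正好指向hl，则令hid->next = hl->next；若此时hid->next为NULL，再调用hash_next_bucket跳到下一个非空桶。最后将hid->count减1。如果在桶链表中找不到hl，则触发assert(0)。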

5、hashFreeItems

/*
 * hashFreeItems - hands every entry of 'hid' to 'free_func'.
 *
 * The entry pointers are first gathered into a temporary array (at most
 * hid->count of them) by a full hash_first()/hash_next() traversal; only
 * afterwards is free_func called on each collected pointer, so no entry is
 * released while the traversal is still walking the chains.  The entries
 * are not unlinked here and the table itself is not freed.
 */
void
hashFreeItems(hash_table * hid, HASHFREE * free_func)
{
    hash_link **collected = (hash_link **)xcalloc(hid->count, sizeof(hash_link *));
    int n = 0;

    hash_first(hid);

    for (;;) {
        hash_link *item = hash_next(hid);

        if (item == NULL || n >= hid->count)
            break;

        collected[n++] = item;
    }

    for (hash_link **it = collected; it != collected + n; ++it)
        free_func(*it);

    xfree(collected);
}

根据hid->count的大小分配数组list，用来存放hash_link节点的地址。调用hash_first将hid->current_slot置为0，并让hid->next指向第一个非空桶的首节点（而不是“第二个桶”）；然后反复调用hash_next依次取得表中的每一个hash_link节点，并把地址存入list；最后对list中的每个地址调用free_func统一释放，再释放list本身。

本文为Eliot原创，转载请注明出处：http://blog.csdn.net/xyw_blog/article/details/9791221

Squid--hash代码分析的更多相关文章

完整全面的Java资源库（包括构建、操作、代码分析、编译器、数据库、社区等等）
构建这里搜集了用来构建应用程序的工具. Apache Maven:Maven使用声明进行构建并进行依赖管理,偏向于使用约定而不是配置进行构建.Maven优于Apache Ant.后者采用了一种过程化 ...
Android4.0图库Gallery2代码分析(二) 数据管理和数据加载
Android4.0图库Gallery2代码分析(二) 数据管理和数据加载 2012-09-07 11:19 8152人阅读评论(12) 收藏举报代码分析android相册优化工作 Androi ...
2018-2019 20165237网络对抗 Exp4 恶意代码分析
2018-2019 20165237网络对抗 Exp4 恶意代码分析实验目标 1.1是监控你自己系统的运行状态,看有没有可疑的程序在运行. 1.2是分析一个恶意软件,就分析Exp2或Exp3中生成后 ...
2018-2019-2 20165312《网络攻防技术》Exp4 恶意代码分析
2018-2019-2 20165312<网络攻防技术>Exp4 恶意代码分析知识点总结 1.有关schtasks schtacks的作用:安排命令和程序定期运行或在指定时间内运行.从计 ...
2017-2018-2 20155314《网络对抗技术》Exp4 恶意代码分析
2017-2018-2 20155314<网络对抗技术>Exp4 恶意代码分析目录实验要求实验内容实验环境基础问题回答预备知识实验步骤 1 静态分析 1.1 使用virsca ...
20165218 《网络对抗技术》Exp4 恶意代码分析
Exp4 恶意代码分析任务一:系统运行监控记录分析联网的程序创建计划任务netstat5218 schtasks /create /TN netstat5218 /sc MINUTE /MO 1 ...
Https与Http，SSL,DevOps, 静态代码分析工具，RFID, SSH, 非对称加密算法(使用最广泛的一种是RSA)，数字签名，数字证书
在URL前加https://前缀表明是用SSL加密的. 你的电脑与服务器之间收发的信息传输将更加安全. Web服务器启用SSL需要获得一个服务器证书并将该证书与要使用SSL的服务器绑定. http和h ...
insmod模块加载过程代码分析1【转】
转自:http://blog.chinaunix.net/uid-27717694-id-3966290.html 一.概述模块是作为ELF对象文件存放在文件系统中的,并通过执行insmod程序链接到 ...
Android代码分析工具lint学习
1 lint简介 1.1 概述 lint是随Android SDK自带的一个静态代码分析工具.它用来对Android工程的源文件进行检查,找出在正确性.安全.性能.可使用性.可访问性及国际化等方面可能 ...
pmd静态代码分析
在正式进入测试之前,进行一定的静态代码分析及code review对代码质量及系统提高是有帮助的,以上为数据证明 Pmd 它是一个基于静态规则集的Java源码分析器,它可以识别出潜在的如下问题:– 可 ...

随机推荐

Fancybox——学习（1）
转载:http://www.helloweba.com/view-blog-65.html Fancybox是一款优秀的jquery插件,它能够展示丰富的弹出层效果.前面我们有文章介绍了facybox ...
C语言排序算法复习
排序算法有很多种,这里在复习和分析的基础上,做一个自己的总结: 首先要知道有哪些排序算法,google一下,有云C语言7大经典排序算法(也有8大).主要包括冒泡排序,快速排序,选择排序,插入排序,希尔 ...
[React Testing] The Redux Store - Multiple Actions
When using Redux, we can test that our application state changes are working by testing that dispatc ...
using namespace cocos2d;
忘记在头文件添加using namespace cocos2d; 导致一直出现问题,定义的精灵却一直报错. error C2143: 语法错误 : 缺少“;”(在“*”的前面)
好的android编码习惯
上一期分享了android内存优化的一些总结,这一期说说我认为的好的编码习惯,然后下一期会做安卓数据库优化的一些总结,逐渐的会将一些性能优化点总结分享出来,肯定是不够全面的希望不足的地方欢迎指出. 良 ...
InstallShield常用prq文件的下载地址
VC 2010 redist X86: http://saturn.installshield.com/is/prerequisites/microsoft visual c++ 2010 redis ...
java 修改文件名
// 修改文件名 public static boolean modifyFileName(String serverPath, String oldFileName, String newLogin ...
一些小trick~
做质因子分解的时候将先打素数表会节省很多时间
Mysql学习（慕课学习笔记9）查询、分组
查找记录 Select select username,id from users; Group by 进行分组 select sex from users group by sex; 分组条件 se ...
安卓网络请求之——OkHttp学习
之前做安卓项目的时候,HTTP请求用的是android api中的HttpURLConnection和HttpClient,编码比较繁琐,自己封装的也不好.后来知道有很多网络请求的第三方框架,可以方便 ...

Squid--hash代码分析

Squid--hash代码分析的更多相关文章

随机推荐

热门专题