memcached事件模型

　　在memcached中，作者为了专注于缓存的设计，使用了libevent来开发事件模型。memcached的事件模型同nginx的类似，拥有一个主线程（master）以及多个工作者线程（worker）。

流程图

在memcached中，是先对工作者线程进行初始化并启动，然后才会创建启动主线程。

工作者线程

初始化

memcached对工作者线程进行初始化，参数分别为线程数量以及`main_base`，

/* start up worker threads if MT mode: the worker count comes from
 * settings.num_threads, and main_base is handed over as the main
 * thread's event base */

thread_init(settings.num_threads, main_base);

/*
 * Initializes the thread subsystem, creating various worker threads.
 *
 * nthreads  Number of worker event handler threads to spawn
 * main_base Event base for main thread
 *
 * Initializes the global locks, sizes and allocates the item lock table,
 * allocates one LIBEVENT_THREAD descriptor per worker, gives every worker a
 * notify pipe, starts the workers and finally waits until all of them have
 * registered. Allocation or pipe failures terminate the process with exit(1).
 */
void thread_init(int nthreads, struct event_base *main_base) {
    int         i;
    int         power;

    pthread_mutex_init(&cache_lock, NULL);
    pthread_mutex_init(&stats_lock, NULL);

    pthread_mutex_init(&init_lock, NULL);
    pthread_cond_init(&init_cond, NULL);

    pthread_mutex_init(&cqi_freelist_lock, NULL);
    cqi_freelist = NULL;

    /* Want a wide lock table, but don't waste memory */
    if (nthreads < 3) {
        power = 10;
    } else if (nthreads < 4) {
        power = 11;
    } else if (nthreads < 5) {
        power = 12;
    } else {
        /* 8192 buckets, and central locks don't scale much past 5 threads */
        power = 13;
    }

    /* The lock table holds hashsize(power) mutexes. */
    item_lock_count = hashsize(power);
    item_lock_hashpower = power;

    item_locks = calloc(item_lock_count, sizeof(pthread_mutex_t));
    if (! item_locks) {
        perror("Can't allocate item locks");
        exit(1);
    }
    for (i = 0; i < item_lock_count; i++) {
        pthread_mutex_init(&item_locks[i], NULL);
    }
    pthread_key_create(&item_lock_type_key, NULL);
    pthread_mutex_init(&item_global_lock, NULL);

    threads = calloc(nthreads, sizeof(LIBEVENT_THREAD));
    if (! threads) {
        perror("Can't allocate thread descriptors");
        exit(1);
    }

    /* The calling (main) thread acts as the dispatcher. */
    dispatcher_thread.base = main_base;
    dispatcher_thread.thread_id = pthread_self();

    for (i = 0; i < nthreads; i++) {
        int fds[2];
        if (pipe(fds)) {
            perror("Can't create notify pipe");
            exit(1);
        }

        /* fds[0] is the read end of the pipe, fds[1] the write end. */
        threads[i].notify_receive_fd = fds[0];
        threads[i].notify_send_fd = fds[1];

        setup_thread(&threads[i]);
        /* Reserve three fds for the libevent base, and two for the pipe */
        stats.reserved_fds += 5;
    }

    /* Create threads after we've done all the libevent setup. */
    for (i = 0; i < nthreads; i++) {
        create_worker(worker_libevent, &threads[i]);
    }

    /* Wait for all the threads to set themselves up before returning. */
    pthread_mutex_lock(&init_lock);
    wait_for_thread_registration(nthreads);
    pthread_mutex_unlock(&init_lock);
}

thread_init源码

在memcached中，为了避免多个线程同时访问共享资源而产生冲突，使用了很多锁，这里对锁不做介绍。

线程的结构体

/*
 * Per-worker-thread descriptor: the thread's libevent base, the notify pipe
 * used to wake it up, and its queue of newly dispatched connections.
 */
typedef struct {

    pthread_t thread_id;        /* unique ID of this thread */

    struct event_base *base;    /* libevent handle this thread uses */

    struct event notify_event;  /* listen event for notify pipe */

    int notify_receive_fd;      /* receiving end of notify pipe */

    int notify_send_fd;         /* sending end of notify pipe */

    struct thread_stats stats;  /* Stats generated by this thread */

    struct conn_queue *new_conn_queue; /* queue of new connections to handle */

    cache_t *suffix_cache;      /* suffix cache */

    uint8_t item_lock_type;     /* use fine-grained or global item lock */

} LIBEVENT_THREAD;

初始化工作者线程

/* Excerpt from thread_init(): per-worker notify pipe and libevent setup. */
for (i = 0; i < nthreads; i++) {
    int fds[2];

    /* Create the notify pipe */
    if (pipe(fds)) {
        perror("Can't create notify pipe");
        exit(1);
    }

    /* Record the read end (fds[0]) and write end (fds[1]) of the pipe */
    threads[i].notify_receive_fd = fds[0];
    threads[i].notify_send_fd = fds[1];

    /* Set up the thread's information */
    setup_thread(&threads[i]);

    /* Reserve three fds for the libevent base, and two for the pipe */
    stats.reserved_fds += 5;
}

设置线程属性

/*
 * Set up a thread's information.
 *
 * Creates the thread's own libevent base and registers a persistent read
 * event on the receiving end of its notify pipe, with
 * thread_libevent_process as the callback. Exits the process with status 1
 * if the base cannot be created or the event cannot be added.
 */
static void setup_thread(LIBEVENT_THREAD *me) {
    me->base = event_init(); /* create this thread's event base */
    if (! me->base) {
        fprintf(stderr, "Can't allocate event base\n");
        exit(1);
    }

    /* Listen for notifications from other threads */
    event_set(&me->notify_event, me->notify_receive_fd,
              EV_READ | EV_PERSIST, thread_libevent_process, me);

    /* Bind the event to this thread's event base */
    event_base_set(me->base, &me->notify_event);

    /* Start monitoring the event (no timeout) */
    if (event_add(&me->notify_event, 0) == -1) {
        fprintf(stderr, "Can't monitor libevent notify pipe\n");
        exit(1);
    }

    /* ... remainder of setup_thread() omitted in this excerpt ... */
}

READ回调函数

/*

 * Processes an incoming "handle a new connection" item. This is called when

 * input arrives on the libevent wakeup pipe.

 */

static void thread_libevent_process(int fd, short which, void *arg) {

    ...

    /* 从管道读取消息 */

    if (read(fd, buf, ) != )

        if (settings.verbose > )

            fprintf(stderr, "Can't read from libevent pipe\n");

    item = cq_pop(me->new_conn_queue); //读取连接

    ...

}

启动工作者线程

/* Create threads after we've done all the libevent setup. */
for (i = 0; i < nthreads; i++) {
    create_worker(worker_libevent, &threads[i]);
}

`create_worker`函数创建工作者线程，

/*
 * Creates a worker thread.
 *
 * func  thread entry point
 * arg   argument passed through to func
 *
 * The thread is created with default attributes. If pthread_create() fails,
 * the error is reported on stderr and the process exits with status 1.
 */
static void create_worker(void *(*func)(void *), void *arg) {
    pthread_t       thread;
    pthread_attr_t  attr;
    int             ret;

    pthread_attr_init(&attr);

    /* pthread_create() returns the error number directly, not via errno. */
    if ((ret = pthread_create(&thread, &attr, func, arg)) != 0) {
        fprintf(stderr, "Can't create thread: %s\n",
                strerror(ret));
        exit(1);
    }
}

`worker_libevent`函数进入线程循环监听状态，

/*
 * Worker thread: main event loop
 *
 * arg  the LIBEVENT_THREAD descriptor of this worker
 *
 * Returns NULL once the event loop terminates.
 */
static void *worker_libevent(void *arg) {
    LIBEVENT_THREAD *me = arg;

    /* Any per-thread setup can happen here; thread_init() will block until
     * all threads have finished initializing.
     */

    /* set an indexable thread-specific memory item for the lock type.
     * this could be unnecessary if we pass the conn *c struct through
     * all item_lock calls...
     */
    me->item_lock_type = ITEM_LOCK_GRANULAR;
    pthread_setspecific(item_lock_type_key, &me->item_lock_type);

    register_thread_initialized();

    /* Run this thread's event loop with no flags set. */
    event_base_loop(me->base, 0);
    return NULL;
}

主线程

初始化

static struct event_base* mian_base;

/* initialize main thread libevent instance */

main_base = event_init();

在`memcached.c`的主函数中，使用`libevent`的事件初始化函数来初始化`main_base`。

初始化socket

这里只介绍tcp连接，其中使用`server_sockets`来调用`server_socket`来初始化连接。

/* Open the TCP listening socket(s); a non-zero result means failure. */
if (settings.port && server_sockets(settings.port, tcp_transport, portnumber_file)) {
    vperror("failed to listen on TCP port %d", settings.port);
    exit(EX_OSERR);
}

/*
 * Opens the listening socket(s) for the given port and transport.
 * As excerpted here: when no bind interface is configured
 * (settings.inter == NULL) it delegates directly to server_socket() and
 * returns its result. The rest of the function is elided ("...") in this
 * article.
 */
static int server_sockets(int port, enum network_transport transport,

                          FILE *portnumber_file) {

    if (settings.inter == NULL) {

        return server_socket(settings.inter, port, transport, portnumber_file);

    }

    ...

}

而在`server_socket`中完成了socket的初始化、绑定等操作。

/**
 * Create a socket and bind it to a specific port number
 * @param interface the interface to bind to
 * @param port the port number to bind to (-1 is mapped to 0 before the
 *        address lookup)
 * @param transport the transport protocol (TCP / UDP)
 * @param portnumber_file A filepointer to write the port numbers to
 *        when they are successfully added to the list of ports we
 *        listen on.
 * @return 0 if at least one address was bound without a fatal error,
 *         non-zero otherwise
 */
static int server_socket(const char *interface,
                         int port,
                         enum network_transport transport,
                         FILE *portnumber_file) {
    int sfd;
    struct linger ling = {0, 0};
    struct addrinfo *ai;
    struct addrinfo *next;
    struct addrinfo hints = { .ai_flags = AI_PASSIVE,
                              .ai_family = AF_UNSPEC };
    char port_buf[NI_MAXSERV];
    int error;
    int success = 0;
    int flags = 1;

    hints.ai_socktype = IS_UDP(transport) ? SOCK_DGRAM : SOCK_STREAM;

    if (port == -1) {
        port = 0;
    }
    snprintf(port_buf, sizeof(port_buf), "%d", port);
    error = getaddrinfo(interface, port_buf, &hints, &ai);
    if (error != 0) {
        if (error != EAI_SYSTEM) {
            fprintf(stderr, "getaddrinfo(): %s\n", gai_strerror(error));
        } else {
            perror("getaddrinfo()");
        }
        return 1;
    }

    for (next = ai; next; next = next->ai_next) {
        conn *listen_conn_add;
        if ((sfd = new_socket(next)) == -1) {
            /* getaddrinfo can return "junk" addresses,
             * we make sure at least one works before erroring.
             */
            if (errno == EMFILE) {
                /* ...unless we're out of fds */
                perror("server_socket");
                exit(EX_OSERR);
            }
            continue;
        }

#ifdef IPV6_V6ONLY
        if (next->ai_family == AF_INET6) {
            error = setsockopt(sfd, IPPROTO_IPV6, IPV6_V6ONLY, (char *) &flags, sizeof(flags));
            if (error != 0) {
                perror("setsockopt");
                close(sfd);
                continue;
            }
        }
#endif

        setsockopt(sfd, SOL_SOCKET, SO_REUSEADDR, (void *)&flags, sizeof(flags));
        if (IS_UDP(transport)) {
            maximize_sndbuf(sfd);
        } else {
            /* TCP option failures are reported but are not fatal. */
            error = setsockopt(sfd, SOL_SOCKET, SO_KEEPALIVE, (void *)&flags, sizeof(flags));
            if (error != 0) {
                perror("setsockopt");
            }

            error = setsockopt(sfd, SOL_SOCKET, SO_LINGER, (void *)&ling, sizeof(ling));
            if (error != 0) {
                perror("setsockopt");
            }

            error = setsockopt(sfd, IPPROTO_TCP, TCP_NODELAY, (void *)&flags, sizeof(flags));
            if (error != 0) {
                perror("setsockopt");
            }
        }

        if (bind(sfd, next->ai_addr, next->ai_addrlen) == -1) {
            /* EADDRINUSE just skips this address; anything else is fatal. */
            if (errno != EADDRINUSE) {
                perror("bind()");
                close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            close(sfd);
            continue;
        } else {
            success++;
            if (!IS_UDP(transport) && listen(sfd, settings.backlog) == -1) {
                perror("listen()");
                close(sfd);
                freeaddrinfo(ai);
                return 1;
            }
            if (portnumber_file != NULL &&
                (next->ai_addr->sa_family == AF_INET ||
                 next->ai_addr->sa_family == AF_INET6)) {
                union {
                    struct sockaddr_in in;
                    struct sockaddr_in6 in6;
                } my_sockaddr;
                socklen_t len = sizeof(my_sockaddr);
                if (getsockname(sfd, (struct sockaddr*)&my_sockaddr, &len) == 0) {
                    if (next->ai_addr->sa_family == AF_INET) {
                        fprintf(portnumber_file, "%s INET: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in.sin_port));
                    } else {
                        fprintf(portnumber_file, "%s INET6: %u\n",
                                IS_UDP(transport) ? "UDP" : "TCP",
                                ntohs(my_sockaddr.in6.sin6_port));
                    }
                }
            }
        }

        if (IS_UDP(transport)) {
            int c;

            for (c = 0; c < settings.num_threads_per_udp; c++) {
                /* Allocate one UDP file descriptor per worker thread;
                 * this allows "stats conns" to separately list multiple
                 * parallel UDP requests in progress.
                 *
                 * The dispatch code round-robins new connection requests
                 * among threads, so this is guaranteed to assign one
                 * FD to each thread.
                 */
                int per_thread_fd;
                if (c == 0) {
                    per_thread_fd = sfd;
                } else {
                    per_thread_fd = dup(sfd);
                    /* Never hand an invalid descriptor to a worker. */
                    if (per_thread_fd < 0) {
                        perror("Failed to duplicate file descriptor");
                        exit(EXIT_FAILURE);
                    }
                }
                dispatch_conn_new(per_thread_fd, conn_read,
                                  EV_READ | EV_PERSIST,
                                  UDP_READ_BUFFER_SIZE, transport);
            }
        } else {
            /* TCP: the listening connection is registered on main_base. */
            if (!(listen_conn_add = conn_new(sfd, conn_listening,
                                             EV_READ | EV_PERSIST, 1,
                                             transport, main_base))) {
                fprintf(stderr, "failed to create listening connection\n");
                exit(EXIT_FAILURE);
            }
            listen_conn_add->next = listen_conn;
            listen_conn = listen_conn_add;
        }
    }

    freeaddrinfo(ai);

    /* Return zero iff we detected no errors in starting up connections */
    return success == 0;
}

server_socket源码

主线程事件

在主线程中通过`conn_new`函数来建立主线程和工作者线程之间的关系。

/* Set up the connection's event with event_handler as its callback */
event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
event_base_set(base, &c->event);
c->ev_flags = event_flags;

/* Start monitoring the event (no timeout) */
if (event_add(&c->event, 0) == -1) {
    perror("event_add");
    return NULL;
}

事件处理

上面设置了事件的回调函数`event_handler`，而在`event_handler`中，主要调用了`drive_machine`函数。

drive_machine看名字就知道，是像发动机一样的函数，该函数主要负责处理各种事件以及相应的处理方法。

这里只简要介绍一个函数调用`dispatch_conn_new`。

void dispatch_conn_new(int sfd, enum conn_states init_state, int event_flags,

                       int read_buffer_size, enum network_transport transport) {

    CQ_ITEM *item = cqi_new();

    char buf[];

    if (item == NULL) {

        close(sfd);

        /* given that malloc failed this may also fail, but let's try */

        fprintf(stderr, "Failed to allocate memory for connection object\n");

        return ;

    }

    int tid = (last_thread + ) % settings.num_threads;

    LIBEVENT_THREAD *thread = threads + tid; //循环获取工作者线程

    last_thread = tid;

    item->sfd = sfd;

    item->init_state = init_state;

    item->event_flags = event_flags;

    item->read_buffer_size = read_buffer_size;

    item->transport = transport;

    cq_push(thread->new_conn_queue, item); //连接加入懂啊队列

    memcachedD_CONN_DISPATCH(sfd, thread->thread_id);

    buf[] = 'c';

    if (write(thread->notify_send_fd, buf, ) != ) {//向管道写入消息

        perror("Writing to thread notify pipe");

    }

}

memcached（二）事件模型源码分析的更多相关文章

ApplicationEvent事件机制源码分析
<spring扩展点之三:Spring 的监听事件 ApplicationListener 和 ApplicationEvent 用法,在spring启动后做些事情> <服务网关zu ...
Zepto事件模块源码分析
Zepto事件模块源码分析一.保存事件数据的handlers 我们知道js原生api中要移除事件,需要传入绑定时的回调函数.而Zepto则可以不传入回调函数,直接移除对应类型的所有事件.原因就在于Z ...
Java 序列化和反序列化（二）Serializable 源码分析 - 1
目录 Java 序列化和反序列化(二)Serializable 源码分析 - 1 1. Java 序列化接口 2. ObjectOutputStream 源码分析 2.1 ObjectOutputSt ...
Django（60）Django内置User模型源码分析及自定义User
前言 Django为我们提供了内置的User模型,不需要我们再额外定义用户模型,建立用户体系了.它的完整的路径是在django.contrib.auth.models.User. User模型源码分析 ...
Hbase WAL线程模型源码分析
版权声明:本文由熊训德原创文章,转载请注明出处: 文章原文链接:https://www.qcloud.com/community/article/257 来源:腾云阁 https://www.qclo ...
[UGUI]图文混排(二)：Text源码分析
UGUI源码: https://bitbucket.org/Unity-Technologies/ui/downloads/?tab=tags 首先下载一份UGUI源码,这里我下载的版本是5.3.2f ...
基于Netty的RPC架构学习笔记（五）：netty线程模型源码分析（二）
文章目录小技巧(如何看开源框架的源码) 源码解析阅读源码技巧打印查看通过打断点调试查看调用栈小技巧(如何看开源框架的源码) 一断点二打印三看调用栈四搜索源码解析 //设置nioso ...
跟厂长学PHP7内核（二）：源码分析的环境与工具
本文主要介绍分析源码的方式,其中包含环境的搭建.分析工具的安装以及源码调试的基本操作. 一.工具清单 PHP7.0.12 GDB CLion 二.源码下载及安装 $ wget http://php.n ...
adaptiveThreshold自适应二值化源码分析
自适应二值化介绍: 二值化算法是用输入像素的值I与一个值C来比较,根据比较结果确定输出值. 自适应二值化的每一个像素的比较值C都不同,比较值C由这个像素为中心的一个块范围计算在减去差值delta得到. ...

随机推荐

ipad或iPhone 访问https网站不成功
可能的原因是设备的日期不对,将设备日期调整正确即可解决
app.js ejs 转换为html
var express = require('express');var path = require('path');var favicon = require('serve-favicon');v ...
Linux下查找文件命令——find
find [在哪个目录下查找] -name <文件名> 1.场景:当你知道了某个文件的文件名,而不知道这个文件放到哪个文件夹,甚至是层层套嵌的文件夹里, 也可以使用find命令来查找,如: ...
自定义底部tab
public class MainActivity extends TabActivity implements OnCheckedChangeListener { private RadioGrou ...
关于生物信息学与R的相关资料和网站
生物信息学的相关论坛:http://www.omicshare.com/forum/ 糗世界:http://qiubio.com:8080/ 统计之都网站绘制QQ图和曼哈顿图:http://www. ...
C#动态执行字符串(动态创建代码)
在编写C#程序的时候,有时我们需要动态生成一些代码并执行.然而C#不像JavaScript有一个Eval函数,可以动态的执行代码.所有这些功能都要我们自己去完成.如下是实例. 动态创建代码: usin ...
PHP 四种基本排序算法的代码实现
前提:分别用冒泡排序法,快速排序法,选择排序法,插入排序法将下面数组中的值按照从小到大的顺序进行排序. $arr(1,43,54,62,21,66,32,78,36,76,39); 1. 冒泡排序思 ...
SUSE Linux Enterprise Server 11 软件源
1.添加软件源 zypper ar http://ftp5.gwdg.de/pub/opensuse/discontinued/distribution/11.4/repo/oss oss zyppe ...
统计学习方法 AdaBoost
提升方法的基本思路在概率近似正确(probably approximately correct,PAC)学习的框架中, 一个概念(一个类),如果存在一个多项式的学习算法能够学习它,并且正确率很高,那 ...
three.js 源码注释（三十九）Light/HemisphereLight.js 半球光、自然光（天光效果）
/*** * HemisphereLight类是在场景中创建半球光,就是天光效果,经常用在室外,将各个位置的物体都照亮,室内的光线大多是方向性的, * 无论是窗口还是灯槽,用平面光很方便,室外用平面 ...

memcached（二）事件模型源码分析