memcached学习笔记——连接模型

　　文章链接：http://www.hcoding.com/?p=121

　　个人站点：JC&hcoding.com

　　memcached是什么呢？memcached是一个优秀的、高性能的内存缓存工具。

　　memcached具有以下的特点：

协议简单：memcached的服务器客户端通信并不使用复杂的XML等格式，而是使用简单的基于文本的协议。
基于libevent的事件处理：libevent是个程序库，它将Linux的epoll、BSD类操作系统的kqueue等事件处理功能封装成统一的接口。memcached使用这个libevent库，因此能在Linux、BSD、Solaris等操作系统上发挥其高性能。（libevent是什么）
内置内存存储方式：为了提高性能，memcached中保存的数据都存储在memcached内置的内存存储空间中。由于数据仅存在于内存中，因此重启memcached或重启操作系统会导致全部数据消失。另外，内存容量达到指定的值之后，memcached会自动删除不使用的缓存。
memcached不互相通信的分布式：memcached尽管是“分布式”缓存服务器，但服务器端并没有分布式功能。各个memcached不会互相通信以共享信息。它的分布式主要是通过客户端实现的。

　　本文主要讲解memcached的连接模型：memcached由一条主线程（连接线程）监听连接，然后把成功的连接交给子线程（工作线程）处理读写操作。N条【启动memcached时通过-t参数指定】子线程（工作线程）负责读写数据，一条子线程（工作线程）维护着多个连接。一个conn结构体对象对应着一个连接，主线程（连接线程）成功连接后，会把连接的内容赋值到一个conn结构体对象，并把这个conn结构体对象传递给一条子线程（工作线程）处理。

conn结构体：

 typedef struct conn conn;
 /*
  * Per-connection state. One conn object describes one connection and carries
  * the libevent event registered for it, its read/write buffers, the
  * scatter/gather bookkeeping used for replies, and protocol-specific data.
  */
 struct conn {
     int    sfd;      /* NOTE(review): presumably the socket descriptor of this connection -- confirm */
     sasl_conn_t *sasl_conn;
     // connection state
     enum conn_states  state;
     enum bin_substates substate;
     struct event event;   /* libevent event registered for this connection */
     short  ev_flags;
     short  which;   /** which events were just triggered */
     // read buffer
     char   *rbuf;   /** buffer to read commands into */
     char   *rcurr;  /** but if we parsed some already, this is where we stopped */
     int    rsize;   /** total allocated size of rbuf */
     int    rbytes;  /** how much data, starting from rcurr, do we have unparsed */
     // write buffer
     char   *wbuf;
     // points past the part of wbuf that has already been sent back
     char   *wcurr;
     // NOTE(review): presumably the allocated size of wbuf, mirroring rsize -- confirm
     int    wsize;
     // amount of data still waiting to be written
     int    wbytes;
     /** which state to go into after finishing current write */
     enum conn_states  write_and_go;
     void   *write_and_free; /** free this memory after finishing writing */
     char   *ritem;  /** when we read in an item's value, it goes here */
     int    rlbytes;
     /* data for the nread state */
     /**
      * item is used to hold an item structure created after reading the command
      * line of set/add/replace commands, but before we finished reading the actual
      * data. The data is read into ITEM_data(item) to avoid extra copying.
      */
     void   *item;     /* for commands set/add/replace  */
     /* data for the swallow state */
     int    sbytes;    /* how many bytes to swallow */
     /* data for the mwrite state */
     struct iovec *iov;
     int    iovsize;   /* number of elements allocated in iov[] */
     int    iovused;   /* number of elements used in iov[] */
     // Array of msghdr structures; one connection may need several.
     // For UDP, a UDP header has to be filled in for every msghdr.
     struct msghdr *msglist;
     int    msgsize;   /* number of elements allocated in msglist[] */
     int    msgused;   /* number of elements used in msglist[] */
     int    msgcurr;   /* element in msglist[] being transmitted now */
     int    msgbytes;  /* number of bytes in current msg */
     item   **ilist;   /* list of items to write out */
     int    isize;
     item   **icurr;
     // NOTE(review): presumably the number of entries of ilist still pending -- confirm
     int    ileft;
     char   **suffixlist;
     int    suffixsize;
     char   **suffixcurr;
     int    suffixleft;
     enum protocol protocol;   /* which protocol this connection speaks */
     enum network_transport transport; /* what transport is used by this connection */
     /* data for UDP clients */
     int    request_id; /* Incoming UDP request ID, if this is a UDP "connection" */
     struct sockaddr request_addr; /* Who sent the most recent request */
     socklen_t request_addr_size;
     unsigned char *hdrbuf; /* udp packet headers */
     int    hdrsize;   /* number of headers' worth of space is allocated */
     bool   noreply;   /* True if the reply should not be sent. */
     /* current stats command */
     struct {
         char *buffer;
         size_t size;
         size_t offset;
     } stats;
     /* Binary protocol stuff */
     /* This is where the binary header goes */
     protocol_binary_request_header binary_header;
     uint64_t cas; /* the cas to return */
     short cmd; /* current command being processed */
     // NOTE(review): "opaque" value; its meaning is not evident from this excerpt
     int opaque;
     int keylen;
     // conn objects can be chained into a singly linked list through this pointer
     conn   *next;     /* Used for generating a list of conn structures */
     LIBEVENT_THREAD *thread; /* Pointer to the thread object serving this connection */
 };

 //memcached.c

 int main{

     // ......

     // 第一步：初始化主线程的事件机制

     /* initialize main thread libevent instance */

     // libevent 事件机制初始化

     main_base = event_init();

     // ......

     // 第二步：初始化 N 个 （初始值200，当连接超过200个的时候会往上递增） conn结构体对象

     // 空闲连接数组初始化

     conn_init();

     // ......

     // 第三步：启动工作线程

     /* start up worker threads if MT mode */

     thread_init(settings.num_threads, main_base);

     // ......

     // 第四步：初始化socket，绑定监听端口，为主线程的事件机制设置连接监听事件（event_set、event_add）

     /**

         memcached 有可配置的两种模式: unix 域套接字和 TCP/UDP, 允许客户端以两种方式向 memcached 发起请求. 客户端和服务器在同一个主机上的情况下可以用 unix 域套接字, 否则可以采用 TCP/UDP 的模式. 两种模式是不兼容的.

         以下的代码便是根据 settings.socketpath 的值来决定启用哪种方式.

     */

     /**

         第一种, unix 域套接字.

     */

     /* create unix mode sockets after dropping privileges */

     if (settings.socketpath != NULL) {

         errno = ;

         if (server_socket_unix(settings.socketpath,settings.access)) {

             vperror("failed to listen on UNIX socket: %s", settings.socketpath);

             exit(EX_OSERR);

         }

     }

     /**

         第二种, TCP/UDP.

     */

     /* create the listening socket, bind it, and init */

     if (settings.socketpath == NULL) {

         const char *portnumber_filename = getenv("MEMCACHED_PORT_FILENAME");

         char temp_portnumber_filename[PATH_MAX];

         FILE *portnumber_file = NULL;

         // 读取端口号文件

         if (portnumber_filename != NULL) {

             snprintf(temp_portnumber_filename,

                      sizeof(temp_portnumber_filename),

                      "%s.lck", portnumber_filename);

             portnumber_file = fopen(temp_portnumber_filename, "a");

             if (portnumber_file == NULL) {

                 fprintf(stderr, "Failed to open \"%s\": %s\n",

                         temp_portnumber_filename, strerror(errno));

             }

         }

         // TCP

         errno = ;

         if (settings.port && server_sockets(settings.port, tcp_transport,

                                            portnumber_file)) {

             vperror("failed to listen on TCP port %d", settings.port);

             exit(EX_OSERR);

         }

         /*

          * initialization order: first create the listening sockets

          * (may need root on low ports), then drop root if needed,

          * then daemonise if needed, then init libevent (in some cases

          * descriptors created by libevent wouldn't survive forking).

          */

         // UDP

         /* create the UDP listening socket and bind it */

         errno = ;

         if (settings.udpport && server_sockets(settings.udpport, udp_transport,

                                               portnumber_file)) {

             vperror("failed to listen on UDP port %d", settings.udpport);

             exit(EX_OSERR);

         }

         if (portnumber_file) {

             fclose(portnumber_file);

             rename(temp_portnumber_filename, portnumber_filename);

         }

     }

     // ......

     // 第五步：主线程进入事件循环

     /* enter the event loop */

     // 进入事件循环

     if (event_base_loop(main_base, ) != ) {

         retval = EXIT_FAILURE;

     }

     // ......

 }

　　LIBEVENT_THREAD 结构体：

 // Per-thread descriptor: there are several threads, each with its own event_base.
 typedef struct {
     pthread_t thread_id;        /* unique ID of this thread */
     struct event_base *base;    /* libevent handle this thread uses */
     // event used to watch the notify pipe
     struct event notify_event;  /* listen event for notify pipe */
     // file descriptors of the two ends of the notify pipe
     int notify_receive_fd;      /* receiving end of notify pipe */
     int notify_send_fd;         /* sending end of notify pipe */
     struct thread_stats stats;  /* Stats generated by this thread */
     // queue of connections this thread has been asked to take over
     struct conn_queue *new_conn_queue; /* queue of new connections to handle */
     cache_t *suffix_cache;      /* suffix cache */
     uint8_t item_lock_type;     /* use fine-grained or global item lock */
 } LIBEVENT_THREAD;

　　第三步工作线程的详细启动过程：

 /*
  * thread.c
  *
  * Initializes the thread subsystem, creating various worker threads.
  *
  * nthreads  Number of worker event handler threads to spawn
  * main_base Event base for main thread (the dispatcher thread)
  *
  * Exits the process if the lock table, the thread descriptors or a notify
  * pipe cannot be created. Returns once wait_for_thread_registration() has
  * returned for all nthreads workers.
  */
 void thread_init(int nthreads, struct event_base *main_base) {
     int         i;
     int         power;

     // Initialise the global mutexes.
     pthread_mutex_init(&cache_lock, NULL);
     pthread_mutex_init(&stats_lock, NULL);

     // Mutex/condition pair used for the start-up handshake with the workers.
     pthread_mutex_init(&init_lock, NULL);
     pthread_cond_init(&init_cond, NULL);

     pthread_mutex_init(&cqi_freelist_lock, NULL);
     cqi_freelist = NULL;

     /* Want a wide lock table, but don't waste memory */
     if (nthreads < 3) {
         power = 10;
     } else if (nthreads < 4) {
         power = 11;
     } else if (nthreads < 5) {
         power = 12;
     } else {
         /* 8192 buckets, and central locks don't scale much past 5 threads */
         power = 13;   /* 2^13 == 8192 */
     }

     // hashsize(power) == 2^power item locks.
     item_lock_count = hashsize(power);

     item_locks = calloc(item_lock_count, sizeof(pthread_mutex_t));
     if (! item_locks) {
         perror("Can't allocate item locks");
         exit(1);
     }
     for (i = 0; i < item_lock_count; i++) {
         pthread_mutex_init(&item_locks[i], NULL);
     }
     // item_lock_type_key is the key of a thread-specific variable.
     pthread_key_create(&item_lock_type_key, NULL);
     pthread_mutex_init(&item_global_lock, NULL);

     // One LIBEVENT_THREAD (event_base + notify pipe) per worker thread.
     threads = calloc(nthreads, sizeof(LIBEVENT_THREAD));
     if (! threads) {
         perror("Can't allocate thread descriptors");
         exit(1);
     }

     // The dispatcher is the calling (main) thread, which owns main_base.
     dispatcher_thread.base = main_base;
     dispatcher_thread.thread_id = pthread_self();

     // Create one notify pipe per worker. The worker listens on the read end;
     // other threads write a one-byte command to the write end (for example
     // 'c' when a new connection has been queued for that worker).
     for (i = 0; i < nthreads; i++) {
         int fds[2];
         if (pipe(fds)) {
             perror("Can't create notify pipe");
             exit(1);
         }

         threads[i].notify_receive_fd = fds[0];   /* read end */
         threads[i].notify_send_fd = fds[1];      /* write end */

         // Fill in the descriptor and register thread_libevent_process() as
         // the pipe callback; the thread itself has not been created yet.
         setup_thread(&threads[i]);
         /* Reserve three fds for the libevent base, and two for the pipe */
         stats.reserved_fds += 5;
     }

     /* Create threads after we've done all the libevent setup. */
     // Every worker runs worker_libevent(), which starts its event loop.
     for (i = 0; i < nthreads; i++) {
         create_worker(worker_libevent, &threads[i]);
     }

     /* Wait for all the threads to set themselves up before returning. */
     pthread_mutex_lock(&init_lock);
     // NOTE(review): per the article this waits on init_cond via pthread_cond_wait -- confirm
     wait_for_thread_registration(nthreads);
     pthread_mutex_unlock(&init_lock);
 }

 /*
  * Set up a thread's information.
  *
  * Fills in the LIBEVENT_THREAD descriptor for one worker: creates the
  * worker's own event base and registers a persistent read event on the
  * notify pipe's receiving end, with thread_libevent_process() as callback.
  * The remaining initialisation is omitted from this excerpt ("// ......").
  *
  * me  descriptor to initialise; me->notify_receive_fd must already be set.
  *
  * Exits the process if the event base cannot be created or the pipe event
  * cannot be added.
  */
 static void setup_thread(LIBEVENT_THREAD *me) {
     // Each worker thread gets its own event base.
     me->base = event_init();
     if (! me->base) {
         fprintf(stderr, "Can't allocate event base\n");
         exit(1);
     }

     /* Listen for notifications from other threads */
     event_set(&me->notify_event, me->notify_receive_fd,
               EV_READ | EV_PERSIST, thread_libevent_process, me);
     event_base_set(me->base, &me->notify_event);

     // A zero (NULL) timeout: the event waits for the pipe indefinitely.
     if (event_add(&me->notify_event, 0) == -1) {
         fprintf(stderr, "Can't monitor libevent notify pipe\n");
         exit(1);
     }

     // ......
 }

 /*
  * Worker thread: main event loop
  *
  * Thread entry point. arg is the worker's LIBEVENT_THREAD descriptor; the
  * function runs that descriptor's event base and returns NULL once the loop
  * returns.
  */
 static void *worker_libevent(void *arg) {
     LIBEVENT_THREAD *me = arg;

     // ......

     // Enter the event loop (flags == 0: default blocking behaviour).
     event_base_loop(me->base, 0);
     return NULL;
 }

　　子线程读管道回调函数：

 /*
  * Processes an incoming "handle a new connection" item. This is called when
  * input arrives on the libevent wakeup pipe.
  *
  * fd     receiving end of the worker's notify pipe
  * which  libevent event mask (unused)
  * arg    the worker's LIBEVENT_THREAD descriptor
  *
  * One command byte is read from the pipe:
  *   'c'  pop a queued connection item and build a conn for it on this
  *        worker's event base;
  *   'l'  switch to granular item locks and report in;
  *   'g'  switch to the global item lock and report in.
  * If the byte cannot be read, nothing is dispatched.
  */
 static void thread_libevent_process(int fd, short which, void *arg) {
     LIBEVENT_THREAD *me = arg;
     CQ_ITEM *item;
     char buf[1];

     if (read(fd, buf, 1) != 1) {
         if (settings.verbose > 0) {
             fprintf(stderr, "Can't read from libevent pipe\n");
         }
         /* buf[0] was never filled in; switching on it would read an
          * indeterminate value, so bail out instead. */
         return;
     }

     switch (buf[0]) {
     case 'c':
         // A new connection has been handed to this worker: take one queued item.
         item = cq_pop(me->new_conn_queue);

         if (NULL != item) {
             // The socket already exists; conn_new() fills in a conn object
             // and registers its event on this worker's event base.
             conn *c = conn_new(item->sfd, item->init_state, item->event_flags,
                                item->read_buffer_size, item->transport, me->base);
             if (c == NULL) {
                 if (IS_UDP(item->transport)) {
                     fprintf(stderr, "Can't listen for events on UDP socket\n");
                     exit(1);
                 } else {
                     if (settings.verbose > 0) {
                         fprintf(stderr, "Can't listen for events on fd %d\n",
                             item->sfd);
                     }
                     close(item->sfd);
                 }
             } else {
                 c->thread = me;
             }
             cqi_free(item);
         }
         break;

     /* we were told to flip the lock type and report in */
     case 'l':
         me->item_lock_type = ITEM_LOCK_GRANULAR;
         register_thread_initialized();
         break;

     case 'g':
         me->item_lock_type = ITEM_LOCK_GLOBAL;
         register_thread_initialized();
         break;

     default:
         /* unknown command bytes are ignored, as before */
         break;
     }
 }

　　第四步主要是初始化socket、绑定服务器端口和IP、为主线程事件机制添加监听连接事件：

 // memcached.c
 // Call chain: server_sockets() -> server_socket()
 /*
  * Creates, binds and registers the listening socket(s) for one interface
  * and port. "// ......" marks code omitted from this excerpt, including the
  * declarations of error, ai, next, hints, port_buf, sfd and success.
  *
  * interface        address/host name passed to getaddrinfo()
  * port             port number
  * transport        TCP or UDP transport selector
  * portnumber_file  optional file for recording port numbers (its use is not
  *                  shown in this excerpt)
  *
  * Returns 1 if getaddrinfo() fails; otherwise zero iff at least one address
  * was bound successfully. Exits the process if the listening conn for a TCP
  * socket cannot be created.
  */
 static int server_socket(const char *interface,
                          int port,
                          enum network_transport transport,
                          FILE *portnumber_file) {
     // ......

     // getaddrinfo() resolves both name->address and service->port and
     // returns a linked list of addrinfo structures.
     error= getaddrinfo(interface, port_buf, &hints, &ai);
     if (error != 0) {
         if (error != EAI_SYSTEM)
           fprintf(stderr, "getaddrinfo(): %s\n", gai_strerror(error));
         else
           perror("getaddrinfo()");
         return 1;
     }

     for (next= ai; next; next= next->ai_next) {
         conn *listen_conn_add;

         // new_socket() creates the socket for this addrinfo entry.
         // NOTE(review): the article says it also sets the socket non-blocking -- confirm
         if ((sfd = new_socket(next)) == -1) {
             // ......
         }

         // ......

         // Bind the socket to the local address and port.
         if (bind(sfd, next->ai_addr, next->ai_addrlen) == -1) {
             // ......
         } else {
             success++;
             // After a successful bind(), TCP sockets are put into listening mode.
             if (!IS_UDP(transport) && listen(sfd, settings.backlog) == -1) {
                 // ......
             }
             // ......
         }

         // UDP and TCP are handled differently. UDP has no notion of a
         // connection: once bound, the socket can be read directly, so its
         // conn starts in conn_read. A TCP socket's conn starts in
         // conn_listening.
         if (IS_UDP(transport)) {
             int c;

             for (c = 0; c < settings.num_threads_per_udp; c++) {
                 /* this is guaranteed to hit all threads because we round-robin */
                 // Hand the socket to one of the worker threads.
                 dispatch_conn_new(sfd, conn_read, EV_READ | EV_PERSIST,
                                   UDP_READ_BUFFER_SIZE, transport);
             }
         } else {
             // TCP: the listening conn lives on the main thread's event base.
             if (!(listen_conn_add = conn_new(sfd, conn_listening,
                                              EV_READ | EV_PERSIST, 1,
                                              transport, main_base))) {
                 fprintf(stderr, "failed to create listening connection\n");
                 exit(EXIT_FAILURE);
             }
             // Push onto the front of the list; listen_conn is the head pointer.
             listen_conn_add->next = listen_conn;
             listen_conn = listen_conn_add;
         }
     }

     freeaddrinfo(ai);

     /* Return zero iff we detected no errors in starting up connections */
     return success == 0;
 }

 /*
  * Fills in a struct conn for the given socket, including its libevent event,
  * and returns it. "// ......" marks code omitted from this excerpt.
  *
  * sfd               socket descriptor the event is registered for
  * init_state        initial connection state (its use is not shown here)
  * event_flags       libevent flags for the event; also stored in c->ev_flags
  * read_buffer_size  size for the read buffer (its use is not shown here)
  * transport         transport used by this connection (use not shown here)
  * base              event base the connection's event is attached to
  *
  * Returns the conn, or NULL if a fresh conn cannot be allocated.
  */
 conn *conn_new(const int sfd, enum conn_states init_state,
                 const int event_flags,
                 const int read_buffer_size, enum network_transport transport,
                 struct event_base *base) {
     // Try to reuse a pre-allocated conn from the free list first
     // (the article suggests the pool exists for performance reasons).
     conn *c = conn_from_freelist();

     if (NULL == c) {
         // Nothing on the free list (the pool is finite; per the article
         // conn_init() starts it at 200 entries), so allocate a zeroed conn.
         if (!(c = (conn *)calloc(1, sizeof(conn)))) {
             fprintf(stderr, "calloc()\n");
             return NULL;
         }

         // ......
         // (fill in the freshly allocated conn)
     }

     // ......

     // libevent: set up the event with event_handler() as its callback.
     event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
     // libevent: attach c->event to the caller's event base.
     event_base_set(base, &c->event);
     c->ev_flags = event_flags;

     // libevent: add the event with no timeout.
     if (event_add(&c->event, 0) == -1) {
         // ......
     }

     // ......

     return c;
 }

memcached学习笔记——连接模型的更多相关文章

memcached学习笔记——存储命令源码分析上篇
原创文章,转载请标明,谢谢. 上一篇分析过memcached的连接模型,了解memcached是如何高效处理客户端连接,这一篇分析memcached源码中的process_update_command ...
memcached学习笔记——存储命令源码分析下篇
上一篇回顾:<memcached学习笔记——存储命令源码分析上篇>通过分析memcached的存储命令源码的过程,了解了memcached如何解析文本命令和mencached的内存管理机制 ...
Memcached 学习笔记（二）——ruby调用
Memcached 学习笔记(二)——ruby调用上一节我们讲述了怎样安装memcached及memcached常用命令.这一节我们将通过ruby来调用memcached相关操作. 第一步,安装ru ...
ArcGIS案例学习笔记2_2_模型构建器和山顶点提取批处理
ArcGIS案例学习笔记2_2_模型构建器和山顶点提取批处理计划时间:第二天下午背景:数据量大,工程大目的:自动化,批处理,定制业务流程,不写程序教程:Pdf/343 数据:chap8/ex5 ...
Django：学习笔记(7)——模型进阶
Django:学习笔记(7)——模型进阶模型的继承我们在面向对象的编程中,一个很重要的的版块,就是类的继承.父类保存了所有子类共有的内容,子类通过继承它来减少冗余代码并进行灵活扩展. 在Djang ...
Django：学习笔记(6)——模型
Django:学习笔记(6)——模型快速上手模型到底是什么呢?我们可以想,如果一张数据表的各个字段可以自动映射到一个类的各个属性,则每条记录对应这个类的一个对象.那我们通过类方法来操作对象(即表记 ...
JVM学习笔记——内存模型篇
JVM学习笔记--内存模型篇在本系列内容中我们会对JVM做一个系统的学习,本片将会介绍JVM的内存模型部分我们会分为以下几部分进行介绍: 内存模型乐观锁与悲观锁 synchronized优化内 ...
JUC学习笔记——共享模型之管程
JUC学习笔记--共享模型之管程在本系列内容中我们会对JUC做一个系统的学习,本片将会介绍JUC的管程部分我们会分为以下几部分进行介绍: 共享问题共享问题解决方案线程安全分析 Monitor ...
JUC学习笔记——共享模型之内存
JUC学习笔记--共享模型之内存在本系列内容中我们会对JUC做一个系统的学习,本片将会介绍JUC的内存部分我们会分为以下几部分进行介绍: Java内存模型可见性模式之两阶段终止模式之Balk ...

随机推荐

Unix和Linux下C语言学习指南
转自:http://www.linuxdiyf.com/viewarticle.php?id=174074 Unix和Linux下C语言学习指南引言尽管 C 语言问世已近 30 年,但它的魅力仍未 ...
解读《2014 最流行编程语言》 by Code Eval
此文已转至http://cn.abnerchou.me/2014/02/23/fa87ae80/ 原文:点我原文翻译:点我首先了解下CodeEval是做什么的: A tool for auto e ...
Altium Designer 等长线&&蛇形线
Altium Designer 里面怎么画等长线 (1)一般是将走线布完后,新建一个class. Design -> Classes 如上图添加完后可以点击close. (2)快捷键 T + R ...
Delphi判断进程是否存在（使用CreateToolhelp32Snapshot）
program Project2; uses windows,TLHelp32; function FindProcess(AFileName:string):boolean; var hSnap ...
关于Action返回结果类型的事儿（下）
原文:关于Action返回结果类型的事儿(下) using System; using System.Collections.Generic; using System.Linq; using ...
8.2.1.4 Index Merge Optimization 索引合并优化:
8.2.1.4 Index Merge Optimization 索引合并优化: 索引合并方法是用于检索记录使用多个范围扫描和合并它们的结果集到一起 mysql> show index fr ...
linux内存管理子系统
一.Linux内存管理模型 1.虚拟地址与物理地址的映射 2.物理地址的分配二.虚拟地址与物理地址的映射 1.虚拟地址空间分布 32位处理器有32根地址总线,可访问4G的物理空间.其中有0-3G为用户 ...
Java中setCharAt()方法介绍
--转载自网络,备忘这是StringBuffer类里面的一个方法:主要是用来替换的,方法里面有两个参数setCharAt(int index,Char ch),第一个参数是取代的位置索引从0开始 ...
[置顶] Android的IPC访问控制设计与实现
3.3.1 IPC钩子函数设计与实现 IPC Binder是Android最重要的进程间通信机制,因此,必须在此实施强制访问控制. 1. 修改secuirty.h 打开终端shell,输入指令“cd ...
VC用OLE方式读写Excel
前几天要做一个项目,需要读取Excel中的数据.从网上查资料发现,主要是有两种方式.一是把Excel表当成数据库使用ODBC读写,这样操作起来就跟操作Access数据库似的.但这种方式效率比较低.另一 ...

memcached学习笔记——连接模型

memcached学习笔记——连接模型的更多相关文章

随机推荐

热门专题