Redis 源码走读(一)事件驱动机制与命令处理
eventloop
从 server.c 的 main 方法看起
int main(int argc, char **argv) {
....... aeSetBeforeSleepProc(server.el,beforeSleep);
aeSetAfterSleepProc(server.el,afterSleep);
aeMain(server.el);
aeDeleteEventLoop(server.el);
return ;
}
ae.c
/* Run the event loop: keep calling aeProcessEvents() to handle the ready
 * time events and file events until eventLoop->stop becomes non-zero.
 * Called from main() in server.c. */
void aeMain(aeEventLoop *eventLoop) {
    eventLoop->stop = 0;
    while (!eventLoop->stop) {
        /* Run the application hook (if any) before each pass. */
        if (eventLoop->beforesleep != NULL)
            eventLoop->beforesleep(eventLoop);
        aeProcessEvents(eventLoop, AE_ALL_EVENTS|AE_CALL_AFTER_SLEEP);
    }
}
/* Process every pending time event, then every pending file event
 * (that may be registered by time event callbacks just processed).
 * Without special flags the function sleeps until some file event
 * fires, or when the next time event occurs (if any).
 *
 * If flags is 0, the function does nothing and returns.
 * if flags has AE_ALL_EVENTS set, all the kind of events are processed.
 * if flags has AE_FILE_EVENTS set, file events are processed.
 * if flags has AE_TIME_EVENTS set, time events are processed.
 * if flags has AE_DONT_WAIT set the function returns ASAP until all
 * the events that's possible to process without to wait are processed.
 * if flags has AE_CALL_AFTER_SLEEP set, the aftersleep callback is called.
 *
 * The function returns the number of events processed. */
int aeProcessEvents(aeEventLoop *eventLoop, int flags)
{
    /* ... (omitted in this excerpt, including the declarations of
     * `processed` and `numevents` used below) ... */

    /* Note that we want call select() even if there are no
     * file events to process as long as we want to process time
     * events, in order to sleep until the next time event is ready
     * to fire. */
    if (eventLoop->maxfd != -1 ||
        ((flags & AE_TIME_EVENTS) && !(flags & AE_DONT_WAIT))) {
        int j;
        aeTimeEvent *shortest = NULL;
        struct timeval tv, *tvp;

        /* Look up the nearest time event: it bounds how long the poll
         * below is allowed to block. */
        if (flags & AE_TIME_EVENTS && !(flags & AE_DONT_WAIT))
            shortest = aeSearchNearestTimer(eventLoop);
        if (shortest) {
            long now_sec, now_ms;

            aeGetTime(&now_sec, &now_ms);
            tvp = &tv;

            /* How many milliseconds we need to wait for the next
             * time event to fire? */
            long long ms =
                (shortest->when_sec - now_sec)*1000 +
                shortest->when_ms - now_ms;

            if (ms > 0) {
                tvp->tv_sec = ms/1000;
                tvp->tv_usec = (ms % 1000)*1000;
            } else {
                /* The timer is already due: do not block at all. */
                tvp->tv_sec = 0;
                tvp->tv_usec = 0;
            }
        } else {
            /* If we have to check for events but need to return
             * ASAP because of AE_DONT_WAIT we need to set the timeout
             * to zero */
            if (flags & AE_DONT_WAIT) {
                tv.tv_sec = tv.tv_usec = 0;
                tvp = &tv;
            } else {
                /* Otherwise we can block */
                tvp = NULL; /* wait forever */
            }
        }

        /* Call the multiplexing API, will return only on timeout or when
         * some event fires. It fills eventLoop->fired[] with the ready
         * file descriptors. */
        numevents = aeApiPoll(eventLoop, tvp);

        /* After sleep callback. */
        if (eventLoop->aftersleep != NULL && flags & AE_CALL_AFTER_SLEEP)
            eventLoop->aftersleep(eventLoop);

        /* Dispatch every fd reported in eventLoop->fired[]. */
        for (j = 0; j < numevents; j++) {
            aeFileEvent *fe = &eventLoop->events[eventLoop->fired[j].fd];
            int mask = eventLoop->fired[j].mask; /* fired event types: read/write */
            int fd = eventLoop->fired[j].fd;     /* fd the events fired on */
            int fired = 0; /* Number of events fired for current fd. */

            /* Normally we execute the readable event first, and the writable
             * event laster. This is useful as sometimes we may be able
             * to serve the reply of a query immediately after processing the
             * query.
             *
             * However if AE_BARRIER is set in the mask, our application is
             * asking us to do the reverse: never fire the writable event
             * after the readable. In such a case, we invert the calls.
             * This is useful when, for instance, we want to do things
             * in the beforeSleep() hook, like fsynching a file to disk,
             * before replying to a client. */
            int invert = fe->mask & AE_BARRIER;

            /* Note the "fe->mask & mask & ..." code: maybe an already
             * processed event removed an element that fired and we still
             * didn't processed, so we check if the event is still valid.
             *
             * Fire the readable event if the call sequence is not
             * inverted. */
            if (!invert && fe->mask & mask & AE_READABLE) {
                fe->rfileProc(eventLoop,fd,fe->clientData,mask);
                fired++;
            }

            /* Fire the writable event. */
            if (fe->mask & mask & AE_WRITABLE) {
                /* Skip the call when the same handler already ran above. */
                if (!fired || fe->wfileProc != fe->rfileProc) {
                    fe->wfileProc(eventLoop,fd,fe->clientData,mask);
                    fired++;
                }
            }

            /* If we have to invert the call, fire the readable event now
             * after the writable one. */
            if (invert && fe->mask & mask & AE_READABLE) {
                if (!fired || fe->wfileProc != fe->rfileProc) {
                    fe->rfileProc(eventLoop,fd,fe->clientData,mask);
                    fired++;
                }
            }

            processed++;
        }
    }
    /* Check time events */
    if (flags & AE_TIME_EVENTS)
        processed += processTimeEvents(eventLoop);

    return processed; /* return the number of processed file/time events */
}
标准的事件驱动框架,在死循环中调用aeProcessEvents方法
aeProcessEvents 方法比较长,里面会处理两种事件TimeEvent 与 FileEvent,本文关注的重点是 FileEvent
aeProcessEvents 调用 aeApiPoll 方法来查找监听的 fd 上有哪些是可用的,找到可用的 fd 之后,根据 fd 的事件类型,决定调用 wfileProc 还是rfileProc 来处理相关的事件, 本文里我们关心的是 client 发来的 command 会被如何处理,那就是rfileProc了,rfileProc的设置过程在后文中被提及
aeApiPoll 在多个文件中被实现,Redis 用条件编译的手法决定采用哪种实现,很有意思
/* Select the I/O multiplexing backend at compile time. The candidates are
 * tested in descending order of performance, and exactly one implementation
 * file is included; ae_select.c is the fallback when none of the HAVE_*
 * macros is defined. */
#if defined(HAVE_EVPORT)
#include "ae_evport.c"
#elif defined(HAVE_EPOLL)
#include "ae_epoll.c"
#elif defined(HAVE_KQUEUE)
#include "ae_kqueue.c"
#else
#include "ae_select.c"
#endif
就看最经典的 epoll 好了:
/* Per-event-loop state of the epoll backend. */
typedef struct aeApiState {
    int epfd;                    /* epoll instance descriptor */
    struct epoll_event *events;  /* buffer filled by epoll_wait() */
} aeApiState;

/* Allocate the epoll backend state and attach it to the event loop.
 * Returns 0 on success, -1 if an allocation or epoll_create() fails (in
 * which case everything allocated so far is released). */
static int aeApiCreate(aeEventLoop *eventLoop) {
    aeApiState *state = zmalloc(sizeof(aeApiState));

    if (!state) return -1;
    state->events = zmalloc(sizeof(struct epoll_event)*eventLoop->setsize);
    if (!state->events) {
        zfree(state);
        return -1;
    }
    state->epfd = epoll_create(1024); /* 1024 is just a hint for the kernel */
    if (state->epfd == -1) {
        zfree(state->events);
        zfree(state);
        return -1;
    }
    eventLoop->apidata = state;
    return 0;
}

/* Start monitoring `fd` for the events in `mask` (AE_READABLE and/or
 * AE_WRITABLE), in addition to whatever it was already monitored for.
 * Returns 0 on success, -1 if epoll_ctl() fails. */
static int aeApiAddEvent(aeEventLoop *eventLoop, int fd, int mask) {
    aeApiState *state = eventLoop->apidata;
    struct epoll_event ee = {0}; /* avoid valgrind warning */
    /* If the fd was already monitored for some event, we need a MOD
     * operation. Otherwise we need an ADD operation. */
    int op = eventLoop->events[fd].mask == AE_NONE ?
            EPOLL_CTL_ADD : EPOLL_CTL_MOD;

    ee.events = 0;
    /* The epoll registration must cover the events already recorded in the
     * event loop for this fd as well as the new ones. */
    mask |= eventLoop->events[fd].mask; /* Merge old events */
    if (mask & AE_READABLE) ee.events |= EPOLLIN;
    if (mask & AE_WRITABLE) ee.events |= EPOLLOUT;
    ee.data.fd = fd;
    if (epoll_ctl(state->epfd,op,fd,&ee) == -1) return -1;
    return 0;
}

/* Wait for events on the monitored descriptors and record the ready ones in
 * eventLoop->fired[]. `tvp` is the timeout; when it is NULL the call blocks
 * until an event fires. Returns the number of entries written to fired[]. */
static int aeApiPoll(aeEventLoop *eventLoop, struct timeval *tvp) {
    aeApiState *state = eventLoop->apidata;
    int retval, numevents = 0;

    /* epoll_wait() takes its timeout in milliseconds; -1 means no timeout. */
    retval = epoll_wait(state->epfd,state->events,eventLoop->setsize,
            tvp ? (tvp->tv_sec*1000 + tvp->tv_usec/1000) : -1);
    if (retval > 0) {
        int j;

        numevents = retval;
        /* Translate each epoll event into the AE_* mask of the event loop. */
        for (j = 0; j < numevents; j++) {
            int mask = 0;
            struct epoll_event *e = state->events+j;

            if (e->events & EPOLLIN) mask |= AE_READABLE;
            if (e->events & EPOLLOUT) mask |= AE_WRITABLE;
            /* Errors and hang-ups are reported as writable. */
            if (e->events & EPOLLERR) mask |= AE_WRITABLE;
            if (e->events & EPOLLHUP) mask |= AE_WRITABLE;
            eventLoop->fired[j].fd = e->data.fd;
            eventLoop->fired[j].mask = mask;
        }
    }
    return numevents;
}
代码不算复杂,实际上对系统调用做了一层简单的封装
调用 epoll_ctl 方法来注册监听 fd
调用 epoll_wait 方法来等待,直到被监听的 fd 上有事件发生为止
比较有趣的做法是aeFileEvent 结构体里定义了一个 mask 属性来记录这个 fd 被监听的事件,应该是为了便于后续查找。
新 client 建立连接
networking.c
/* Excerpt of createClient() in networking.c: allocate a client and, for a
 * real connection, configure its socket and register the read handler.
 * Returns NULL if the read handler cannot be registered. */
client *createClient(int fd) {
    client *c = zmalloc(sizeof(client));

    /* passing -1 as fd it is possible to create a non connected client.
     * This is useful since all the commands needs to be executed
     * in the context of a client. When commands are executed in other
     * contexts (for instance a Lua script) we need a non connected client. */
    if (fd != -1) {
        anetNonBlock(NULL,fd);          /* put the fd in non-blocking mode */
        anetEnableTcpNoDelay(NULL,fd);  /* disable Nagle so replies leave promptly */
        if (server.tcpkeepalive)
            anetKeepAlive(NULL,fd,server.tcpkeepalive);
        /* Register readQueryFromClient as the handler for readable events
         * on this client's fd. */
        if (aeCreateFileEvent(server.el,fd,AE_READABLE,
            readQueryFromClient, c) == AE_ERR)
        {
            close(fd);
            zfree(c);
            return NULL;
        }
    }

    /* ... (rest of the function omitted in this excerpt) ... */
}
调用 aeCreateFileEvent 方法给这个 fd 注册 read 事件处理函数 readQueryFromClient,也就是设置到这个 fd 的 rfileProc 属性里
/* Register `proc` as the handler for the events in `mask` (AE_READABLE
 * and/or AE_WRITABLE) on `fd`, passing `clientData` back to it when it fires.
 *
 * Returns AE_OK on success. Returns AE_ERR if `fd` does not fit in the
 * event loop (errno is set to ERANGE) or if the multiplexing backend
 * refuses the registration. */
int aeCreateFileEvent(aeEventLoop *eventLoop, int fd, int mask,
        aeFileProc *proc, void *clientData)
{
    if (fd >= eventLoop->setsize) {
        errno = ERANGE;
        return AE_ERR;
    }
    aeFileEvent *fe = &eventLoop->events[fd];

    if (aeApiAddEvent(eventLoop, fd, mask) == -1)
        return AE_ERR;
    fe->mask |= mask;
    /* The same callback is stored in the slot of each requested event type. */
    if (mask & AE_READABLE) fe->rfileProc = proc;
    if (mask & AE_WRITABLE) fe->wfileProc = proc;
    fe->clientData = clientData;
    /* Keep track of the highest registered fd. */
    if (fd > eventLoop->maxfd)
        eventLoop->maxfd = fd;
    return AE_OK;
}
当 client 发送 command 过来的时候,eventloop 会发现这个 fd 可读,然后调用 readQueryFromClient 进行处理
处理client 发送的 command
/* Read handler registered for client sockets: the event loop invokes it
 * when the client's fd becomes readable. This excerpt shows only the last
 * step, handing the query buffer to processInputBuffer(). */
void readQueryFromClient(aeEventLoop *el, int fd, void *privdata, int mask) {
    /* ... (omitted in this excerpt, including the declaration of `c` and
     * everything that happens before the buffer is processed) ... */

    /* Time to process the buffer. If the client is a master we need to
     * compute the difference between the applied offset before and after
     * processing the buffer, to understand how much of the replication stream
     * was actually applied to the master state: this quantity, and its
     * corresponding part of the replication stream, will be propagated to
     * the sub-slaves and to the replication backlog. */
    if (!(c->flags & CLIENT_MASTER)) {
        processInputBuffer(c); /* client is not flagged as a master */
    } else {
        /* The client is flagged as a master: besides processing the
         * buffer, forward the part of the stream that was applied. */
        size_t prev_offset = c->reploff;
        processInputBuffer(c);
        size_t applied = c->reploff - prev_offset;
        if (applied) {
            replicationFeedSlavesFromMasterStream(server.slaves,
                    c->pending_querybuf, applied);
            /* Drop the forwarded prefix from the pending buffer. */
            sdsrange(c->pending_querybuf,applied,-1);
        }
    }
}
当 fd 可读时,eventloop 会触发 readQueryFromClient 这个回调函数,再调用 processInputBuffer 函数
/* This function is called every time, in the client structure 'c', there is
 * more query buffer to process, because we read more data from the socket
 * or because a client was blocked and later reactivated, so there could be
 * pending query buffer, already representing a full command, to process. */
void processInputBuffer(client *c) {
    /* ... (start of the function omitted in this excerpt, including the
     * header of the loop that the `break` statements below leave) ... */

        /* Parse one request according to its protocol type; stop if a
         * complete command is not available. */
        if (c->reqtype == PROTO_REQ_INLINE) {
            if (processInlineBuffer(c) != C_OK) break;
        } else if (c->reqtype == PROTO_REQ_MULTIBULK) {
            if (processMultibulkBuffer(c) != C_OK) break;
        } else {
            serverPanic("Unknown request type");
        }

        /* Multibulk processing could see a <= 0 length. */
        if (c->argc == 0) {
            resetClient(c);
        } else {
            /* Only reset the client when the command was executed. */
            if (processCommand(c) == C_OK) {
                if (c->flags & CLIENT_MASTER && !(c->flags & CLIENT_MULTI)) {
                    /* Update the applied replication offset of our master. */
                    c->reploff = c->read_reploff - sdslen(c->querybuf);
                }

                /* Don't reset the client structure for clients blocked in a
                 * module blocking command, so that the reply callback will
                 * still be able to access the client argv and argc field.
                 * The client will be reset in unblockClientFromModule(). */
                if (!(c->flags & CLIENT_BLOCKED) || c->btype != BLOCKED_MODULE)
                    resetClient(c);
            }
            /* freeMemoryIfNeeded may flush slave output buffers. This may
             * result into a slave, that may be the active client, to be
             * freed. */
            if (server.current_client == NULL) break;
        }
    } /* end of the loop whose header is omitted above */
    server.current_client = NULL;
}
调用processCommand 方法,顾名思义,里面会对 client 发来的指令做处理
其实现位于server.c 里
/* If this function gets called we already read a whole
 * command, arguments are in the client argv/argc fields.
 * processCommand() execute the command or prepare the
 * server for a bulk read from the client.
 *
 * If C_OK is returned the client is still alive and valid and
 * other operations can be performed by the caller. Otherwise
 * if C_ERR is returned the client was destroyed (i.e. after QUIT). */
int processCommand(client *c) {
    /* ... (omitted in this excerpt) ... */

    /* Now lookup the command and check ASAP about trivial error conditions
     * such as wrong arity, bad command name and so forth. */
    c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
    if (!c->cmd) {
        /* No such command. */
        flagTransaction(c);
        addReplyErrorFormat(c,"unknown command '%s'",
            (char*)c->argv[0]->ptr);
        return C_OK;
    } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
               (c->argc < -c->cmd->arity)) {
        /* A positive arity is an exact argument count; a negative arity
         * -N means at least N arguments. */
        flagTransaction(c);
        addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
            c->cmd->name);
        return C_OK;
    }

    /* ... (further checks and error handling omitted in this excerpt) ... */

    /* Exec the command */
    if (c->flags & CLIENT_MULTI &&
        c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
        c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
    {
        /* Inside a MULTI transaction: queue the command instead of
         * running it now. */
        queueMultiCommand(c);
        addReply(c,shared.queued);
    } else {
        /* Otherwise run the command right away through call(). */
        call(c,CMD_CALL_FULL);
        c->woff = server.master_repl_offset;
        if (listLength(server.ready_keys))
            handleClientsBlockedOnKeys();
    }
    return C_OK;
}
这个方法有两个关键点:
1. 调用 lookupCommand 方法查找 client 提交的 command 对应的实现(redis server 启动的时候会初始化一个 dict,里面存放了 command 名称到实现函数的映射关系,去这个 dict 里查就好了)
2. 执行函数,我们先不关注事务,只看最简单的普通命令,那么会调用call 方法
其实现位于 server.c 里
/* Excerpt of call() in server.c: run the command implementation attached
 * to the client and measure what it did. */
void call(client *c, int flags) {
    /* ... (omitted in this excerpt, including the declarations of `dirty`,
     * `start` and `duration`) ... */

    /* Call the command. */
    dirty = server.dirty;
    start = ustime();
    c->cmd->proc(c);              /* run the command implementation */
    duration = ustime()-start;    /* time spent executing the command */
    dirty = server.dirty-dirty;   /* change of server.dirty caused by the command */
    if (dirty < 0) dirty = 0;

    /* ... (rest of the function omitted in this excerpt) ... */
}
主要是用 cmd 的 proc 属性,一个函数指针来完成实际操作
至于 cmd 和它的 proc 属性,是在上一步的 lookupCommand 方法里被设置的。
例如最简单的 get 方法,就对应于getCommand 这个方法:
{"get",getCommand,2,"rF",0,NULL,1,1,1,0,0},
其具体实现位于t_string.c 里,细节暂时就不跟进了。
现在我们就大致上能理解client 发送的 command 的流转过程了。
Redis 源码走读(一)事件驱动机制与命令处理的更多相关文章
- Redis 源码简洁剖析 12 - 一条命令的处理过程
命令的处理过程 Redis server 和一个客户端建立连接后,会在事件驱动框架中注册可读事件--客户端的命令请求.命令处理对应 4 个阶段: 命令读取:对应 readQueryFromClient ...
- redis源码解析之事件驱动
Redis 内部有个小型的事件驱动,它主要处理两项任务: 文件事件:使用I/O多路复用技术处理多个客户端请求,并返回执行结果. 时间事件:维护服务器的资源管理,状态检查. 主要的数据结构包括文件事件结 ...
- Redis 源码走读(二)对象系统
Redis设计了多种数据结构,并以此为基础构建了多种对象,每种对象(除了新出的 stream 以外)都有超过一种的实现. redisObject 这个结构体反应了 Redis 对象的内存布局 type ...
- Redis源码解析:13Redis中的事件驱动机制
Redis中,处理网络IO时,采用的是事件驱动机制.但它没有使用libevent或者libev这样的库,而是自己实现了一个非常简单明了的事件驱动库ae_event,主要代码仅仅400行左右. 没有选择 ...
- Redis源码阅读(一)事件机制
Redis源码阅读(一)事件机制 Redis作为一款NoSQL非关系内存数据库,具有很高的读写性能,且原生支持的数据类型丰富,被广泛的作为缓存.分布式数据库.消息队列等应用.此外Redis还有许多高可 ...
- spring-data-redis-cache 使用及源码走读
预期读者 准备使用 spring 的 data-redis-cache 的同学 了解 @CacheConfig,@Cacheable,@CachePut,@CacheEvict,@Caching 的使 ...
- Redis 源码简洁剖析 09 - Reactor 模型
Reactor 模型 事件驱动框架 Redis 如何实现 Reactor 模型 事件的数据结构:aeFileEvent 主循环:aeMain 函数 事件捕获与分发:aeProcessEvents 函数 ...
- Redis 源码简洁剖析 10 - aeEventLoop 及事件
aeEventLoop IO 事件处理 IO 事件创建 读事件处理 写事件处理 时间事件处理 时间事件定义 时间事件创建 时间事件回调函数 时间事件的触发处理 参考链接 Redis 源码简洁剖析系列 ...
- Redis 源码简洁剖析 11 - 主 IO 线程及 Redis 6.0 多 IO 线程
Redis 到底是不是单线程的程序? 多 IO 线程的初始化 IO 线程运行函数 IOThreadMain 如何推迟客户端「读」操作? 如何推迟客户端「写」操作? 如何把待「读」客户端分配给 IO 线 ...
随机推荐
- 获取web服务器路径的方法 getResourceAsStream
1.先获取 serlvetContext对象 2.调用getResourceAsStream 在方法里 "\"表示当前web的根目录 还要拼接上具体的文件路径 ServletC ...
- float与定位的区别
float和绝对定位的区别 CSS中脱离文档流,也就是将元素从普通的布局排版中拿走,其他盒子在定位的时候,会当做脱离文档流的元素不存在而进行定位.1 需要注意的是,使用float脱离文档流时,其他盒子 ...
- P2127 序列排序
题目描述 小C有一个N个数的整数序列,这个序列的中的数两两不同.小C每次可以交换序列中的任意两个数,代价为这两个数之和.小C希望将整个序列升序排序,问小C需要的最小代价是多少? 输入输出格式 输入格式 ...
- IIS注册asp.net4.0
1. 运行->cmd 2. cd C:\Windows\Microsoft.NET\Framework64\v4.0.30319 3. aspnet_regiis.exe -i
- 斜率dp+cdq分治
写在前面 这个东西应该是一个非常重要的套路......所以我觉得必须写点什么记录一下,免得自己忘掉了 一直以来我的斜率dp都掌握的不算很好......也很少主动地在比赛里想到 写这个的契机是noi.a ...
- Laravel中Redis的使用
安装 laravel中使用redis首先需要你通过 Composer 安装 predis/predis 包: composer require predis/predis 配置 redis的配置文件是 ...
- [Leetcode] subsets 求数组所有的子集
Given a set of distinct integers, S, return all possible subsets. Note: Elements in a subset must be ...
- Good Substrings CodeForces - 271D
You've got string s, consisting of small English letters. Some of the English letters are good, the ...
- bzoj 4624 农场种植 fft
4624: 农场种植 Time Limit: 50 Sec Memory Limit: 512 MBSubmit: 48 Solved: 31[Submit][Status][Discuss] D ...
- 对Office文档进行授权
Microsoft.Office.Interop.Word.ApplicationClass app = new Microsoft.Office.Interop.Word.ApplicationCl ...