In the previous article we looked at Redis from the framework level: the startup process and the main command-processing flow. That was mostly the big picture; to really understand Redis we need to dig into the details.

  Today we take a closer look from the inside at a few of the key methods involved in executing a command, to get a deeper feel for where the charm of Redis lies.

First, as we saw in the previous chapter, processCommand() is the main entry point for command handling. Let's review it:

// server.c
/* If this function gets called we already read a whole
* command, arguments are in the client argv/argc fields.
* processCommand() execute the command or prepare the
* server for a bulk read from the client.
*
* If C_OK is returned the client is still alive and valid and
* other operations can be performed by the caller. Otherwise
* if C_ERR is returned the client was destroyed (i.e. after QUIT). */
int processCommand(client *c) {
    /* The QUIT command is handled separately. Normal command procs will
     * go through checking for replication and QUIT will cause trouble
     * when FORCE_REPLICATION is enabled and would be implemented in
     * a regular command proc. */
    // For QUIT, just reply OK; the client closes the connection itself
    if (!strcasecmp(c->argv[0]->ptr,"quit")) {
        addReply(c,shared.ok);
        c->flags |= CLIENT_CLOSE_AFTER_REPLY;
        return C_ERR;
    }

    /* Now lookup the command and check ASAP about trivial error conditions
     * such as wrong arity, bad command name and so forth. */
    // Look up the command by its first argument; we will study this lookup below
    c->cmd = c->lastcmd = lookupCommand(c->argv[0]->ptr);
    // Check that the command exists and that the number of arguments matches,
    // otherwise reply with an error right away
    if (!c->cmd) {
        flagTransaction(c);
        addReplyErrorFormat(c,"unknown command '%s'",
            (char*)c->argv[0]->ptr);
        return C_OK;
    } else if ((c->cmd->arity > 0 && c->cmd->arity != c->argc) ||
               (c->argc < -c->cmd->arity)) {
        flagTransaction(c);
        addReplyErrorFormat(c,"wrong number of arguments for '%s' command",
            c->cmd->name);
        return C_OK;
    }

    /* Check if the user is authenticated */
    // Authentication check: only AUTH is allowed before authenticating
    if (server.requirepass && !c->authenticated && c->cmd->proc != authCommand)
    {
        flagTransaction(c);
        addReply(c,shared.noautherr);
        return C_OK;
    }

    /* If cluster is enabled perform the cluster redirection here.
     * However we don't perform the redirection if:
     * 1) The sender of this command is our master.
     * 2) The command has no key arguments. */
    // Cluster-mode redirection of key-based requests
    // Note how important the flags field is here: one field encodes many properties
    if (server.cluster_enabled &&
        !(c->flags & CLIENT_MASTER) &&
        !(c->flags & CLIENT_LUA &&
          server.lua_caller->flags & CLIENT_MASTER) &&
        !(c->cmd->getkeys_proc == NULL && c->cmd->firstkey == 0))
    {
        int hashslot;

        if (server.cluster->state != CLUSTER_OK) {
            flagTransaction(c);
            clusterRedirectClient(c,NULL,0,CLUSTER_REDIR_DOWN_STATE);
            return C_OK;
        } else {
            int error_code;
            clusterNode *n = getNodeByQuery(c,c->cmd,c->argv,c->argc,&hashslot,&error_code);
            if (n == NULL || n != server.cluster->myself) {
                flagTransaction(c);
                clusterRedirectClient(c,n,hashslot,error_code);
                return C_OK;
            }
        }
    }

    /* Handle the maxmemory directive.
     *
     * First we try to free some memory if possible (if there are volatile
     * keys in the dataset). If there are not the only thing we can do
     * is returning an error. */
    // maxmemory check: try to free memory first
    if (server.maxmemory) {
        int retval = freeMemoryIfNeeded();
        /* freeMemoryIfNeeded may flush slave output buffers. This may result
         * into a slave, that may be the active client, to be freed. */
        if (server.current_client == NULL) return C_ERR;

        /* It was impossible to free enough memory, and the command the client
         * is trying to execute is denied during OOM conditions? Error. */
        if ((c->cmd->flags & CMD_DENYOOM) && retval == C_ERR) {
            flagTransaction(c);
            addReply(c, shared.oomerr);
            return C_OK;
        }
    }

    /* Don't accept write commands if there are problems persisting on disk
     * and if this is a master instance. */
    // If persistence is failing, a master refuses writes (and even PING)
    if (((server.stop_writes_on_bgsave_err &&
          server.saveparamslen > 0 &&
          server.lastbgsave_status == C_ERR) ||
          server.aof_last_write_status == C_ERR) &&
        server.masterhost == NULL &&
        (c->cmd->flags & CMD_WRITE ||
         c->cmd->proc == pingCommand))
    {
        flagTransaction(c);
        if (server.aof_last_write_status == C_OK)
            addReply(c, shared.bgsaveerr);
        else
            addReplySds(c,
                sdscatprintf(sdsempty(),
                "-MISCONF Errors writing to the AOF file: %s\r\n",
                strerror(server.aof_last_write_errno)));
        return C_OK;
    }

    /* Don't accept write commands if there are not enough good slaves and
     * user configured the min-slaves-to-write option. */
    // Refuse writes when there are not enough good slaves
    if (server.masterhost == NULL &&
        server.repl_min_slaves_to_write &&
        server.repl_min_slaves_max_lag &&
        c->cmd->flags & CMD_WRITE &&
        server.repl_good_slaves_count < server.repl_min_slaves_to_write)
    {
        flagTransaction(c);
        addReply(c, shared.noreplicaserr);
        return C_OK;
    }

    /* Don't accept write commands if this is a read only slave. But
     * accept write commands if this is our master. */
    // A read-only slave refuses writes from ordinary clients
    if (server.masterhost && server.repl_slave_ro &&
        !(c->flags & CLIENT_MASTER) &&
        c->cmd->flags & CMD_WRITE)
    {
        addReply(c, shared.roslaveerr);
        return C_OK;
    }

    /* Only allow SUBSCRIBE and UNSUBSCRIBE in the context of Pub/Sub */
    // In Pub/Sub mode only a handful of commands are accepted
    if (c->flags & CLIENT_PUBSUB &&
        c->cmd->proc != pingCommand &&
        c->cmd->proc != subscribeCommand &&
        c->cmd->proc != unsubscribeCommand &&
        c->cmd->proc != psubscribeCommand &&
        c->cmd->proc != punsubscribeCommand) {
        addReplyError(c,"only (P)SUBSCRIBE / (P)UNSUBSCRIBE / PING / QUIT allowed in this context");
        return C_OK;
    }

    /* Only allow INFO and SLAVEOF when slave-serve-stale-data is no and
     * we are a slave with a broken link with master. */
    // If the replication link is down and slave-serve-stale-data is off,
    // only commands flagged CMD_STALE are served
    if (server.masterhost && server.repl_state != REPL_STATE_CONNECTED &&
        server.repl_serve_stale_data == 0 &&
        !(c->cmd->flags & CMD_STALE))
    {
        flagTransaction(c);
        addReply(c, shared.masterdownerr);
        return C_OK;
    }

    /* Loading DB? Return an error if the command has not the
     * CMD_LOADING flag. */
    // While the DB is still loading, only CMD_LOADING commands are served
    if (server.loading && !(c->cmd->flags & CMD_LOADING)) {
        addReply(c, shared.loadingerr);
        return C_OK;
    }

    /* Lua script too slow? Only allow a limited number of commands. */
    // While a Lua script is running too long, only a few commands are allowed
    if (server.lua_timedout &&
          c->cmd->proc != authCommand &&
          c->cmd->proc != replconfCommand &&
        !(c->cmd->proc == shutdownCommand &&
          c->argc == 2 &&
          tolower(((char*)c->argv[1]->ptr)[0]) == 'n') &&
        !(c->cmd->proc == scriptCommand &&
          c->argc == 2 &&
          tolower(((char*)c->argv[1]->ptr)[0]) == 'k'))
    {
        flagTransaction(c);
        addReply(c, shared.slowscripterr);
        return C_OK;
    }

    /* Exec the command */
    if (c->flags & CLIENT_MULTI &&
        c->cmd->proc != execCommand && c->cmd->proc != discardCommand &&
        c->cmd->proc != multiCommand && c->cmd->proc != watchCommand)
    {
        // Inside a MULTI transaction: queue the command
        queueMultiCommand(c);
        addReply(c,shared.queued);
    } else {
        // Not in a transaction: execute it right away
        call(c,CMD_CALL_FULL);
        c->woff = server.master_repl_offset;
        // If some blocked keys became ready, keep processing them
        if (listLength(server.ready_keys))
            handleClientsBlockedOnLists();
    }
    return C_OK;
}

Part 0. A few key data structures in Redis


1. redisServer

  redisServer is the biggest global variable in Redis: it holds the clients, the configuration, the databases and plenty of other important state. Functions also communicate with each other implicitly through this global. It is declared in server.h and instantiated in server.c.

struct redisServer {
/* General */
pid_t pid; /* Main process pid. */
char *configfile; /* Absolute config file path, or NULL */
char *executable; /* Absolute executable file path. */
char **exec_argv; /* Executable argv vector (copy). */
int hz; /* serverCron() calls frequency in hertz */
redisDb *db;
dict *commands; /* Command table */
dict *orig_commands; /* Command table before command renaming. */
aeEventLoop *el;
unsigned lruclock:LRU_BITS; /* Clock for LRU eviction */
int shutdown_asap; /* SHUTDOWN needed ASAP */
int activerehashing; /* Incremental rehash in serverCron() */
char *requirepass; /* Pass for AUTH command, or NULL */
char *pidfile; /* PID file path */
int arch_bits; /* 32 or 64 depending on sizeof(long) */
int cronloops; /* Number of times the cron function run */
char runid[CONFIG_RUN_ID_SIZE+1]; /* ID always different at every exec. */
int sentinel_mode; /* True if this instance is a Sentinel. */
/* Networking */
int port; /* TCP listening port */
int tcp_backlog; /* TCP listen() backlog */
char *bindaddr[CONFIG_BINDADDR_MAX]; /* Addresses we should bind to */
int bindaddr_count; /* Number of addresses in server.bindaddr[] */
char *unixsocket; /* UNIX socket path */
mode_t unixsocketperm; /* UNIX socket permission */
int ipfd[CONFIG_BINDADDR_MAX]; /* TCP socket file descriptors */
int ipfd_count; /* Used slots in ipfd[] */
int sofd; /* Unix socket file descriptor */
int cfd[CONFIG_BINDADDR_MAX];/* Cluster bus listening socket */
int cfd_count; /* Used slots in cfd[] */
list *clients; /* List of active clients */
list *clients_to_close; /* Clients to close asynchronously */
list *clients_pending_write; /* There is to write or install handler. */
list *slaves, *monitors; /* List of slaves and MONITORs */
client *current_client; /* Current client, only used on crash report */
int clients_paused; /* True if clients are currently paused */
mstime_t clients_pause_end_time; /* Time when we undo clients_paused */
char neterr[ANET_ERR_LEN]; /* Error buffer for anet.c */
dict *migrate_cached_sockets;/* MIGRATE cached sockets */
uint64_t next_client_id; /* Next client unique ID. Incremental. */
int protected_mode; /* Don't accept external connections. */
/* RDB / AOF loading information */
int loading; /* We are loading data from disk if true */
off_t loading_total_bytes;
off_t loading_loaded_bytes;
time_t loading_start_time;
off_t loading_process_events_interval_bytes;
/* Fast pointers to often looked up command */
struct redisCommand *delCommand, *multiCommand, *lpushCommand, *lpopCommand,
*rpopCommand, *sremCommand, *execCommand;
/* Fields used only for stats */
time_t stat_starttime; /* Server start time */
long long stat_numcommands; /* Number of processed commands */
long long stat_numconnections; /* Number of connections received */
long long stat_expiredkeys; /* Number of expired keys */
long long stat_evictedkeys; /* Number of evicted keys (maxmemory) */
long long stat_keyspace_hits; /* Number of successful lookups of keys */
long long stat_keyspace_misses; /* Number of failed lookups of keys */
size_t stat_peak_memory; /* Max used memory record */
long long stat_fork_time; /* Time needed to perform latest fork() */
double stat_fork_rate; /* Fork rate in GB/sec. */
long long stat_rejected_conn; /* Clients rejected because of maxclients */
long long stat_sync_full; /* Number of full resyncs with slaves. */
long long stat_sync_partial_ok; /* Number of accepted PSYNC requests. */
long long stat_sync_partial_err;/* Number of unaccepted PSYNC requests. */
list *slowlog; /* SLOWLOG list of commands */
long long slowlog_entry_id; /* SLOWLOG current entry ID */
long long slowlog_log_slower_than; /* SLOWLOG time limit (to get logged) */
unsigned long slowlog_max_len; /* SLOWLOG max number of items logged */
size_t resident_set_size; /* RSS sampled in serverCron(). */
long long stat_net_input_bytes; /* Bytes read from network. */
long long stat_net_output_bytes; /* Bytes written to network. */
/* The following two are used to track instantaneous metrics, like
* number of operations per second, network traffic. */
struct {
long long last_sample_time; /* Timestamp of last sample in ms */
long long last_sample_count;/* Count in last sample */
long long samples[STATS_METRIC_SAMPLES];
int idx;
} inst_metric[STATS_METRIC_COUNT];
/* Configuration */
int verbosity; /* Loglevel in redis.conf */
int maxidletime; /* Client timeout in seconds */
int tcpkeepalive; /* Set SO_KEEPALIVE if non-zero. */
int active_expire_enabled; /* Can be disabled for testing purposes. */
size_t client_max_querybuf_len; /* Limit for client query buffer length */
int dbnum; /* Total number of configured DBs */
int supervised; /* 1 if supervised, 0 otherwise. */
int supervised_mode; /* See SUPERVISED_* */
int daemonize; /* True if running as a daemon */
clientBufferLimitsConfig client_obuf_limits[CLIENT_TYPE_OBUF_COUNT];
/* AOF persistence */
int aof_state; /* AOF_(ON|OFF|WAIT_REWRITE) */
int aof_fsync; /* Kind of fsync() policy */
char *aof_filename; /* Name of the AOF file */
int aof_no_fsync_on_rewrite; /* Don't fsync if a rewrite is in prog. */
int aof_rewrite_perc; /* Rewrite AOF if % growth is > M and... */
off_t aof_rewrite_min_size; /* the AOF file is at least N bytes. */
off_t aof_rewrite_base_size; /* AOF size on latest startup or rewrite. */
off_t aof_current_size; /* AOF current size. */
int aof_rewrite_scheduled; /* Rewrite once BGSAVE terminates. */
pid_t aof_child_pid; /* PID if rewriting process */
list *aof_rewrite_buf_blocks; /* Hold changes during an AOF rewrite. */
sds aof_buf; /* AOF buffer, written before entering the event loop */
int aof_fd; /* File descriptor of currently selected AOF file */
int aof_selected_db; /* Currently selected DB in AOF */
time_t aof_flush_postponed_start; /* UNIX time of postponed AOF flush */
time_t aof_last_fsync; /* UNIX time of last fsync() */
time_t aof_rewrite_time_last; /* Time used by last AOF rewrite run. */
time_t aof_rewrite_time_start; /* Current AOF rewrite start time. */
int aof_lastbgrewrite_status; /* C_OK or C_ERR */
unsigned long aof_delayed_fsync; /* delayed AOF fsync() counter */
int aof_rewrite_incremental_fsync;/* fsync incrementally while rewriting? */
int aof_last_write_status; /* C_OK or C_ERR */
int aof_last_write_errno; /* Valid if aof_last_write_status is ERR */
int aof_load_truncated; /* Don't stop on unexpected AOF EOF. */
/* AOF pipes used to communicate between parent and child during rewrite. */
int aof_pipe_write_data_to_child;
int aof_pipe_read_data_from_parent;
int aof_pipe_write_ack_to_parent;
int aof_pipe_read_ack_from_child;
int aof_pipe_write_ack_to_child;
int aof_pipe_read_ack_from_parent;
int aof_stop_sending_diff; /* If true stop sending accumulated diffs
to child process. */
sds aof_child_diff; /* AOF diff accumulator child side. */
/* RDB persistence */
long long dirty; /* Changes to DB from the last save */
long long dirty_before_bgsave; /* Used to restore dirty on failed BGSAVE */
pid_t rdb_child_pid; /* PID of RDB saving child */
struct saveparam *saveparams; /* Save points array for RDB */
int saveparamslen; /* Number of saving points */
char *rdb_filename; /* Name of RDB file */
int rdb_compression; /* Use compression in RDB? */
int rdb_checksum; /* Use RDB checksum? */
time_t lastsave; /* Unix time of last successful save */
time_t lastbgsave_try; /* Unix time of last attempted bgsave */
time_t rdb_save_time_last; /* Time used by last RDB save run. */
time_t rdb_save_time_start; /* Current RDB save start time. */
int rdb_child_type; /* Type of save by active child. */
int lastbgsave_status; /* C_OK or C_ERR */
int stop_writes_on_bgsave_err; /* Don't allow writes if can't BGSAVE */
int rdb_pipe_write_result_to_parent; /* RDB pipes used to return the state */
int rdb_pipe_read_result_from_child; /* of each slave in diskless SYNC. */
/* Propagation of commands in AOF / replication */
redisOpArray also_propagate; /* Additional command to propagate. */
/* Logging */
char *logfile; /* Path of log file */
int syslog_enabled; /* Is syslog enabled? */
char *syslog_ident; /* Syslog ident */
int syslog_facility; /* Syslog facility */
/* Replication (master) */
int slaveseldb; /* Last SELECTed DB in replication output */
long long master_repl_offset; /* Global replication offset */
int repl_ping_slave_period; /* Master pings the slave every N seconds */
char *repl_backlog; /* Replication backlog for partial syncs */
long long repl_backlog_size; /* Backlog circular buffer size */
long long repl_backlog_histlen; /* Backlog actual data length */
long long repl_backlog_idx; /* Backlog circular buffer current offset */
long long repl_backlog_off; /* Replication offset of first byte in the
backlog buffer. */
time_t repl_backlog_time_limit; /* Time without slaves after the backlog
gets released. */
time_t repl_no_slaves_since; /* We have no slaves since that time.
Only valid if server.slaves len is 0. */
int repl_min_slaves_to_write; /* Min number of slaves to write. */
int repl_min_slaves_max_lag; /* Max lag of <count> slaves to write. */
int repl_good_slaves_count; /* Number of slaves with lag <= max_lag. */
int repl_diskless_sync; /* Send RDB to slaves sockets directly. */
int repl_diskless_sync_delay; /* Delay to start a diskless repl BGSAVE. */
/* Replication (slave) */
char *masterauth; /* AUTH with this password with master */
char *masterhost; /* Hostname of master */
int masterport; /* Port of master */
int repl_timeout; /* Timeout after N seconds of master idle */
client *master; /* Client that is master for this slave */
client *cached_master; /* Cached master to be reused for PSYNC. */
int repl_syncio_timeout; /* Timeout for synchronous I/O calls */
int repl_state; /* Replication status if the instance is a slave */
off_t repl_transfer_size; /* Size of RDB to read from master during sync. */
off_t repl_transfer_read; /* Amount of RDB read from master during sync. */
off_t repl_transfer_last_fsync_off; /* Offset when we fsync-ed last time. */
int repl_transfer_s; /* Slave -> Master SYNC socket */
int repl_transfer_fd; /* Slave -> Master SYNC temp file descriptor */
char *repl_transfer_tmpfile; /* Slave-> master SYNC temp file name */
time_t repl_transfer_lastio; /* Unix time of the latest read, for timeout */
int repl_serve_stale_data; /* Serve stale data when link is down? */
int repl_slave_ro; /* Slave is read only? */
time_t repl_down_since; /* Unix time at which link with master went down */
int repl_disable_tcp_nodelay; /* Disable TCP_NODELAY after SYNC? */
int slave_priority; /* Reported in INFO and used by Sentinel. */
char repl_master_runid[CONFIG_RUN_ID_SIZE+1]; /* Master run id for PSYNC. */
long long repl_master_initial_offset; /* Master PSYNC offset. */
int repl_slave_lazy_flush; /* Lazy FLUSHALL before loading DB? */
/* Replication script cache. */
dict *repl_scriptcache_dict; /* SHA1 all slaves are aware of. */
list *repl_scriptcache_fifo; /* First in, first out LRU eviction. */
unsigned int repl_scriptcache_size; /* Max number of elements. */
/* Synchronous replication. */
list *clients_waiting_acks; /* Clients waiting in WAIT command. */
int get_ack_from_slaves; /* If true we send REPLCONF GETACK. */
/* Limits */
unsigned int maxclients; /* Max number of simultaneous clients */
unsigned long long maxmemory; /* Max number of memory bytes to use */
int maxmemory_policy; /* Policy for key eviction */
int maxmemory_samples; /* Pricision of random sampling */
/* Blocked clients */
unsigned int bpop_blocked_clients; /* Number of clients blocked by lists */
list *unblocked_clients; /* list of clients to unblock before next loop */
list *ready_keys; /* List of readyList structures for BLPOP & co */
/* Sort parameters - qsort_r() is only available under BSD so we
* have to take this state global, in order to pass it to sortCompare() */
int sort_desc;
int sort_alpha;
int sort_bypattern;
int sort_store;
/* Zip structure config, see redis.conf for more information */
size_t hash_max_ziplist_entries;
size_t hash_max_ziplist_value;
size_t set_max_intset_entries;
size_t zset_max_ziplist_entries;
size_t zset_max_ziplist_value;
size_t hll_sparse_max_bytes;
/* List parameters */
int list_max_ziplist_size;
int list_compress_depth;
/* time cache */
time_t unixtime; /* Unix time sampled every cron cycle. */
long long mstime; /* Like 'unixtime' but with milliseconds resolution. */
/* Pubsub */
dict *pubsub_channels; /* Map channels to list of subscribed clients */
list *pubsub_patterns; /* A list of pubsub_patterns */
int notify_keyspace_events; /* Events to propagate via Pub/Sub. This is an
xor of NOTIFY_... flags. */
/* Cluster */
int cluster_enabled; /* Is cluster enabled? */
mstime_t cluster_node_timeout; /* Cluster node timeout. */
char *cluster_configfile; /* Cluster auto-generated config file name. */
struct clusterState *cluster; /* State of the cluster */
int cluster_migration_barrier; /* Cluster replicas migration barrier. */
int cluster_slave_validity_factor; /* Slave max data age for failover. */
int cluster_require_full_coverage; /* If true, put the cluster down if
there is at least an uncovered slot.*/
char *cluster_announce_ip; /* IP address to announce on cluster bus. */
int cluster_announce_port; /* base port to announce on cluster bus. */
int cluster_announce_bus_port; /* bus port to announce on cluster bus. */
/* Scripting */
lua_State *lua; /* The Lua interpreter. We use just one for all clients */
client *lua_client; /* The "fake client" to query Redis from Lua */
client *lua_caller; /* The client running EVAL right now, or NULL */
dict *lua_scripts; /* A dictionary of SHA1 -> Lua scripts */
mstime_t lua_time_limit; /* Script timeout in milliseconds */
mstime_t lua_time_start; /* Start time of script, milliseconds time */
int lua_write_dirty; /* True if a write command was called during the
execution of the current script. */
int lua_random_dirty; /* True if a random command was called during the
execution of the current script. */
int lua_replicate_commands; /* True if we are doing single commands repl. */
int lua_multi_emitted;/* True if we already proagated MULTI. */
int lua_repl; /* Script replication flags for redis.set_repl(). */
int lua_timedout; /* True if we reached the time limit for script
execution. */
int lua_kill; /* Kill the script if true. */
int lua_always_replicate_commands; /* Default replication type. */
/* Lazy free */
int lazyfree_lazy_eviction;
int lazyfree_lazy_expire;
int lazyfree_lazy_server_del;
/* Latency monitor */
long long latency_monitor_threshold;
dict *latency_events;
/* Assert & bug reporting */
char *assert_failed;
char *assert_file;
int assert_line;
int bug_report_start; /* True if bug report header was already logged. */
int watchdog_period; /* Software watchdog period in ms. 0 = off */
/* System hardware info */
size_t system_memory_size; /* Total memory in system as reported by OS */
};
// server.c: the one global server instance
struct redisServer server; /* server global state */
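
  Since server is a single global, command handlers and cron jobs read and write its fields directly. A tiny illustrative fragment (my own sketch, not a quote from the source) of the pattern you will keep running into:

// Illustrative only: how code elsewhere leans on the global 'server'.
void someWriteCommandSketch(client *c) {
    /* ... modify the keyspace in c->db ... */
    server.dirty++;                       // tell RDB/AOF that the dataset changed
    if (server.maxmemory &&
        zmalloc_used_memory() > server.maxmemory) {
        /* eviction will be triggered on the next processCommand() */
    }
}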

2. redisObject

  Java has the saying that "everything is an object"; in Redis you could likewise say that everything is a redisObject. It is arguably the most general-purpose data structure in Redis.

typedef struct redisObject {
    // type, 4 bits
    unsigned type:4;
    // encoding, 4 bits
    unsigned encoding:4;
    // LRU time, 24 bits
    unsigned lru:LRU_BITS; /* lru time (relative to server.lruclock) */
    // reference count; when it drops to 0 the object is no longer in use
    int refcount;
    // data pointer, can hold any kind of payload
    void *ptr;
} robj;
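
  To see how these fields get used, here is a minimal sketch (the helper itself is made up; the OBJ_* constants, serverLog() and sdslen() are real) of the kind of check a command handler does before touching a value: verify type, then branch on encoding.

// Hypothetical helper, only to illustrate the role of type/encoding.
static int describeStringObject(robj *o) {
    if (o == NULL || o->type != OBJ_STRING) return C_ERR; // wrong type: caller replies with a type error
    if (o->encoding == OBJ_ENCODING_INT) {
        // the value is stored directly inside the pointer field as a long
        long v = (long)o->ptr;
        serverLog(LL_DEBUG, "int-encoded string: %ld", v);
    } else {
        // OBJ_ENCODING_RAW / OBJ_ENCODING_EMBSTR: ptr points to an sds string
        serverLog(LL_DEBUG, "sds-encoded string, len=%zu", sdslen(o->ptr));
    }
    return C_OK;
}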

3. redisDb

  redisDb is the storage model behind Redis as a database: all business data ends up in here. Judging by this struct alone, implementing a database looks simple; in practice it is anything but.

typedef struct redisDb {
    // A database is just a big key/value dict; only after looking up the key
    // do we know the value's type (string, hash, list, set, zset, ...)
    dict *dict; /* The keyspace for this DB */
    // Keys that have an expire time set
    dict *expires; /* Timeout of keys with a timeout set */
    dict *blocking_keys; /* Keys with clients waiting for data (BLPOP) */
    dict *ready_keys; /* Blocked keys that received a PUSH */
    dict *watched_keys; /* WATCHED keys for MULTI/EXEC CAS */
    struct evictionPoolEntry *eviction_pool; /* Eviction pool of keys */
    // Database index; there are 16 databases by default, controlled by the
    // 'databases' config (server.dbnum), which sizes the outer db array
    int id; /* Database ID */
    long long avg_ttl; /* Average TTL, just for stats */
} redisDb;
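
  To make the division of labour between dict and expires concrete, here is a condensed sketch of what a keyspace lookup conceptually does. The real logic lives in db.c (lookupKeyRead()/expireIfNeeded()); this compressed version is mine.

// Simplified sketch of a keyspace lookup; stats and replication details omitted.
robj *lookupKeySketch(redisDb *db, robj *key) {
    // 1. If the key has an expire time and it is already in the past, delete it first.
    dictEntry *ttl = dictFind(db->expires, key->ptr);
    if (ttl && dictGetSignedIntegerVal(ttl) < mstime()) {
        dbDelete(db, key);               // lazily expire on access
        return NULL;
    }
    // 2. Otherwise the value is simply whatever sits in the keyspace dict.
    dictEntry *de = dictFind(db->dict, key->ptr);
    return de ? dictGetVal(de) : NULL;
}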

4. client

  Every client connection is one client instance, which carries a lot of widely referenced state. For example, once a request has been parsed, the arguments and the pointer to the selected database are all stored on the client.

typedef struct client {
uint64_t id; /* Client incremental unique ID. */
// socket fd
int fd; /* Client socket. */
// The DB this client currently has SELECTed; all operations run against this db
redisDb *db; /* Pointer to currently SELECTed DB. */
int dictid; /* ID of the currently SELECTed DB. */
robj *name; /* As set by CLIENT SETNAME. */
// Parsed request data for the command being processed
sds querybuf; /* Buffer we use to accumulate client queries. */
size_t querybuf_peak; /* Recent (100ms or more) peak of querybuf size. */
int argc; /* Num of arguments of current command. */
robj **argv; /* Arguments of current command. */
// Pointers to the current command and the previously executed one
struct redisCommand *cmd, *lastcmd; /* Last command executed. */
int reqtype; /* Request protocol type: PROTO_REQ_* */
int multibulklen; /* Number of multi bulk arguments left to read. */
long bulklen; /* Length of bulk argument in multi bulk request. */
list *reply; /* List of reply objects to send to the client. */
unsigned long long reply_bytes; /* Tot bytes of objects in reply list. */
size_t sentlen; /* Amount of bytes already sent in the current
buffer or object being sent. */
time_t ctime; /* Client creation time. */
time_t lastinteraction; /* Time of the last interaction, used for timeout */
time_t obuf_soft_limit_reached_time;
int flags; /* Client flags: CLIENT_* macros. */
// Whether this client has passed AUTH
int authenticated; /* When requirepass is non-NULL. */
// Replication-related fields
int replstate; /* Replication state if this is a slave. */
int repl_put_online_on_ack; /* Install slave write handler on ACK. */
int repldbfd; /* Replication DB file descriptor. */
off_t repldboff; /* Replication DB file offset. */
off_t repldbsize; /* Replication DB file size. */
sds replpreamble; /* Replication DB preamble. */
long long reploff; /* Replication offset if this is our master. */
long long repl_ack_off; /* Replication ack offset, if this is a slave. */
long long repl_ack_time;/* Replication ack time, if this is a slave. */
long long psync_initial_offset; /* FULLRESYNC reply offset other slaves
copying this slave output buffer
should use. */
char replrunid[CONFIG_RUN_ID_SIZE+1]; /* Master run id if is a master. */
int slave_listening_port; /* As configured with: SLAVECONF listening-port */
int slave_capa; /* Slave capabilities: SLAVE_CAPA_* bitwise OR. */
multiState mstate; /* MULTI/EXEC state */
int btype; /* Type of blocking op if CLIENT_BLOCKED. */
blockingState bpop; /* blocking state */
long long woff; /* Last write global replication offset. */
list *watched_keys; /* Keys WATCHED for MULTI/EXEC CAS */
dict *pubsub_channels; /* channels a client is interested in (SUBSCRIBE) */
list *pubsub_patterns; /* patterns a client is interested in (SUBSCRIBE) */
sds peerid; /* Cached peer ID. */ /* Response buffer */
int bufpos;
char buf[PROTO_REPLY_CHUNK_BYTES];
} client;
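
  For a feel of argc/argv: after the protocol parser has handled a request such as SET user:1 tom, argc is 3 and argv holds the three string objects. A hypothetical helper to dump them could look like this:

// Hypothetical debugging helper (not in the source): dump the parsed arguments
// of the current command. For "SET user:1 tom" we expect argc == 3 and
// argv[0..2] pointing at robj strings "set", "user:1", "tom".
static void logClientArgs(client *c) {
    int j;
    for (j = 0; j < c->argc; j++) {
        // assumes argv[j] is sds-encoded, which is the case right after parsing
        serverLog(LL_DEBUG, "argv[%d] = %s", j, (char*)c->argv[j]->ptr);
    }
}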

Part 1. Command lookup: dict


  When a request arrives at the Redis server and its data has been parsed, we obviously need to figure out which command it is before we can handle it. So how does Redis look up the handler for a command?

// server.c: at the end of the day this is nothing more than a map lookup
struct redisCommand *lookupCommand(sds name) {
    // Query server.commands directly; server.commands was populated at startup
    return dictFetchValue(server.commands, name);
}
// dict.c: look up a key in the dict and return its value (of any type)
void *dictFetchValue(dict *d, const void *key) {
    dictEntry *he;

    // Find the entry, then simply take its value; NULL if not found
    he = dictFind(d,key);
    return he ? dictGetVal(he) : NULL;
}
// dict.c: find the dictEntry for a key; essentially the same idea as a hashmap
dictEntry *dictFind(dict *d, const void *key)
{
    dictEntry *he;
    unsigned int h, idx, table;

    if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
    // If a resize/rehash is in progress, do one incremental rehash step first.
    // This piecemeal rehashing is one of Redis' signature tricks.
    if (dictIsRehashing(d)) _dictRehashStep(d);
    h = dictHashKey(d, key);
    // Look at both hash tables at most (only the first if no rehash is running)
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while(he) {
            // Return the element if the key matches, otherwise walk the chain
            if (dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        // If no rehash is running, a single pass over ht[0] settles the question
        if (!dictIsRehashing(d)) return NULL;
    }
    return NULL;
}
// dict.h: rehashidx != -1 means a rehash is in progress;
// rehashidx is the index of the next ht[0] bucket to migrate
#define dictIsRehashing(d) ((d)->rehashidx != -1)
// dict.c: one incremental rehash step
/* This function performs just a step of rehashing, and only if there are
 * no safe iterators bound to our hash table. When we have iterators in the
 * middle of a rehashing we can't mess with the two hash tables otherwise
 * some element can be missed or duplicated.
 *
 * This function is called by common lookup or update operations in the
 * dictionary so that the hash table automatically migrates from H1 to H2
 * while it is actively used. */
static void _dictRehashStep(dict *d) {
    // Migrate a single bucket only, so it never takes long
    if (d->iterators == 0) dictRehash(d,1);
}
// dict.c
/* Performs N steps of incremental rehashing. Returns 1 if there are still
 * keys to move from the old to the new hash table, otherwise 0 is returned.
 *
 * Note that a rehashing step consists in moving a bucket (that may have more
 * than one key as we use chaining) from the old to the new hash table, however
 * since part of the hash table may be composed of empty spaces, it is not
 * guaranteed that this function will rehash even a single bucket, since it
 * will visit at max N*10 empty buckets in total, otherwise the amount of
 * work it does would be unbound and the function may block for a long time. */
int dictRehash(dict *d, int n) {
    // Visit at most n*10 empty buckets, so a single call cannot stall the server
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    if (!dictIsRehashing(d)) return 0;

    // Rehashing is not done in one go: move at most n buckets, then return
    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        // Resume where the last step stopped; skip empty buckets, but give up
        // early if we only keep hitting empty ones
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        // Transfer every entry chained in this bucket
        while(de) {
            unsigned int h;

            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            // Handle collisions by linking in front of the existing chain
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        // Advance rehashidx to the next bucket to migrate
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        // Once everything has been moved, swap ht[1] into ht[0]
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    // Returning 1 means there is still work left
    return 1;
}

  To sum up, looking up a command is simply a lookup in a hash dict. The notable optimization is that when a rehash is required it is done a little at a time, keeping each pause as short as possible, and two hash tables are swapped to guarantee that no data is lost along the way.

  The hash table is a very useful data structure. Here it serves command lookup, but the key/value lookups we will meet later use the same dict, so when reading other commands we only need to focus on whatever handling is specific to them.
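
  To isolate the idea of incremental rehashing from the surrounding Redis code, here is a small self-contained toy of my own (not Redis code): it keeps an old and a new bucket array and, on every lookup, migrates at most one bucket, which is exactly the spirit of _dictRehashStep().

#include <stdio.h>
#include <string.h>

#define OLD_SIZE 4
#define NEW_SIZE 8

/* Collision handling is omitted for brevity; the three sample keys below
 * happen not to collide in the new table. */
static const char *old_tab[OLD_SIZE] = { "get", "set", NULL, "del" };
static const char *new_tab[NEW_SIZE];
static int rehashidx = 0;                 /* next bucket of old_tab to migrate */

static unsigned int toy_hash(const char *s) {
    unsigned int h = 5381;
    while (*s) h = h * 33 + (unsigned char)*s++;
    return h;
}

/* Move at most one non-empty bucket, mirroring _dictRehashStep(). */
static void rehash_step(void) {
    while (rehashidx < OLD_SIZE && old_tab[rehashidx] == NULL) rehashidx++;
    if (rehashidx >= OLD_SIZE) return;    /* nothing left to migrate */
    const char *key = old_tab[rehashidx];
    new_tab[toy_hash(key) % NEW_SIZE] = key;
    old_tab[rehashidx++] = NULL;
}

/* Every lookup pays for a tiny slice of the migration. */
static int toy_find(const char *key) {
    rehash_step();
    if (new_tab[toy_hash(key) % NEW_SIZE] &&
        !strcmp(new_tab[toy_hash(key) % NEW_SIZE], key)) return 1;
    for (int i = rehashidx; i < OLD_SIZE; i++)   /* not-yet-migrated part */
        if (old_tab[i] && !strcmp(old_tab[i], key)) return 1;
    return 0;
}

int main(void) {
    printf("find get: %d\n", toy_find("get"));
    printf("find del: %d\n", toy_find("del"));
    printf("find foo: %d\n", toy_find("foo"));
    return 0;
}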

Part 2. Tracing back: how commands are registered


  For the lookup above to work, it helps to know when the commands were registered in the first place. Let's see how they get written into server.commands.

// server.c: called during initServerConfig() to load the command set into server.commands
/* Populates the Redis Command Table starting from the hard coded list
 * we have on top of redis.c file. */
void populateCommandTable(void) {
    int j;
    int numcommands = sizeof(redisCommandTable)/sizeof(struct redisCommand);

    for (j = 0; j < numcommands; j++) {
        // Walk the array defined at the top of this file and register each command
        struct redisCommand *c = redisCommandTable+j;
        char *f = c->sflags;
        int retval1, retval2;

        // Convert the sflags string into the command's flags bitmask,
        // one bit per flag character
        while(*f != '\0') {
            switch(*f) {
            case 'w': c->flags |= CMD_WRITE; break;
            case 'r': c->flags |= CMD_READONLY; break;
            case 'm': c->flags |= CMD_DENYOOM; break;
            case 'a': c->flags |= CMD_ADMIN; break;
            case 'p': c->flags |= CMD_PUBSUB; break;
            case 's': c->flags |= CMD_NOSCRIPT; break;
            case 'R': c->flags |= CMD_RANDOM; break;
            case 'S': c->flags |= CMD_SORT_FOR_SCRIPT; break;
            case 'l': c->flags |= CMD_LOADING; break;
            case 't': c->flags |= CMD_STALE; break;
            case 'M': c->flags |= CMD_SKIP_MONITOR; break;
            case 'k': c->flags |= CMD_ASKING; break;
            case 'F': c->flags |= CMD_FAST; break;
            default: serverPanic("Unsupported command flag"); break;
            }
            f++;
        }
        // Add the command to server.commands: just hash, rehash, and so on
        retval1 = dictAdd(server.commands, sdsnew(c->name), c);
        /* Populate an additional dictionary that will be unaffected
         * by rename-command statements in redis.conf. */
        retval2 = dictAdd(server.orig_commands, sdsnew(c->name), c);
        serverAssert(retval1 == DICT_OK && retval2 == DICT_OK);
    }
}
// dict.c: adding an entry to the dict (this goes a bit beyond what we needed above)
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
    dictEntry *entry = dictAddRaw(d,key);

    if (!entry) return DICT_ERR;
    // Attach the value to the freshly created entry
    dictSetVal(d, entry, val);
    return DICT_OK;
}
// Since we are here anyway, let's walk through the dict insert path as well,
// so we don't have to come back to it later.
// dict.c: add a key to the dict and return the newly created dictEntry
/* Low level add. This function adds the entry but instead of setting
 * a value returns the dictEntry structure to the user, that will make
 * sure to fill the value field as he wishes.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned.
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int index;
    dictEntry *entry;
    dictht *ht;

    // Just like dictFetchValue, first do an incremental rehash step if needed
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    // If the key already exists we get -1 and bail out:
    // this function only inserts, it never overwrites
    if ((index = _dictKeyIndex(d, key)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    // Entries in a bucket are chained as a linked list (new entry goes in front)
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    /* Set the hash entry fields. */
    // Store the key on the new entry; note the do {} while(0) trick in the macro
    dictSetKey(d, entry, key);
    return entry;
}
// dict.c: compute the bucket index for a key
/* Returns the index of a free slot that can be populated with
 * a hash entry for the given 'key'.
 * If the key already exists, -1 is returned.
 *
 * Note that if we are in the process of rehashing the hash table, the
 * index is always returned in the context of the second (new) hash table. */
static int _dictKeyIndex(dict *d, const void *key)
{
    unsigned int h, idx, table;
    dictEntry *he;

    /* Expand the hash table if needed */
    // Grow the table first if necessary
    if (_dictExpandIfNeeded(d) == DICT_ERR)
        return -1;
    /* Compute the key hash value */
    h = dictHashKey(d, key);
    for (table = 0; table <= 1; table++) {
        idx = h & d->ht[table].sizemask;
        /* Search if this slot does not already contain the given key */
        he = d->ht[table].table[idx];
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return -1;
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return idx;
}
// The expansion path
// dict.c
/* Expand the hash table if needed */
static int _dictExpandIfNeeded(dict *d)
{
    /* Incremental rehashing already in progress. Return. */
    if (dictIsRehashing(d)) return DICT_OK;

    /* If the hash table is empty expand it to the initial size. */
    // DICT_HT_INITIAL_SIZE defaults to 4
    if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);

    /* If we reached the 1:1 ratio, and we are allowed to resize the hash
     * table (global setting) or we should avoid it but the ratio between
     * elements/buckets is over the "safe" threshold, we resize doubling
     * the number of buckets. */
    // Grow by doubling the current number of used entries
    if (d->ht[0].used >= d->ht[0].size &&
        (dict_can_resize ||
         d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
    {
        return dictExpand(d, d->ht[0].used*2);
    }
    return DICT_OK;
}
// dict.c: "expanding" really just allocates an empty dictht, to be filled by
// the step-by-step rehash
/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
    dictht n; /* the new hash table */
    // realsize is the first power of two >= size
    unsigned long realsize = _dictNextPower(size);

    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    // Hand the new table to ht[1] and set rehashidx=0 to mark that a rehash
    // is pending; subsequent operations will then migrate it step by step
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}
// dict.h: setKey / setVal; the do {} while(0) wrapper keeps each macro safe
// when it is used as a single statement (e.g. inside an if/else without braces)
#define dictSetKey(d, entry, _key_) do { \
    if ((d)->type->keyDup) \
        entry->key = (d)->type->keyDup((d)->privdata, _key_); \
    else \
        entry->key = (_key_); \
} while(0)
#define dictSetVal(d, entry, _val_) do { \
    if ((d)->type->valDup) \
        entry->v.val = (d)->type->valDup((d)->privdata, _val_); \
    else \
        entry->v.val = (_val_); \
} while(0)
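
  A quick aside on the do {} while(0) wrapper used by these macros (generic C, not Redis-specific): without it, a multi-statement macro breaks as soon as it is used under an if without braces.

#define BAD_SET(x)  (x) = 1; counter++                   /* two loose statements */
#define GOOD_SET(x) do { (x) = 1; counter++; } while(0)

int counter = 0;

void demo(int cond, int *v) {
    if (cond)
        GOOD_SET(*v);      /* expands to a single statement: works as expected */
    else
        counter--;
    /* With BAD_SET(*v) above, only "(x) = 1;" would be guarded by the if, and
     * the trailing "counter++;" followed by "else" would not even compile. */
}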

  And that, of course, is just a hash table insert. Once it has run, server.commands is populated and incoming requests can be dispatched.

Part 3. The command table definition


  At the top of server.c there is an array that defines the handler for each command; it is what eventually gets loaded into server.commands.

/* Our command table.
*
* Every entry is composed of the following fields:
*
* name: a string representing the command name.
* function: pointer to the C function implementing the command.
* arity: number of arguments, it is possible to use -N to say >= N
* sflags: command flags as string. See below for a table of flags.
* flags: flags as bitmask. Computed by Redis using the 'sflags' field.
* get_keys_proc: an optional function to get key arguments from a command.
* This is only used when the following three fields are not
* enough to specify what arguments are keys.
* first_key_index: first argument that is a key
* last_key_index: last argument that is a key
* key_step: step to get all the keys from first to last argument. For instance
* in MSET the step is two since arguments are key,val,key,val,...
* microseconds: microseconds of total execution time for this command.
* calls: total number of calls of this command.
*
* The flags, microseconds and calls fields are computed by Redis and should
* always be set to zero.
*
* Command flags are expressed using strings where every character represents
* a flag. Later the populateCommandTable() function will take care of
* populating the real 'flags' field using this characters.
*
* This is the meaning of the flags:
*
* w: write command (may modify the key space).
* r: read command (will never modify the key space).
* m: may increase memory usage once called. Don't allow if out of memory.
* a: admin command, like SAVE or SHUTDOWN.
* p: Pub/Sub related command.
* f: force replication of this command, regardless of server.dirty.
* s: command not allowed in scripts.
* R: random command. Command is not deterministic, that is, the same command
* with the same arguments, with the same key space, may have different
* results. For instance SPOP and RANDOMKEY are two random commands.
* S: Sort command output array if called from script, so that the output
* is deterministic.
* l: Allow command while loading the database.
* t: Allow command while a slave has stale data but is not allowed to
* server this data. Normally no command is accepted in this condition
* but just a few.
* M: Do not automatically propagate the command on MONITOR.
* k: Perform an implicit ASKING for this command, so the command will be
* accepted in cluster mode if the slot is marked as 'importing'.
* F: Fast command: O(1) or O(log(N)) command that should never delay
* its execution as long as the kernel scheduler is giving us time.
* Note that commands that may trigger a DEL as a side effect (like SET)
* are not fast commands.
*/
struct redisCommand redisCommandTable[] = {
{"get",getCommand,,"rF",,NULL,,,,,},
{"set",setCommand,-,"wm",,NULL,,,,,},
{"setnx",setnxCommand,,"wmF",,NULL,,,,,},
{"setex",setexCommand,,"wm",,NULL,,,,,},
{"psetex",psetexCommand,,"wm",,NULL,,,,,},
{"append",appendCommand,,"wm",,NULL,,,,,},
{"strlen",strlenCommand,,"rF",,NULL,,,,,},
{"del",delCommand,-,"w",,NULL,,-,,,},
{"unlink",unlinkCommand,-,"wF",,NULL,,-,,,},
{"exists",existsCommand,-,"rF",,NULL,,-,,,},
{"setbit",setbitCommand,,"wm",,NULL,,,,,},
{"getbit",getbitCommand,,"rF",,NULL,,,,,},
{"setrange",setrangeCommand,,"wm",,NULL,,,,,},
{"getrange",getrangeCommand,,"r",,NULL,,,,,},
{"substr",getrangeCommand,,"r",,NULL,,,,,},
{"incr",incrCommand,,"wmF",,NULL,,,,,},
{"decr",decrCommand,,"wmF",,NULL,,,,,},
{"mget",mgetCommand,-,"r",,NULL,,-,,,},
{"rpush",rpushCommand,-,"wmF",,NULL,,,,,},
{"lpush",lpushCommand,-,"wmF",,NULL,,,,,},
{"rpushx",rpushxCommand,,"wmF",,NULL,,,,,},
{"lpushx",lpushxCommand,,"wmF",,NULL,,,,,},
{"linsert",linsertCommand,,"wm",,NULL,,,,,},
{"rpop",rpopCommand,,"wF",,NULL,,,,,},
{"lpop",lpopCommand,,"wF",,NULL,,,,,},
{"brpop",brpopCommand,-,"ws",,NULL,,,,,},
{"brpoplpush",brpoplpushCommand,,"wms",,NULL,,,,,},
{"blpop",blpopCommand,-,"ws",,NULL,,-,,,},
{"llen",llenCommand,,"rF",,NULL,,,,,},
{"lindex",lindexCommand,,"r",,NULL,,,,,},
{"lset",lsetCommand,,"wm",,NULL,,,,,},
{"lrange",lrangeCommand,,"r",,NULL,,,,,},
{"ltrim",ltrimCommand,,"w",,NULL,,,,,},
{"lrem",lremCommand,,"w",,NULL,,,,,},
{"rpoplpush",rpoplpushCommand,,"wm",,NULL,,,,,},
{"sadd",saddCommand,-,"wmF",,NULL,,,,,},
{"srem",sremCommand,-,"wF",,NULL,,,,,},
{"smove",smoveCommand,,"wF",,NULL,,,,,},
{"sismember",sismemberCommand,,"rF",,NULL,,,,,},
{"scard",scardCommand,,"rF",,NULL,,,,,},
{"spop",spopCommand,-,"wRsF",,NULL,,,,,},
{"srandmember",srandmemberCommand,-,"rR",,NULL,,,,,},
{"sinter",sinterCommand,-,"rS",,NULL,,-,,,},
{"sinterstore",sinterstoreCommand,-,"wm",,NULL,,-,,,},
{"sunion",sunionCommand,-,"rS",,NULL,,-,,,},
{"sunionstore",sunionstoreCommand,-,"wm",,NULL,,-,,,},
{"sdiff",sdiffCommand,-,"rS",,NULL,,-,,,},
{"sdiffstore",sdiffstoreCommand,-,"wm",,NULL,,-,,,},
{"smembers",sinterCommand,,"rS",,NULL,,,,,},
{"sscan",sscanCommand,-,"rR",,NULL,,,,,},
{"zadd",zaddCommand,-,"wmF",,NULL,,,,,},
{"zincrby",zincrbyCommand,,"wmF",,NULL,,,,,},
{"zrem",zremCommand,-,"wF",,NULL,,,,,},
{"zremrangebyscore",zremrangebyscoreCommand,,"w",,NULL,,,,,},
{"zremrangebyrank",zremrangebyrankCommand,,"w",,NULL,,,,,},
{"zremrangebylex",zremrangebylexCommand,,"w",,NULL,,,,,},
{"zunionstore",zunionstoreCommand,-,"wm",,zunionInterGetKeys,,,,,},
{"zinterstore",zinterstoreCommand,-,"wm",,zunionInterGetKeys,,,,,},
{"zrange",zrangeCommand,-,"r",,NULL,,,,,},
{"zrangebyscore",zrangebyscoreCommand,-,"r",,NULL,,,,,},
{"zrevrangebyscore",zrevrangebyscoreCommand,-,"r",,NULL,,,,,},
{"zrangebylex",zrangebylexCommand,-,"r",,NULL,,,,,},
{"zrevrangebylex",zrevrangebylexCommand,-,"r",,NULL,,,,,},
{"zcount",zcountCommand,,"rF",,NULL,,,,,},
{"zlexcount",zlexcountCommand,,"rF",,NULL,,,,,},
{"zrevrange",zrevrangeCommand,-,"r",,NULL,,,,,},
{"zcard",zcardCommand,,"rF",,NULL,,,,,},
{"zscore",zscoreCommand,,"rF",,NULL,,,,,},
{"zrank",zrankCommand,,"rF",,NULL,,,,,},
{"zrevrank",zrevrankCommand,,"rF",,NULL,,,,,},
{"zscan",zscanCommand,-,"rR",,NULL,,,,,},
{"hset",hsetCommand,,"wmF",,NULL,,,,,},
{"hsetnx",hsetnxCommand,,"wmF",,NULL,,,,,},
{"hget",hgetCommand,,"rF",,NULL,,,,,},
{"hmset",hmsetCommand,-,"wm",,NULL,,,,,},
{"hmget",hmgetCommand,-,"r",,NULL,,,,,},
{"hincrby",hincrbyCommand,,"wmF",,NULL,,,,,},
{"hincrbyfloat",hincrbyfloatCommand,,"wmF",,NULL,,,,,},
{"hdel",hdelCommand,-,"wF",,NULL,,,,,},
{"hlen",hlenCommand,,"rF",,NULL,,,,,},
{"hstrlen",hstrlenCommand,,"rF",,NULL,,,,,},
{"hkeys",hkeysCommand,,"rS",,NULL,,,,,},
{"hvals",hvalsCommand,,"rS",,NULL,,,,,},
{"hgetall",hgetallCommand,,"r",,NULL,,,,,},
{"hexists",hexistsCommand,,"rF",,NULL,,,,,},
{"hscan",hscanCommand,-,"rR",,NULL,,,,,},
{"incrby",incrbyCommand,,"wmF",,NULL,,,,,},
{"decrby",decrbyCommand,,"wmF",,NULL,,,,,},
{"incrbyfloat",incrbyfloatCommand,,"wmF",,NULL,,,,,},
{"getset",getsetCommand,,"wm",,NULL,,,,,},
{"mset",msetCommand,-,"wm",,NULL,,-,,,},
{"msetnx",msetnxCommand,-,"wm",,NULL,,-,,,},
{"randomkey",randomkeyCommand,,"rR",,NULL,,,,,},
{"select",selectCommand,,"rlF",,NULL,,,,,},
{"move",moveCommand,,"wF",,NULL,,,,,},
{"rename",renameCommand,,"w",,NULL,,,,,},
{"renamenx",renamenxCommand,,"wF",,NULL,,,,,},
{"expire",expireCommand,,"wF",,NULL,,,,,},
{"expireat",expireatCommand,,"wF",,NULL,,,,,},
{"pexpire",pexpireCommand,,"wF",,NULL,,,,,},
{"pexpireat",pexpireatCommand,,"wF",,NULL,,,,,},
{"keys",keysCommand,,"rS",,NULL,,,,,},
{"scan",scanCommand,-,"rR",,NULL,,,,,},
{"dbsize",dbsizeCommand,,"rF",,NULL,,,,,},
{"auth",authCommand,,"rsltF",,NULL,,,,,},
{"ping",pingCommand,-,"rtF",,NULL,,,,,},
{"echo",echoCommand,,"rF",,NULL,,,,,},
{"save",saveCommand,,"ars",,NULL,,,,,},
{"bgsave",bgsaveCommand,,"ar",,NULL,,,,,},
{"bgrewriteaof",bgrewriteaofCommand,,"ar",,NULL,,,,,},
{"shutdown",shutdownCommand,-,"arlt",,NULL,,,,,},
{"lastsave",lastsaveCommand,,"rRF",,NULL,,,,,},
{"type",typeCommand,,"rF",,NULL,,,,,},
{"multi",multiCommand,,"rsF",,NULL,,,,,},
{"exec",execCommand,,"sM",,NULL,,,,,},
{"discard",discardCommand,,"rsF",,NULL,,,,,},
{"sync",syncCommand,,"ars",,NULL,,,,,},
{"psync",syncCommand,,"ars",,NULL,,,,,},
{"replconf",replconfCommand,-,"arslt",,NULL,,,,,},
{"flushdb",flushdbCommand,-,"w",,NULL,,,,,},
{"flushall",flushallCommand,-,"w",,NULL,,,,,},
{"sort",sortCommand,-,"wm",,sortGetKeys,,,,,},
{"info",infoCommand,-,"rlt",,NULL,,,,,},
{"monitor",monitorCommand,,"ars",,NULL,,,,,},
{"ttl",ttlCommand,,"rF",,NULL,,,,,},
{"pttl",pttlCommand,,"rF",,NULL,,,,,},
{"persist",persistCommand,,"wF",,NULL,,,,,},
{"slaveof",slaveofCommand,,"ast",,NULL,,,,,},
{"role",roleCommand,,"lst",,NULL,,,,,},
{"debug",debugCommand,-,"as",,NULL,,,,,},
{"config",configCommand,-,"art",,NULL,,,,,},
{"subscribe",subscribeCommand,-,"rpslt",,NULL,,,,,},
{"unsubscribe",unsubscribeCommand,-,"rpslt",,NULL,,,,,},
{"psubscribe",psubscribeCommand,-,"rpslt",,NULL,,,,,},
{"punsubscribe",punsubscribeCommand,-,"rpslt",,NULL,,,,,},
{"publish",publishCommand,,"pltrF",,NULL,,,,,},
{"pubsub",pubsubCommand,-,"pltrR",,NULL,,,,,},
{"watch",watchCommand,-,"rsF",,NULL,,-,,,},
{"unwatch",unwatchCommand,,"rsF",,NULL,,,,,},
{"cluster",clusterCommand,-,"ar",,NULL,,,,,},
{"restore",restoreCommand,-,"wm",,NULL,,,,,},
{"restore-asking",restoreCommand,-,"wmk",,NULL,,,,,},
{"migrate",migrateCommand,-,"w",,migrateGetKeys,,,,,},
{"asking",askingCommand,,"r",,NULL,,,,,},
{"readonly",readonlyCommand,,"rF",,NULL,,,,,},
{"readwrite",readwriteCommand,,"rF",,NULL,,,,,},
{"dump",dumpCommand,,"r",,NULL,,,,,},
{"object",objectCommand,,"r",,NULL,,,,,},
{"client",clientCommand,-,"rs",,NULL,,,,,},
{"eval",evalCommand,-,"s",,evalGetKeys,,,,,},
{"evalsha",evalShaCommand,-,"s",,evalGetKeys,,,,,},
{"slowlog",slowlogCommand,-,"r",,NULL,,,,,},
{"script",scriptCommand,-,"rs",,NULL,,,,,},
{"time",timeCommand,,"rRF",,NULL,,,,,},
{"bitop",bitopCommand,-,"wm",,NULL,,-,,,},
{"bitcount",bitcountCommand,-,"r",,NULL,,,,,},
{"bitpos",bitposCommand,-,"r",,NULL,,,,,},
{"wait",waitCommand,,"rs",,NULL,,,,,},
{"command",commandCommand,,"rlt",,NULL,,,,,},
{"geoadd",geoaddCommand,-,"wm",,NULL,,,,,},
{"georadius",georadiusCommand,-,"r",,NULL,,,,,},
{"georadiusbymember",georadiusByMemberCommand,-,"r",,NULL,,,,,},
{"geohash",geohashCommand,-,"r",,NULL,,,,,},
{"geopos",geoposCommand,-,"r",,NULL,,,,,},
{"geodist",geodistCommand,-,"r",,NULL,,,,,},
{"pfselftest",pfselftestCommand,,"r",,NULL,,,,,},
{"pfadd",pfaddCommand,-,"wmF",,NULL,,,,,},
{"pfcount",pfcountCommand,-,"r",,NULL,,-,,,},
{"pfmerge",pfmergeCommand,-,"wm",,NULL,,-,,,},
{"pfdebug",pfdebugCommand,-,"w",,NULL,,,,,},
{"latency",latencyCommand,-,"arslt",,NULL,,,,,}
};

  You could say the core features of Redis are all defined here (a handy little reference). If you want to add a feature of your own, this table is also where you would start before going off to implement it; see the hypothetical sketch below. Lua scripts, by the way, are invoked through the eval entry.
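
  As a thought experiment (a hypothetical example of mine, not something in the Redis source), adding a command of your own would mean one extra row in redisCommandTable plus the matching handler; the numeric fields follow the scheme described in the big comment above (arity, sflags, flags, getkeys proc, first/last key index, key step, microseconds, calls).

// Hypothetical table entry: name "hello", 1 argument, read-only and fast,
// no key arguments, stats fields start at zero:
//   {"hello",helloCommand,1,"rF",0,NULL,0,0,0,0,0},

// Hypothetical handler, following the usual xxxCommand(client *c) signature:
void helloCommand(client *c) {
    addReplyStatus(c, "world");   // reply with a simple status line
}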

Part 4. The template method for executing a command


  Before and after the actual command handler runs there is a whole series of checks and bookkeeping; only then is the command really invoked. This is one of the things that separates a database from a plain function call.

// server.c: the call() framework
/* Call() is the core of Redis execution of a command.
 *
 * The following flags can be passed:
 * CMD_CALL_NONE        No flags.
 * CMD_CALL_SLOWLOG     Check command speed and log in the slow log if needed.
 * CMD_CALL_STATS       Populate command stats.
 * CMD_CALL_PROPAGATE_AOF   Append command to AOF if it modified the dataset
 *                          or if the client flags are forcing propagation.
 * CMD_CALL_PROPAGATE_REPL  Send command to slaves if it modified the dataset
 *                          or if the client flags are forcing propagation.
 * CMD_CALL_PROPAGATE   Alias for PROPAGATE_AOF|PROPAGATE_REPL.
 * CMD_CALL_FULL        Alias for SLOWLOG|STATS|PROPAGATE.
 *
 * The exact propagation behavior depends on the client flags.
 * Specifically:
 *
 * 1. If the client flags CLIENT_FORCE_AOF or CLIENT_FORCE_REPL are set
 *    and assuming the corresponding CMD_CALL_PROPAGATE_AOF/REPL is set
 *    in the call flags, then the command is propagated even if the
 *    dataset was not affected by the command.
 * 2. If the client flags CLIENT_PREVENT_REPL_PROP or CLIENT_PREVENT_AOF_PROP
 *    are set, the propagation into AOF or to slaves is not performed even
 *    if the command modified the dataset.
 *
 * Note that regardless of the client flags, if CMD_CALL_PROPAGATE_AOF
 * or CMD_CALL_PROPAGATE_REPL are not set, then respectively AOF or
 * slaves propagation will never occur.
 *
 * Client flags are modified by the implementation of a given command
 * using the following API:
 *
 * forceCommandPropagation(client *c, int flags);
 * preventCommandPropagation(client *c);
 * preventCommandAOF(client *c);
 * preventCommandReplication(client *c);
 *
 */
void call(client *c, int flags) {
    long long dirty, start, duration;
    int client_old_flags = c->flags;

    /* Send the command to clients in MONITOR mode, only if the commands are
     * not generated from reading an AOF. */
    // In MONITOR mode, feed the command to the monitoring clients first
    if (listLength(server.monitors) &&
        !server.loading &&
        !(c->cmd->flags & (CMD_SKIP_MONITOR|CMD_ADMIN)))
    {
        replicationFeedMonitors(c,server.monitors,c->db->id,c->argv,c->argc);
    }

    /* Initialization: clear the flags that must be set by the command on
     * demand, and initialize the array for additional commands propagation. */
    c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
    // Reset server.also_propagate
    redisOpArrayInit(&server.also_propagate);

    /* Call the command. */
    // Invoke the concrete command proc; it replies to the client by itself,
    // here we only measure time spent and dataset changes
    dirty = server.dirty;
    start = ustime();
    c->cmd->proc(c);
    duration = ustime()-start;
    dirty = server.dirty-dirty;
    if (dirty < 0) dirty = 0;

    /* When EVAL is called loading the AOF we don't want commands called
     * from Lua to go into the slowlog or to populate statistics. */
    if (server.loading && c->flags & CLIENT_LUA)
        flags &= ~(CMD_CALL_SLOWLOG | CMD_CALL_STATS);

    /* If the caller is Lua, we want to force the EVAL caller to propagate
     * the script if the command flag or client flag are forcing the
     * propagation. */
    if (c->flags & CLIENT_LUA && server.lua_caller) {
        if (c->flags & CLIENT_FORCE_REPL)
            server.lua_caller->flags |= CLIENT_FORCE_REPL;
        if (c->flags & CLIENT_FORCE_AOF)
            server.lua_caller->flags |= CLIENT_FORCE_AOF;
    }

    /* Log the command into the Slow log if needed, and populate the
     * per-command statistics that we show in INFO commandstats. */
    // Slow log recording
    if (flags & CMD_CALL_SLOWLOG && c->cmd->proc != execCommand) {
        char *latency_event = (c->cmd->flags & CMD_FAST) ?
                              "fast-command" : "command";
        latencyAddSampleIfNeeded(latency_event,duration/1000);
        slowlogPushEntryIfNeeded(c->argv,c->argc,duration);
    }
    if (flags & CMD_CALL_STATS) {
        c->cmd->microseconds += duration;
        c->cmd->calls++;
    }

    /* Propagate the command into the AOF and replication link */
    if (flags & CMD_CALL_PROPAGATE &&
        (c->flags & CLIENT_PREVENT_PROP) != CLIENT_PREVENT_PROP)
    {
        int propagate_flags = PROPAGATE_NONE;

        /* Check if the command operated changes in the data set. If so
         * set for replication / AOF propagation. */
        if (dirty) propagate_flags |= (PROPAGATE_AOF|PROPAGATE_REPL);

        /* If the client forced AOF / replication of the command, set
         * the flags regardless of the command effects on the data set. */
        if (c->flags & CLIENT_FORCE_REPL) propagate_flags |= PROPAGATE_REPL;
        if (c->flags & CLIENT_FORCE_AOF) propagate_flags |= PROPAGATE_AOF;

        /* However prevent AOF / replication propagation if the command
         * implementation called preventCommandPropagation() or similar,
         * or if we don't have the call() flags to do so. */
        if (c->flags & CLIENT_PREVENT_REPL_PROP ||
            !(flags & CMD_CALL_PROPAGATE_REPL))
                propagate_flags &= ~PROPAGATE_REPL;
        if (c->flags & CLIENT_PREVENT_AOF_PROP ||
            !(flags & CMD_CALL_PROPAGATE_AOF))
                propagate_flags &= ~PROPAGATE_AOF;

        /* Call propagate() only if at least one of AOF / replication
         * propagation is needed. */
        // Anything other than PROPAGATE_NONE gets propagated
        if (propagate_flags != PROPAGATE_NONE)
            propagate(c->cmd,c->db->id,c->argv,c->argc,propagate_flags);
    }

    /* Restore the old replication flags, since call() can be executed
     * recursively. */
    c->flags &= ~(CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);
    c->flags |= client_old_flags &
        (CLIENT_FORCE_AOF|CLIENT_FORCE_REPL|CLIENT_PREVENT_PROP);

    /* Handle the alsoPropagate() API to handle commands that want to propagate
     * multiple separated commands. Note that alsoPropagate() is not affected
     * by CLIENT_PREVENT_PROP flag. */
    if (server.also_propagate.numops) {
        int j;
        redisOp *rop;

        // Propagate the extra commands queued in server.also_propagate
        if (flags & CMD_CALL_PROPAGATE) {
            for (j = 0; j < server.also_propagate.numops; j++) {
                rop = &server.also_propagate.ops[j];
                int target = rop->target;
                /* Whatever the command wish is, we honor the call() flags. */
                if (!(flags&CMD_CALL_PROPAGATE_AOF)) target &= ~PROPAGATE_AOF;
                if (!(flags&CMD_CALL_PROPAGATE_REPL)) target &= ~PROPAGATE_REPL;
                if (target)
                    propagate(rop->cmd,rop->dbid,rop->argv,rop->argc,target);
            }
        }
        redisOpArrayFree(&server.also_propagate);
    }
    server.stat_numcommands++;
}
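
  For completeness, the propagate() called at the end of this flow is only a small dispatcher. A condensed paraphrase of mine (the real function sits in server.c):

// Condensed paraphrase of propagate(): feed the write to the AOF and/or the replicas.
void propagateSketch(struct redisCommand *cmd, int dbid, robj **argv, int argc, int flags)
{
    if (server.aof_state != AOF_OFF && (flags & PROPAGATE_AOF))
        feedAppendOnlyFile(cmd, dbid, argv, argc);               // append to the AOF buffer
    if (flags & PROPAGATE_REPL)
        replicationFeedSlaves(server.slaves, dbid, argv, argc);  // push to replica output buffers
}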

Part 5. Replying to the client

  To reply to a client you call addReply(); there are other simplified variants, but the idea is the same.

// networking.c: queue the reply object obj for client c
/* -----------------------------------------------------------------------------
 * Higher level functions to queue data on the client output buffer.
 * The following functions are the ones that commands implementations will call.
 * -------------------------------------------------------------------------- */
void addReply(client *c, robj *obj) {
    // If the client cannot be written to right now, just skip this write
    if (prepareClientToWrite(c) != C_OK) return;

    /* This is an important place where we can avoid copy-on-write
     * when there is a saving child running, avoiding touching the
     * refcount field of the object if it's not needed.
     *
     * If the encoding is RAW and there is room in the static buffer
     * we'll be able to send the object to the client without
     * messing with its page. */
    // sdsEncodedObject: encoding is OBJ_ENCODING_RAW or OBJ_ENCODING_EMBSTR
    if (sdsEncodedObject(obj)) {
        // Try to append the data to the fixed reply buffer c->buf;
        // if that fails, fall back to appending obj to the c->reply list
        if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != C_OK)
            _addReplyObjectToList(c,obj);
    } else if (obj->encoding == OBJ_ENCODING_INT) {
        /* Optimization: if there is room in the static buffer for 32 bytes
         * (more than the max chars a 64 bit integer can take as string) we
         * avoid decoding the object and go for the lower level approach. */
        if (listLength(c->reply) == 0 && (sizeof(c->buf) - c->bufpos) >= 32) {
            char buf[32];
            int len;

            len = ll2string(buf,sizeof(buf),(long)obj->ptr);
            if (_addReplyToBuffer(c,buf,len) == C_OK)
                return;
            /* else... continue with the normal code path, but should never
             * happen actually since we verified there is room. */
        }
        obj = getDecodedObject(obj);
        if (_addReplyToBuffer(c,obj->ptr,sdslen(obj->ptr)) != C_OK)
            _addReplyObjectToList(c,obj);
        decrRefCount(obj);
    } else {
        serverPanic("Wrong obj->encoding in addReply()");
    }
}
// networking.c
int _addReplyToBuffer(client *c, const char *s, size_t len) {
    size_t available = sizeof(c->buf)-c->bufpos;

    if (c->flags & CLIENT_CLOSE_AFTER_REPLY) return C_OK;

    /* If there already are entries in the reply list, we cannot
     * add anything more to the static buffer. */
    if (listLength(c->reply) > 0) return C_ERR;

    /* Check that the buffer has enough space available for this string. */
    if (len > available) return C_ERR;

    memcpy(c->buf+c->bufpos,s,len);
    c->bufpos+=len;
    return C_OK;
}

  Once the data has been written into c->buf, who actually writes the result out to the client?

  Is it a background thread that keeps writing, or is the write picked up on the next iteration of the event loop? What do you think? (Hint: see server.clients_pending_write.)
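
  To spare you the suspense, here is a condensed paraphrase of the pending-write path (the real logic lives around prepareClientToWrite(), beforeSleep() and handleClientsWithPendingWrites(); the compressed version below is mine): queuing the first reply puts the client on server.clients_pending_write, and right before the event loop goes back to waiting the server walks that list and writes each buffer out directly, installing a writable-event handler only when a single write cannot drain everything.

// Condensed paraphrase, not the literal source:
//
// 1. prepareClientToWrite(c) marks the client and appends it to
//    server.clients_pending_write the first time a reply is queued.
// 2. Before every event-loop iteration, beforeSleep() calls
//    handleClientsWithPendingWrites(), which does roughly this:
void handleClientsWithPendingWritesSketch(void) {
    listIter li;
    listNode *ln;

    listRewind(server.clients_pending_write,&li);
    while ((ln = listNext(&li)) != NULL) {
        client *c = listNodeValue(ln);
        listDelNode(server.clients_pending_write,ln);

        // Try to flush c->buf / c->reply with a direct write right now.
        if (writeToClient(c->fd,c,0) == C_ERR) continue;

        // Only if the socket buffer filled up do we register a writable-event
        // handler and let the event loop finish the job later.
        if (clientHasPendingReplies(c))
            aeCreateFileEvent(server.el, c->fd, AE_WRITABLE,
                              sendReplyToClient, c);
    }
}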
