The previous articles in this series gave us a high-level picture of how ZooKeeper handles network data.

  In this article we start from the details and trace how a TCP packet is converted into ZooKeeper's internal data-processing flow.

1. Listening on the client request socket

  The server listens on its port with NIO and receives the TCP data stream.

        // org.apache.zookeeper.server.NIOServerCnxnFactory.AcceptThread#AcceptThread
public AcceptThread(ServerSocketChannel ss, InetSocketAddress addr, Set<SelectorThread> selectorThreads) throws IOException {
super("NIOServerCxnFactory.AcceptThread:" + addr);
this.acceptSocket = ss;
// listen only for OP_ACCEPT events
this.acceptKey = acceptSocket.register(selector, SelectionKey.OP_ACCEPT);
this.selectorThreads = Collections.unmodifiableList(new ArrayList<SelectorThread>(selectorThreads));
selectorIterator = this.selectorThreads.iterator();
}
// org.apache.zookeeper.server.NIOServerCnxnFactory.AcceptThread#run
public void run() {
try {
// loop forever, accepting connections until stopped
while (!stopped && !acceptSocket.socket().isClosed()) {
try {
select();
} catch (RuntimeException e) {
LOG.warn("Ignoring unexpected runtime exception", e);
} catch (Exception e) {
LOG.warn("Ignoring unexpected exception", e);
}
}
} finally {
closeSelector();
// This will wake up the selector threads, and tell the
// worker thread pool to begin shutdown.
if (!reconfiguring) {
NIOServerCnxnFactory.this.stop();
}
LOG.info("accept thread exitted run method");
}
}
// org.apache.zookeeper.server.NIOServerCnxnFactory.AcceptThread#select
private void select() {
try {
// nio select
selector.select();
Iterator<SelectionKey> selectedKeys = selector.selectedKeys().iterator();
while (!stopped && selectedKeys.hasNext()) {
SelectionKey key = selectedKeys.next();
selectedKeys.remove();
if (!key.isValid()) {
continue;
}
if (key.isAcceptable()) {
// a connection is ready: accept it
if (!doAccept()) {
// If unable to pull a new connection off the accept
// queue, pause accepting to give us time to free
// up file descriptors and so the accept thread
// doesn't spin in a tight loop.
pauseAccept(10);
}
} else {
LOG.warn("Unexpected ops in accept select {}", key.readyOps());
}
}
} catch (IOException e) {
LOG.warn("Ignoring IOException while selecting", e);
}
}
// org.apache.zookeeper.server.NIOServerCnxnFactory.AcceptThread#doAccept
/**
* Accept new socket connections. Enforces maximum number of connections
* per client IP address. Round-robin assigns to selector thread for
* handling. Returns whether pulled a connection off the accept queue
* or not. If encounters an error attempts to fast close the socket.
*
* @return whether was able to accept a connection or not
*/
private boolean doAccept() {
boolean accepted = false;
SocketChannel sc = null;
try {
sc = acceptSocket.accept();
accepted = true;
InetAddress ia = sc.socket().getInetAddress();
int cnxncount = getClientCnxnCount(ia);
if (maxClientCnxns > 0 && cnxncount >= maxClientCnxns) {
throw new IOException("Too many connections from " + ia + " - max is " + maxClientCnxns);
}
LOG.debug("Accepted socket connection from {}", sc.socket().getRemoteSocketAddress());
sc.configureBlocking(false);
// Round-robin assign this connection to a selector thread
// pick one selector thread to take the connection; that thread owns all further processing for it
if (!selectorIterator.hasNext()) {
selectorIterator = selectorThreads.iterator();
}
SelectorThread selectorThread = selectorIterator.next();
if (!selectorThread.addAcceptedConnection(sc)) {
throw new IOException("Unable to add connection to selector queue"
+ (stopped ? " (shutdown in progress)" : ""));
}
acceptErrorLogger.flush();
} catch (IOException e) {
// accept, maxClientCnxns, configureBlocking
ServerMetrics.getMetrics().CONNECTION_REJECTED.add(1);
acceptErrorLogger.rateLimitLog("Error accepting new connection: " + e.getMessage());
fastCloseSock(sc);
}
return accepted;
}
}
// org.apache.zookeeper.server.NIOServerCnxnFactory.SelectorThread#addAcceptedConnection
/**
* Place new accepted connection onto a queue for adding. Do this
* so only the selector thread modifies what keys are registered
* with the selector.
*/
public boolean addAcceptedConnection(SocketChannel accepted) {
// add to the SelectorThread's queue and return immediately so accepting can continue
if (stopped || !acceptedQueue.offer(accepted)) {
return false;
}
// wake up the selector so it starts working on the new connection
wakeupSelector();
return true;
}
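
  Stripped of ZooKeeper's bookkeeping, the accept side is the classic NIO pattern: one thread selects only on OP_ACCEPT and hands every accepted channel to one of several selector threads through a queue plus a wakeup() call, so each selector is only ever touched by its own thread. A minimal, hypothetical sketch of that pattern (class and method names here are mine, not ZooKeeper's):

import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.ServerSocketChannel;
import java.nio.channels.SocketChannel;
import java.util.List;
import java.util.concurrent.LinkedBlockingQueue;

// Hypothetical sketch: one accept thread, several selector threads, queue + wakeup() handoff.
class MiniSelectorThread extends Thread {
    private final Selector selector;
    private final LinkedBlockingQueue<SocketChannel> acceptedQueue = new LinkedBlockingQueue<>();

    MiniSelectorThread() throws IOException {
        this.selector = Selector.open();
    }

    boolean addAcceptedConnection(SocketChannel sc) {
        boolean ok = acceptedQueue.offer(sc);
        selector.wakeup(); // wake the blocked select() so the new channel is registered promptly
        return ok;
    }

    @Override
    public void run() {
        try {
            while (!isInterrupted()) {
                selector.select();
                // only this thread touches this selector, so registration needs no extra locking
                SocketChannel sc;
                while ((sc = acceptedQueue.poll()) != null) {
                    sc.register(selector, SelectionKey.OP_READ);
                }
                for (SelectionKey key : selector.selectedKeys()) {
                    if (key.isValid() && (key.isReadable() || key.isWritable())) {
                        // hand the key to a worker thread, as ZooKeeper's handleIO() does
                    }
                }
                selector.selectedKeys().clear();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}

class MiniAcceptThread extends Thread {
    private final ServerSocketChannel acceptSocket;
    private final Selector selector;
    private final List<MiniSelectorThread> selectorThreads;
    private int next = 0; // round-robin index

    MiniAcceptThread(int port, List<MiniSelectorThread> selectorThreads) throws IOException {
        this.acceptSocket = ServerSocketChannel.open();
        acceptSocket.bind(new InetSocketAddress(port));
        acceptSocket.configureBlocking(false);
        this.selector = Selector.open();
        acceptSocket.register(selector, SelectionKey.OP_ACCEPT); // accept events only
        this.selectorThreads = selectorThreads;
    }

    @Override
    public void run() {
        try {
            while (!isInterrupted()) {
                selector.select();
                for (SelectionKey key : selector.selectedKeys()) {
                    if (key.isAcceptable()) {
                        SocketChannel sc = acceptSocket.accept();
                        if (sc == null) {
                            continue;
                        }
                        sc.configureBlocking(false);
                        // round-robin handoff: the chosen selector thread owns the channel from here on
                        MiniSelectorThread st = selectorThreads.get(next);
                        next = (next + 1) % selectorThreads.size();
                        st.addAcceptedConnection(sc);
                    }
                }
                selector.selectedKeys().clear();
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}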

2. Parsing data from the connection

  Once the accept thread hands over a connection, the SelectorThread takes care of reading from and writing to it.

        // org.apache.zookeeper.server.NIOServerCnxnFactory.SelectorThread
/**
* The main loop for the thread selects() on the connections and
* dispatches ready I/O work requests, then registers all pending
* newly accepted connections and updates any interest ops on the
* queue.
*/
public void run() {
try {
// loop forever, processing until stopped
while (!stopped) {
try {
select();
processAcceptedConnections();
processInterestOpsUpdateRequests();
} catch (RuntimeException e) {
LOG.warn("Ignoring unexpected runtime exception", e);
} catch (Exception e) {
LOG.warn("Ignoring unexpected exception", e);
}
}
// Close connections still pending on the selector. Any others
// with in-flight work, let drain out of the work queue.
for (SelectionKey key : selector.keys()) {
NIOServerCnxn cnxn = (NIOServerCnxn) key.attachment();
if (cnxn.isSelectable()) {
cnxn.close(ServerCnxn.DisconnectReason.SERVER_SHUTDOWN);
}
cleanupSelectionKey(key);
}
SocketChannel accepted;
while ((accepted = acceptedQueue.poll()) != null) {
fastCloseSock(accepted);
}
updateQueue.clear();
} finally {
closeSelector();
// This will wake up the accept thread and the other selector
// threads, and tell the worker thread pool to begin shutdown.
NIOServerCnxnFactory.this.stop();
LOG.info("selector thread exitted run method");
}
}
private void select() {
try {
// this selector is private to the thread, so operations on it are thread-safe
// it was created via this.selector = Selector.open()
// the AcceptThread wakes it up by calling wakeupSelector(), i.e. selector.wakeup()
selector.select();
Set<SelectionKey> selected = selector.selectedKeys();
ArrayList<SelectionKey> selectedList = new ArrayList<SelectionKey>(selected);
Collections.shuffle(selectedList);
Iterator<SelectionKey> selectedKeys = selectedList.iterator();
while (!stopped && selectedKeys.hasNext()) {
SelectionKey key = selectedKeys.next();
selected.remove(key);
if (!key.isValid()) {
cleanupSelectionKey(key);
continue;
}
if (key.isReadable() || key.isWritable()) {
// a read or write event: enter the I/O handling path
handleIO(key);
} else {
LOG.warn("Unexpected ops in select {}", key.readyOps());
}
}
} catch (IOException e) {
LOG.warn("Ignoring IOException while selecting", e);
}
}
/**
* Schedule I/O for processing on the connection associated with
* the given SelectionKey. If a worker thread pool is not being used,
* I/O is run directly by this thread.
*/
private void handleIO(SelectionKey key) {
IOWorkRequest workRequest = new IOWorkRequest(this, key);
NIOServerCnxn cnxn = (NIOServerCnxn) key.attachment();
// Stop selecting this key while processing on its
// connection
cnxn.disableSelectable();
key.interestOps(0);
touchCnxn(cnxn);
// wrap the key in an IOWorkRequest and hand it to the worker thread pool
workerPool.schedule(workRequest);
}
// org.apache.zookeeper.server.WorkerService#schedule
/**
* Schedule work to be done. If a worker thread pool is not being
* used, work is done directly by this thread. This schedule API is
* for use with non-assignable WorkerServices. For assignable
* WorkerServices, will always run on the first thread.
*/
public void schedule(WorkRequest workRequest) {
schedule(workRequest, 0);
}
/**
* Schedule work to be done by the thread assigned to this id. Thread
* assignment is a single mod operation on the number of threads. If a
* worker thread pool is not being used, work is done directly by
* this thread.
*/
public void schedule(WorkRequest workRequest, long id) {
if (stopped) {
workRequest.cleanup();
return;
}
ScheduledWorkRequest scheduledWorkRequest = new ScheduledWorkRequest(workRequest);
// If we have a worker thread pool, use that; otherwise, do the work
// directly.
int size = workers.size();
if (size > 0) {
try {
// make sure to map negative ids as well to [0, size-1]
// here id = 0, so a single executor is used; the scheduledWorkRequest is submitted to it
int workerNum = ((int) (id % size) + size) % size;
ExecutorService worker = workers.get(workerNum);
worker.execute(scheduledWorkRequest);
} catch (RejectedExecutionException e) {
LOG.warn("ExecutorService rejected execution", e);
workRequest.cleanup();
}
} else {
// When there is no worker thread pool, do the work directly
// and wait for its completion
scheduledWorkRequest.run();
}
}
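
  One detail worth a second look: schedule(workRequest, id) folds an arbitrary, possibly negative id onto a worker index in [0, size - 1] with ((int) (id % size) + size) % size. A tiny illustration of that arithmetic (demo code, not part of ZooKeeper):

// Demo only: show how the double-mod maps any id, negative or not, into [0, size - 1].
public class WorkerIndexDemo {
    static int workerFor(long id, int size) {
        return ((int) (id % size) + size) % size;
    }

    public static void main(String[] args) {
        int size = 4;
        System.out.println(workerFor(10, size));  // 10 % 4 = 2,   (2 + 4) % 4 = 2  -> worker 2
        System.out.println(workerFor(-10, size)); // -10 % 4 = -2, (-2 + 4) % 4 = 2 -> worker 2
        System.out.println(workerFor(0, size));   // the I/O path above always passes id = 0 -> worker 0
    }
}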

3. Reading the channel's data: WorkerService

  This stage picks up the work submitted by the selector thread.

    // org.apache.zookeeper.server.WorkerService#schedule
/**
* Schedule work to be done by the thread assigned to this id. Thread
* assignment is a single mod operation on the number of threads. If a
* worker thread pool is not being used, work is done directly by
* this thread.
*/
public void schedule(WorkRequest workRequest, long id) {
if (stopped) {
workRequest.cleanup();
return;
}
ScheduledWorkRequest scheduledWorkRequest = new ScheduledWorkRequest(workRequest);
// If we have a worker thread pool, use that; otherwise, do the work
// directly.
int size = workers.size();
if (size > 0) {
try {
// make sure to map negative ids as well to [0, size-1]
int workerNum = ((int) (id % size) + size) % size;
ExecutorService worker = workers.get(workerNum);
worker.execute(scheduledWorkRequest);
} catch (RejectedExecutionException e) {
LOG.warn("ExecutorService rejected execution", e);
workRequest.cleanup();
}
} else {
// When there is no worker thread pool, do the work directly
// and wait for its completion
scheduledWorkRequest.run();
}
}
// org.apache.zookeeper.server.WorkerService.ScheduledWorkRequest
private class ScheduledWorkRequest implements Runnable {
private final WorkRequest workRequest;
ScheduledWorkRequest(WorkRequest workRequest) {
this.workRequest = workRequest;
}
@Override
public void run() {
try {
// Check if stopped while request was on queue
if (stopped) {
workRequest.cleanup();
return;
}
// invoke the actual WorkRequest
workRequest.doWork();
} catch (Exception e) {
LOG.warn("Unexpected exception", e);
workRequest.cleanup();
}
}
}
// org.apache.zookeeper.server.NIOServerCnxnFactory.IOWorkRequest#doWork
public void doWork() throws InterruptedException {
if (!key.isValid()) {
selectorThread.cleanupSelectionKey(key);
return;
}
// handle reads and writes for the client request
if (key.isReadable() || key.isWritable()) {
// hand the key to the connection object for the actual I/O processing
cnxn.doIO(key);
// Check if we shutdown or doIO() closed this connection
if (stopped) {
cnxn.close(ServerCnxn.DisconnectReason.SERVER_SHUTDOWN);
return;
}
if (!key.isValid()) {
selectorThread.cleanupSelectionKey(key);
return;
}
// touch the connection to keep it alive so the response can be delivered
touchCnxn(cnxn);
}
// Mark this connection as once again ready for selection
cnxn.enableSelectable();
// Push an update request on the queue to resume selecting
// on the current set of interest ops, which may have changed
// as a result of the I/O operations we just performed.
if (!selectorThread.addInterestOpsUpdateRequest(key)) {
cnxn.close(ServerCnxn.DisconnectReason.CONNECTION_MODE_CHANGED);
}
}
// handle the actual reading and writing of data
// org.apache.zookeeper.server.NIOServerCnxn#doIO
void doIO(SelectionKey k) throws InterruptedException {
try {
if (!isSocketOpen()) {
LOG.warn("trying to do i/o on a null socket for session: 0x{}", Long.toHexString(sessionId)); return;
}
if (k.isReadable()) {
int rc = sock.read(incomingBuffer);
if (rc < 0) {
handleFailedRead();
}
if (incomingBuffer.remaining() == 0) {
boolean isPayload;
if (incomingBuffer == lenBuffer) { // start of next request
incomingBuffer.flip();
isPayload = readLength(k);
incomingBuffer.clear();
} else {
// continuation
isPayload = true;
}
if (isPayload) { // not the case for 4letterword
// the full payload has arrived: read the message body and pass the data downstream
readPayload();
} else {
// four letter words take care
// need not do anything else
return;
}
}
}
if (k.isWritable()) {
handleWrite(k);
if (!initialized && !getReadInterest() && !getWriteInterest()) {
throw new CloseRequestException("responded to info probe", DisconnectReason.INFO_PROBE);
}
}
} catch (CancelledKeyException e) {
LOG.warn("CancelledKeyException causing close of session: 0x{}", Long.toHexString(sessionId)); LOG.debug("CancelledKeyException stack trace", e); close(DisconnectReason.CANCELLED_KEY_EXCEPTION);
} catch (CloseRequestException e) {
// expecting close to log session closure
close();
} catch (EndOfStreamException e) {
LOG.warn("Unexpected exception", e);
// expecting close to log session closure
close(e.getReason());
} catch (ClientCnxnLimitException e) {
// Common case exception, print at debug level
ServerMetrics.getMetrics().CONNECTION_REJECTED.add(1);
LOG.warn("Closing session 0x{}", Long.toHexString(sessionId), e);
close(DisconnectReason.CLIENT_CNX_LIMIT);
} catch (IOException e) {
LOG.warn("Close of session 0x{}", Long.toHexString(sessionId), e);
close(DisconnectReason.IO_EXCEPTION);
}
}
// org.apache.zookeeper.server.NIOServerCnxn#readPayload
/** Read the request payload (everything following the length prefix) */
private void readPayload() throws IOException, InterruptedException, ClientCnxnLimitException {
if (incomingBuffer.remaining() != 0) { // have we read length bytes?
int rc = sock.read(incomingBuffer); // sock is non-blocking, so ok
if (rc < 0) {
handleFailedRead();
}
}
// check again whether all of the bytes have arrived
if (incomingBuffer.remaining() == 0) { // have we read length bytes?
incomingBuffer.flip();
packetReceived(4 + incomingBuffer.remaining());
if (!initialized) {
readConnectRequest();
} else {
// connection already initialized: read the request straight from the buffer
readRequest();
}
lenBuffer.clear();
incomingBuffer = lenBuffer;
}
}
// org.apache.zookeeper.server.NIOServerCnxn#readRequest
private void readRequest() throws IOException {
// hand off to zkServer, which holds the fully configured processor chain
zkServer.processPacket(this, incomingBuffer);
}
// org.apache.zookeeper.server.ZooKeeperServer#processPacket
public void processPacket(ServerCnxn cnxn, ByteBuffer incomingBuffer) throws IOException {
// We have the request, now process and setup for next
InputStream bais = new ByteBufferInputStream(incomingBuffer);
BinaryInputArchive bia = BinaryInputArchive.getArchive(bais);
RequestHeader h = new RequestHeader();
h.deserialize(bia, "header");
// Need to increase the outstanding request count first, otherwise
// there might be a race condition that it enabled recv after
// processing request and then disabled when check throttling.
//
// Be aware that we're actually checking the global outstanding
// request before this request.
//
// It's fine if the IOException thrown before we decrease the count
// in cnxn, since it will close the cnxn anyway.
cnxn.incrOutstandingAndCheckThrottle(h);
// Through the magic of byte buffers, txn will not be
// pointing
// to the start of the txn
incomingBuffer = incomingBuffer.slice();
// dispatch on the request type
if (h.getType() == OpCode.auth) {
LOG.info("got auth packet {}", cnxn.getRemoteSocketAddress());
AuthPacket authPacket = new AuthPacket();
ByteBufferInputStream.byteBuffer2Record(incomingBuffer, authPacket);
String scheme = authPacket.getScheme();
ServerAuthenticationProvider ap = ProviderRegistry.getServerProvider(scheme);
Code authReturn = KeeperException.Code.AUTHFAILED;
if (ap != null) {
try {
// handleAuthentication may close the connection, to allow the client to choose
// a different server to connect to.
authReturn = ap.handleAuthentication(
new ServerAuthenticationProvider.ServerObjs(this, cnxn),
authPacket.getAuth());
} catch (RuntimeException e) {
LOG.warn("Caught runtime exception from AuthenticationProvider: {}", scheme, e);
authReturn = KeeperException.Code.AUTHFAILED;
}
}
if (authReturn == KeeperException.Code.OK) {
LOG.debug("Authentication succeeded for scheme: {}", scheme);
LOG.info("auth success {}", cnxn.getRemoteSocketAddress());
ReplyHeader rh = new ReplyHeader(h.getXid(), 0, KeeperException.Code.OK.intValue());
cnxn.sendResponse(rh, null, null);
} else {
if (ap == null) {
LOG.warn(
"No authentication provider for scheme: {} has {}",
scheme,
ProviderRegistry.listProviders());
} else {
LOG.warn("Authentication failed for scheme: {}", scheme);
}
// send a response...
ReplyHeader rh = new ReplyHeader(h.getXid(), 0, KeeperException.Code.AUTHFAILED.intValue());
cnxn.sendResponse(rh, null, null);
// ... and close connection
cnxn.sendBuffer(ServerCnxnFactory.closeConn);
cnxn.disableRecv();
}
return;
} else if (h.getType() == OpCode.sasl) {
processSasl(incomingBuffer, cnxn, h);
} else {
// the general request path
if (shouldRequireClientSaslAuth() && !hasCnxSASLAuthenticated(cnxn)) {
ReplyHeader replyHeader = new ReplyHeader(h.getXid(), 0, Code.SESSIONCLOSEDREQUIRESASLAUTH.intValue());
cnxn.sendResponse(replyHeader, null, "response");
cnxn.sendCloseSession();
cnxn.disableRecv();
} else {
// build the Request: the internal data model is now in place (sessionId, xid, buffer, ...)
Request si = new Request(cnxn, cnxn.getSessionId(), h.getXid(), h.getType(), incomingBuffer, cnxn.getAuthInfo());
int length = incomingBuffer.limit();
if (isLargeRequest(length)) {
// checkRequestSize will throw IOException if request is rejected
checkRequestSizeWhenMessageReceived(length);
si.setLargeRequestSize(length);
}
si.setOwner(ServerCnxn.me);
// submit it to the request queue
submitRequest(si);
}
}
}
public void submitRequest(Request si) {
enqueueRequest(si);
}
public void enqueueRequest(Request si) {
if (requestThrottler == null) {
synchronized (this) {
try {
// Since all requests are passed to the request
// processor it should wait for setting up the request
// processor chain. The state will be updated to RUNNING
// after the setup.
while (state == State.INITIAL) {
wait(1000);
}
} catch (InterruptedException e) {
LOG.warn("Unexpected interruption", e);
}
if (requestThrottler == null) {
throw new RuntimeException("Not started");
}
}
}
// hand the request to the throttler
// requestThrottler = new RequestThrottler(this); after throttling, the request is eventually submitted back to this zkServer
requestThrottler.submitRequest(si);
}
// org.apache.zookeeper.server.RequestThrottler#submitRequest
public void submitRequest(Request request) {
// if the throttler is shutting down, drop the request
if (stopping) {
LOG.debug("Shutdown in progress. Request cannot be processed");
dropRequest(request);
} else {
// enqueue into a LinkedBlockingQueue; the throttler thread drains it asynchronously
submittedRequests.add(request);
}
}
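
  doIO() and readPayload() above implement a simple length-prefixed framing: the 4-byte lenBuffer is filled first, its value sizes the payload buffer, and only when that buffer is also full does the request move on; because the socket is non-blocking, either buffer may need several reads to fill. The same idea in isolation, as a minimal hypothetical sketch (names are mine, not ZooKeeper's):

import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.SocketChannel;

// Hypothetical sketch of 4-byte length-prefixed framing over a non-blocking channel.
class FrameReader {
    private final ByteBuffer lenBuffer = ByteBuffer.allocate(4); // length prefix
    private ByteBuffer incoming = lenBuffer;                     // buffer currently being filled

    /** Returns the complete payload, or null if more reads are needed. */
    byte[] readFrame(SocketChannel sock) throws IOException {
        int rc = sock.read(incoming);
        if (rc < 0) {
            throw new IOException("peer closed the connection");
        }
        if (incoming.remaining() > 0) {
            return null; // not enough bytes yet; wait for the next readable event
        }
        if (incoming == lenBuffer) {
            // length prefix complete: size the payload buffer and keep reading
            lenBuffer.flip();
            int len = lenBuffer.getInt(); // a real server would validate this value first
            lenBuffer.clear();
            incoming = ByteBuffer.allocate(len);
            return readFrame(sock); // part of the payload may already be available
        }
        // payload complete: hand it upward and reset for the next frame
        incoming.flip();
        byte[] payload = new byte[incoming.remaining()];
        incoming.get(payload);
        incoming = lenBuffer;
        return payload;
    }
}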

4. The first queue-driven stage: the RequestThrottler

  The TCP data has now been converted into a Request instance and placed on a queue. That queue is drained by the RequestThrottler, which checks whether the configured maximum number of in-flight requests has been exceeded; if so, it stalls the request so the downstream stages are not overwhelmed.

    // org.apache.zookeeper.server.RequestThrottler#run
@Override
public void run() {
try {
// loop until killed
while (true) {
if (killed) {
break;
}
// blocking take: as soon as a request is submitted it is picked up
Request request = submittedRequests.take();
if (Request.requestOfDeath == request) {
break;
}
if (request.mustDrop()) {
continue;
}
// Throttling is disabled when maxRequests = 0
if (maxRequests > 0) {
while (!killed) {
if (dropStaleRequests && request.isStale()) {
// Note: this will close the connection
dropRequest(request);
ServerMetrics.getMetrics().STALE_REQUESTS_DROPPED.add(1);
request = null;
break;
}
// as long as the in-flight count is below the limit, pass straight through
if (zks.getInProcess() < maxRequests) {
break;
}
throttleSleep(stallTime);
}
}
if (killed) {
break;
}
// A dropped stale request will be null
if (request != null) {
if (request.isStale()) {
ServerMetrics.getMetrics().STALE_REQUESTS.add(1);
}
// throttling check passed: submit to zkServer for processing
zks.submitRequestNow(request);
}
}
} catch (InterruptedException e) {
LOG.error("Unexpected interruption", e);
}
int dropped = drainQueue();
LOG.info("RequestThrottler shutdown. Dropped {} requests", dropped);
}
// org.apache.zookeeper.server.ZooKeeperServer#submitRequestNow
public void submitRequestNow(Request si) {
// make sure the processor chain has been built
if (firstProcessor == null) {
synchronized (this) {
try {
// Since all requests are passed to the request
// processor it should wait for setting up the request
// processor chain. The state will be updated to RUNNING
// after the setup.
while (state == State.INITIAL) {
wait(1000);
}
} catch (InterruptedException e) {
LOG.warn("Unexpected interruption", e);
}
if (firstProcessor == null || state != State.RUNNING) {
throw new RuntimeException("Not started");
}
}
}
try {
touch(si.cnxn);
// check that the request type is supported
boolean validpacket = Request.isValid(si.type);
if (validpacket) {
setLocalSessionFlag(si);
// invoke the first processor and enter the real pipeline, e.g. LeaderRequestProcessor
firstProcessor.processRequest(si);
if (si.cnxn != null) {
incInProcess();
}
} else {
LOG.warn("Received packet at server of unknown type {}", si.type);
// Update request accounting/throttling limits
requestFinished(si);
new UnimplementedRequestProcessor().processRequest(si);
}
} catch (MissingSessionException e) {
LOG.debug("Dropping request.", e);
// Update request accounting/throttling limits
requestFinished(si);
} catch (RequestProcessorException e) {
LOG.error("Unable to process request", e);
// Update request accounting/throttling limits
requestFinished(si);
}
}
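
  The throttler's contract is deliberately small: drain a queue, and while the server's in-flight count is at or above maxRequests, stall before passing a request on. A stripped-down sketch of that loop (hypothetical code; the AtomicInteger stands in for zks.getInProcess(), and Runnable stands in for Request):

import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicInteger;

// Hypothetical sketch of the throttling loop: stall while too many requests are in flight.
class MiniThrottler implements Runnable {
    private final LinkedBlockingQueue<Runnable> submitted = new LinkedBlockingQueue<>();
    private final AtomicInteger inProcess; // stand-in for zks.getInProcess()
    private final int maxRequests;         // 0 disables throttling
    private final long stallTimeMs;
    private volatile boolean killed = false;

    MiniThrottler(AtomicInteger inProcess, int maxRequests, long stallTimeMs) {
        this.inProcess = inProcess;
        this.maxRequests = maxRequests;
        this.stallTimeMs = stallTimeMs;
    }

    void submit(Runnable request) {
        submitted.add(request);
    }

    @Override
    public void run() {
        try {
            while (!killed) {
                Runnable request = submitted.take();
                if (maxRequests > 0) {
                    // stall until the downstream pipeline has drained below the limit
                    while (!killed && inProcess.get() >= maxRequests) {
                        Thread.sleep(stallTimeMs);
                    }
                }
                request.run(); // stand-in for zks.submitRequestNow(request)
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}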

  At this point the request has been fully initialized, and we can turn to analyzing the processor chain in detail.

5. The firstProcessor as first relay: LeaderRequestProcessor

    // org.apache.zookeeper.server.quorum.LeaderRequestProcessor#processRequest
@Override
public void processRequest(Request request) throws RequestProcessorException {
// Screen quorum requests against ACLs first
if (!lzks.authWriteRequest(request)) {
return;
}
// Check if this is a local session and we are trying to create
// an ephemeral node, in which case we upgrade the session
Request upgradeRequest = null;
try {
upgradeRequest = lzks.checkUpgradeSession(request);
} catch (KeeperException ke) {
if (request.getHdr() != null) {
LOG.debug("Updating header");
request.getHdr().setType(OpCode.error);
request.setTxn(new ErrorTxn(ke.code().intValue()));
}
request.setException(ke);
LOG.warn("Error creating upgrade request", ke);
} catch (IOException ie) {
LOG.error("Unexpected error in upgrade", ie);
}
if (upgradeRequest != null) {
nextProcessor.processRequest(upgradeRequest);
}
// simply forwards to the next processor; little logic of its own
nextProcessor.processRequest(request);
}
// org.apache.zookeeper.server.PrepRequestProcessor#processRequest
public void processRequest(Request request) {
request.prepQueueStartTime = Time.currentElapsedTime();
// a LinkedBlockingQueue: the request enters PrepRequestProcessor's queue, where the real processing chain starts
submittedRequests.add(request);
// record metrics
ServerMetrics.getMetrics().PREP_PROCESSOR_QUEUED.add(1);
}
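
  Every role wires a different firstProcessor, but they all share one contract: a processor either does its work inline and calls nextProcessor, or it enqueues the request and lets its own thread drain the queue, exactly as LeaderRequestProcessor and PrepRequestProcessor do above. A toy version of that chaining, with hypothetical names:

import java.util.concurrent.LinkedBlockingQueue;

// Toy chain-of-responsibility mirroring the RequestProcessor idea (all names are hypothetical).
interface MiniProcessor {
    void process(String request);
}

// Passes straight through after a cheap check, like LeaderRequestProcessor does.
class PassThroughProcessor implements MiniProcessor {
    private final MiniProcessor next;
    PassThroughProcessor(MiniProcessor next) {
        this.next = next;
    }
    public void process(String request) {
        // (authorization / session-upgrade checks would happen here)
        next.process(request);
    }
}

// Queues the request and lets its own thread drain it, like PrepRequestProcessor does.
class QueuedProcessor implements MiniProcessor, Runnable {
    private final LinkedBlockingQueue<String> queue = new LinkedBlockingQueue<>();
    public void process(String request) {
        queue.add(request); // enqueue and return immediately
    }
    public void run() {
        try {
            while (true) {
                String request = queue.take();
                System.out.println("processing " + request);
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}

  Wiring then looks like firstProcessor = new PassThroughProcessor(new QueuedProcessor()), with the queued processor's thread started separately.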

6. The firstProcessor as first relay: FollowerRequestProcessor

    // org.apache.zookeeper.server.quorum.FollowerRequestProcessor#processRequest
public void processRequest(Request request) {
processRequest(request, true);
}
void processRequest(Request request, boolean checkForUpgrade) {
if (!finished) {
if (checkForUpgrade) {
// Before sending the request, check if the request requires a
// global session and what we have is a local session. If so do
// an upgrade.
Request upgradeRequest = null;
try {
upgradeRequest = zks.checkUpgradeSession(request);
} catch (KeeperException ke) {
if (request.getHdr() != null) {
request.getHdr().setType(OpCode.error);
request.setTxn(new ErrorTxn(ke.code().intValue()));
}
request.setException(ke);
LOG.warn("Error creating upgrade request", ke);
} catch (IOException ie) {
LOG.error("Unexpected error in upgrade", ie);
}
if (upgradeRequest != null) {
queuedRequests.add(upgradeRequest);
}
}
// FollowerRequestProcessor does some work of its own: the request goes onto its private queuedRequests queue
queuedRequests.add(request);
}
}
// org.apache.zookeeper.server.quorum.FollowerRequestProcessor#run
@Override
public void run() {
try {
while (!finished) {
Request request = queuedRequests.take();
if (LOG.isTraceEnabled()) {
ZooTrace.logRequest(LOG, ZooTrace.CLIENT_REQUEST_TRACE_MASK, 'F', request, "");
}
if (request == Request.requestOfDeath) {
break;
}
// Screen quorum requests against ACLs first
if (!zks.authWriteRequest(request)) {
continue;
}
// We want to queue the request to be processed before we submit
// the request to the leader so that we are ready to receive
// the response
// hand the request to the CommitProcessor for local processing on the follower
nextProcessor.processRequest(request);
// We now ship the request to the leader. As with all
// other quorum operations, sync also follows this code
// path, but different from others, we need to keep track
// of the sync operations this follower has pending, so we
// add it to pendingSyncs.
// for write requests, forward them to the leader
switch (request.type) {
case OpCode.sync:
zks.pendingSyncs.add(request);
zks.getFollower().request(request);
break;
case OpCode.create:
case OpCode.create2:
case OpCode.createTTL:
case OpCode.createContainer:
case OpCode.delete:
case OpCode.deleteContainer:
case OpCode.setData:
case OpCode.reconfig:
case OpCode.setACL:
case OpCode.multi:
case OpCode.check:
zks.getFollower().request(request);
break;
case OpCode.createSession:
case OpCode.closeSession:
// Don't forward local sessions to the leader.
if (!request.isLocalSession()) {
zks.getFollower().request(request);
}
break;
}
}
} catch (Exception e) {
handleException(this.getName(), e);
}
LOG.info("FollowerRequestProcessor exited loop!");
}
// org.apache.zookeeper.server.quorum.Learner#request
/**
* send a request packet to the leader
*
* @param request
* the request from the client
* @throws IOException
*/
void request(Request request) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream oa = new DataOutputStream(baos);
oa.writeLong(request.sessionId);
oa.writeInt(request.cxid);
oa.writeInt(request.type);
if (request.request != null) {
request.request.rewind();
int len = request.request.remaining();
byte[] b = new byte[len];
request.request.get(b);
request.request.rewind();
oa.write(b);
}
oa.close();
// forward the request to the leader; details in a later article
QuorumPacket qp = new QuorumPacket(Leader.REQUEST, -1, baos.toByteArray(), request.authInfo);
writePacket(qp, true);
}
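
  From the code above, the body the follower forwards is simply sessionId (8 bytes) + cxid (4 bytes) + type (4 bytes) + the client's original request bytes, wrapped into a QuorumPacket of type Leader.REQUEST. A small stand-alone illustration of building that payload (demo code, not ZooKeeper's):

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;

// Illustration of the payload layout the follower forwards to the leader:
// 8-byte sessionId, 4-byte cxid, 4-byte type, then the client's original request bytes.
public class ForwardPayloadDemo {
    static byte[] build(long sessionId, int cxid, int type, byte[] clientRequest) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(baos);
        out.writeLong(sessionId);
        out.writeInt(cxid);
        out.writeInt(type);
        if (clientRequest != null) {
            out.write(clientRequest);
        }
        out.close();
        return baos.toByteArray(); // this becomes the data field of a Leader.REQUEST QuorumPacket
    }

    public static void main(String[] args) throws IOException {
        byte[] payload = build(0x1234L, 7, 1 /* e.g. a create op */, new byte[] {1, 2, 3});
        System.out.println("payload length = " + payload.length); // 8 + 4 + 4 + 3 = 19
    }
}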

7. The firstProcessor as first relay: ObserverRequestProcessor

  ObserverRequestProcessor works much like the follower's processor.

    // org.apache.zookeeper.server.quorum.ObserverRequestProcessor#processRequest
/**
* Simply queue the request, which will be processed in FIFO order.
*/
public void processRequest(Request request) {
if (!finished) {
Request upgradeRequest = null;
try {
upgradeRequest = zks.checkUpgradeSession(request);
} catch (KeeperException ke) {
if (request.getHdr() != null) {
request.getHdr().setType(OpCode.error);
request.setTxn(new ErrorTxn(ke.code().intValue()));
}
request.setException(ke);
LOG.info("Error creating upgrade request", ke);
} catch (IOException ie) {
LOG.error("Unexpected error in upgrade", ie);
}
if (upgradeRequest != null) {
queuedRequests.add(upgradeRequest);
}
// add it to this processor's own work queue for later handling
queuedRequests.add(request);
}
}
// org.apache.zookeeper.server.quorum.ObserverRequestProcessor#run
@Override
public void run() {
try {
while (!finished) {
Request request = queuedRequests.take();
if (LOG.isTraceEnabled()) {
ZooTrace.logRequest(LOG, ZooTrace.CLIENT_REQUEST_TRACE_MASK, 'F', request, "");
}
if (request == Request.requestOfDeath) {
break;
}
// Screen quorum requests against ACLs first
if (!zks.authWriteRequest(request)) {
continue;
}
// We want to queue the request to be processed before we submit
// the request to the leader so that we are ready to receive
// the response
nextProcessor.processRequest(request);
// We now ship the request to the leader. As with all
// other quorum operations, sync also follows this code
// path, but different from others, we need to keep track
// of the sync operations this Observer has pending, so we
// add it to pendingSyncs.
switch (request.type) {
case OpCode.sync:
zks.pendingSyncs.add(request);
zks.getObserver().request(request);
break;
case OpCode.create:
case OpCode.create2:
case OpCode.createTTL:
case OpCode.createContainer:
case OpCode.delete:
case OpCode.deleteContainer:
case OpCode.setData:
case OpCode.reconfig:
case OpCode.setACL:
case OpCode.multi:
case OpCode.check:
zks.getObserver().request(request);
break;
case OpCode.createSession:
case OpCode.closeSession:
// Don't forward local sessions to the leader.
if (!request.isLocalSession()) {
zks.getObserver().request(request);
}
break;
}
}
} catch (Exception e) {
handleException(this.getName(), e);
}
LOG.info("ObserverRequestProcessor exited loop!");
}

  OK! We have now seen, end to end, how a TCP data stream becomes a ZooKeeper request packet that is ready to enter the real business-processing pipeline.

To summarize the whole process:

  1. An accept thread (AcceptThread) takes in client connections;
  2. A group of selector threads (SelectorThread) polls the connections and dispatches read/write events;
  3. A worker thread pool (WorkerService) performs the actual reads and writes;
  4. ZooKeeperServer wraps the bytes into a Request;
  5. The RequestThrottler gates the flow, holding back excess requests, and then hands them back to ZooKeeperServer for queuing;
  6. ZooKeeperServer passes the Request to the firstProcessor, the entry point of the processing chain; the preparation phase is now complete;
  7. Each firstProcessor behaves differently: the leader's merely passes the Request along the chain, while the follower's and observer's do some extra work of their own.

  The whole flow is a continuous relay among many threads, each owning its own slice of the work. The hard part of understanding how ZooKeeper operates lies precisely in this multi-threaded cooperation.
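
  Laid out as a single hand-off chain, the path of one request looks like this:

AcceptThread.doAccept()
  -> SelectorThread (acceptedQueue -> select() -> handleIO())
    -> WorkerService / IOWorkRequest.doWork()
      -> NIOServerCnxn.doIO() -> readPayload() -> readRequest()
        -> ZooKeeperServer.processPacket() -> Request -> enqueueRequest()
          -> RequestThrottler.submitRequest() -> submitRequestNow()
            -> firstProcessor (LeaderRequestProcessor / FollowerRequestProcessor / ObserverRequestProcessor) -> ...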
