Replica

对于local replica, 需要记录highWatermarkValue,表示当前已经committed的数据
对于remote replica,需要记录logEndOffsetValue以及更新的时间

package kafka.cluster

class Replica(val brokerId: Int,
val partition: Partition,
time: Time = SystemTime,
initialHighWatermarkValue: Long = 0L,
val log: Option[Log] = None) extends Logging {
//only defined in local replica
private[this] var highWatermarkValue: AtomicLong = new AtomicLong(initialHighWatermarkValue)
// only used for remote replica; logEndOffsetValue for local replica is kept in log
private[this] var logEndOffsetValue = new AtomicLong(ReplicaManager.UnknownLogEndOffset)
private[this] var logEndOffsetUpdateTimeMsValue: AtomicLong = new AtomicLong(time.milliseconds)
val topic = partition.topic
val partitionId = partition.partitionId
}

Partition

主要用于管理leader,ISR,AR

package kafka.cluster

/**
* Data structure that represents a topic partition. The leader maintains the AR, ISR, CUR, RAR
*/
class Partition(val topic: String,
val partitionId: Int,
var replicationFactor: Int,
time: Time,
val replicaManager: ReplicaManager) extends Logging with KafkaMetricsGroup {
private val localBrokerId = replicaManager.config.brokerId //当前的brokerId
private val logManager = replicaManager.logManager
private val zkClient = replicaManager.zkClient
var leaderReplicaIdOpt: Option[Int] = None //leaderReplica的id
var inSyncReplicas: Set[Replica] = Set.empty[Replica] //ISR
private val assignedReplicaMap = new Pool[Int,Replica] //AR,往往是大于等于ISR的
private val leaderIsrUpdateLock = new Object
private var zkVersion: Int = LeaderAndIsr.initialZKVersion
private var leaderEpoch: Int = LeaderAndIsr.initialLeaderEpoch - 1
/* Epoch of the controller that last changed the leader. This needs to be initialized correctly upon broker startup.
* One way of doing that is through the controller's start replica state change command. When a new broker starts up
* the controller sends it a start replica command containing the leader for each partition that the broker hosts.
* In addition to the leader, the controller can also send the epoch of the controller that elected the leader for
* each partition. */
private var controllerEpoch: Int = KafkaController.InitialControllerEpoch - 1
}

getOrCreateReplica

  def getOrCreateReplica(replicaId: Int = localBrokerId): Replica = {
val replicaOpt = getReplica(replicaId)
replicaOpt match {
case Some(replica) => replica
case None => //需要create
if (isReplicaLocal(replicaId)) {
val config = LogConfig.fromProps(logManager.defaultConfig.toProps, AdminUtils.fetchTopicConfig(zkClient, topic))
val log = logManager.createLog(TopicAndPartition(topic, partitionId), config) //创建logfile
val checkpoint = replicaManager.highWatermarkCheckpoints(log.dir.getParent) //试图读取HW的checkpoint
val offsetMap = checkpoint.read
if (!offsetMap.contains(TopicAndPartition(topic, partitionId)))
warn("No checkpointed highwatermark is found for partition [%s,%d]".format(topic, partitionId))
val offset = offsetMap.getOrElse(TopicAndPartition(topic, partitionId), 0L).min(log.logEndOffset) //offset为cp读出的HW和logEndOffset中小的那个
val localReplica = new Replica(replicaId, this, time, offset, Some(log)) //创建Replica对象
addReplicaIfNotExists(localReplica) //加到AR中去
} else { //Remote Replica,直接创建对象
val remoteReplica = new Replica(replicaId, this, time)
addReplicaIfNotExists(remoteReplica)
}
getReplica(replicaId).get
}
}

makeLeader

  /**
* Make the local replica the leader by resetting LogEndOffset for remote replicas (there could be old LogEndOffset from the time when this broker was the leader last time)
* and setting the new leader and ISR
*/
def makeLeader(controllerId: Int,
partitionStateInfo: PartitionStateInfo, correlationId: Int): Boolean = {
leaderIsrUpdateLock synchronized {
val allReplicas = partitionStateInfo.allReplicas //取出所有replicaid
val leaderIsrAndControllerEpoch = partitionStateInfo.leaderIsrAndControllerEpoch
val leaderAndIsr = leaderIsrAndControllerEpoch.leaderAndIsr
// record the epoch of the controller that made the leadership decision. This is useful while updating the isr
// to maintain the decision maker controller's epoch in the zookeeper path
controllerEpoch = leaderIsrAndControllerEpoch.controllerEpoch //每次做leadership decision的时候需要更新controllerEpoch
// add replicas that are new
allReplicas.foreach(replica => getOrCreateReplica(replica)) //生成所有replica对象,对于new的需要create
val newInSyncReplicas = leaderAndIsr.isr.map(r => getOrCreateReplica(r)).toSet //生成新的ISR
// remove assigned replicas that have been removed by the controller
(assignedReplicas().map(_.brokerId) -- allReplicas).foreach(removeReplica(_)) //对于AR中已经不再有效的replica,remove
// reset LogEndOffset for remote replicas
assignedReplicas.foreach(r => if (r.brokerId != localBrokerId) r.logEndOffset = ReplicaManager.UnknownLogEndOffset) //清空所有remote replica的LEO
inSyncReplicas = newInSyncReplicas
leaderEpoch = leaderAndIsr.leaderEpoch
zkVersion = leaderAndIsr.zkVersion
leaderReplicaIdOpt = Some(localBrokerId)
// we may need to increment high watermark
maybeIncrementLeaderHW(getReplica().get) //更新HW,用min(所有remote replica的LEO)
true
}
}

maybeIncrementLeaderHW
用所有remote replica的LEO的最小值来替换当前的HW(如果大于HW的话)

  /**
* There is no need to acquire the leaderIsrUpdate lock here since all callers of this private API acquire that lock
* @param leaderReplica
*/
private def maybeIncrementLeaderHW(leaderReplica: Replica) {
val allLogEndOffsets = inSyncReplicas.map(_.logEndOffset)
val newHighWatermark = allLogEndOffsets.min
val oldHighWatermark = leaderReplica.highWatermark
if(newHighWatermark > oldHighWatermark) {
leaderReplica.highWatermark = newHighWatermark //更新HW
}
else
}

makeFollower

  /**
* Make the local replica the follower by setting the new leader and ISR to empty
*/
def makeFollower(controllerId: Int,
partitionStateInfo: PartitionStateInfo,
leaders: Set[Broker], correlationId: Int): Boolean = {
leaderIsrUpdateLock synchronized {
val allReplicas = partitionStateInfo.allReplicas
val leaderIsrAndControllerEpoch = partitionStateInfo.leaderIsrAndControllerEpoch
val leaderAndIsr = leaderIsrAndControllerEpoch.leaderAndIsr
val newLeaderBrokerId: Int = leaderAndIsr.leader //新的leaderid
// record the epoch of the controller that made the leadership decision. This is useful while updating the isr
// to maintain the decision maker controller's epoch in the zookeeper path
controllerEpoch = leaderIsrAndControllerEpoch.controllerEpoch
// TODO: Delete leaders from LeaderAndIsrRequest in 0.8.1
leaders.find(_.id == newLeaderBrokerId) match {
case Some(leaderBroker) =>
// add replicas that are new
allReplicas.foreach(r => getOrCreateReplica(r))
// remove assigned replicas that have been removed by the controller
(assignedReplicas().map(_.brokerId) -- allReplicas).foreach(removeReplica(_))
inSyncReplicas = Set.empty[Replica] //将ISR置空
leaderEpoch = leaderAndIsr.leaderEpoch
zkVersion = leaderAndIsr.zkVersion
leaderReplicaIdOpt = Some(newLeaderBrokerId)
case None => // we should not come here
}
true
}
}

maybeShrinkIsr

从ISR中将Stuck followers和Slow followers去除

  def maybeShrinkIsr(replicaMaxLagTimeMs: Long,  replicaMaxLagMessages: Long) {
leaderIsrUpdateLock synchronized {
leaderReplicaIfLocal() match {
case Some(leaderReplica) =>
val outOfSyncReplicas = getOutOfSyncReplicas(leaderReplica, replicaMaxLagTimeMs, replicaMaxLagMessages) // 获取OutofSync的replicas
if(outOfSyncReplicas.size > 0) {
val newInSyncReplicas = inSyncReplicas – outOfSyncReplicas //从ISR中去除outOfSyncReplicas
// update ISR in zk and in cache
updateIsr(newInSyncReplicas)
// we may need to increment high watermark since ISR could be down to 1
maybeIncrementLeaderHW(leaderReplica)
replicaManager.isrShrinkRate.mark()
}
case None => // do nothing if no longer leader
}
}
} def getOutOfSyncReplicas(leaderReplica: Replica, keepInSyncTimeMs: Long, keepInSyncMessages: Long): Set[Replica] = {
/**
* there are two cases that need to be handled here -
* 1. Stuck followers: If the leo of the replica hasn't been updated for keepInSyncTimeMs ms,
* the follower is stuck and should be removed from the ISR
* 2. Slow followers: If the leo of the slowest follower is behind the leo of the leader by keepInSyncMessages, the
* follower is not catching up and should be removed from the ISR
**/
val leaderLogEndOffset = leaderReplica.logEndOffset
val candidateReplicas = inSyncReplicas - leaderReplica
// Case 1 above
val stuckReplicas = candidateReplicas.filter(r => (time.milliseconds - r.logEndOffsetUpdateTimeMs) > keepInSyncTimeMs)
if(stuckReplicas.size > 0)
debug("Stuck replicas for partition [%s,%d] are %s".format(topic, partitionId, stuckReplicas.map(_.brokerId).mkString(",")))
// Case 2 above
val slowReplicas = candidateReplicas.filter(r => r.logEndOffset >= 0 && (leaderLogEndOffset - r.logEndOffset) > keepInSyncMessages)
if(slowReplicas.size > 0)
debug("Slow replicas for partition [%s,%d] are %s".format(topic, partitionId, slowReplicas.map(_.brokerId).mkString(",")))
stuckReplicas ++ slowReplicas
}

Apache Kafka源码分析 – Replica and Partition的更多相关文章

  1. Apache Kafka源码分析 – Broker Server

    1. Kafka.scala 在Kafka的main入口中startup KafkaServerStartable, 而KafkaServerStartable这是对KafkaServer的封装 1: ...

  2. apache kafka源码分析-Producer分析---转载

    原文地址:http://www.aboutyun.com/thread-9938-1-1.html 问题导读1.Kafka提供了Producer类作为java producer的api,此类有几种发送 ...

  3. Apache Kafka源码分析 – Controller

    https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Controller+Internalshttps://cwiki.apache.org ...

  4. Apache Kafka源码分析 - kafka controller

    前面已经分析过kafka server的启动过程,以及server所能处理的所有的request,即KafkaApis 剩下的,其实关键就是controller,以及partition和replica ...

  5. Apache Kafka源码分析 - autoLeaderRebalanceEnable

    在broker的配置中,auto.leader.rebalance.enable (false) 那么这个leader是如何进行rebalance的? 首先在controller启动的时候会打开一个s ...

  6. Apache Kafka源码分析 - KafkaApis

    kafka apis反映出kafka broker server可以提供哪些服务,broker server主要和producer,consumer,controller有交互,搞清这些api就清楚了 ...

  7. Apache Kafka源码分析 – Log Management

    LogManager LogManager会管理broker上所有的logs(在一个log目录下),一个topic的一个partition对应于一个log(一个log子目录)首先loadLogs会加载 ...

  8. Apache Kafka源码分析 - ReplicaStateMachine

    startup 在onControllerFailover中被调用, /** * Invoked on successful controller election. First registers ...

  9. Apache Kafka源码分析 - PartitionStateMachine

    startup 在onControllerFailover中被调用, initializePartitionState private def initializePartitionState() { ...

随机推荐

  1. sphider 丁廷臣简体中文完美汉化版带蜘蛛搜索引擎程序 v1.3.4

    sphider 丁廷臣简体中文完美汉化版带蜘蛛搜索引擎程序 v1.3.4是最官方的新版,免费开源,用官方最新发布原版汉化.未更改任何内核文件. Sphider 是一个完美的带有蜘蛛的搜索引擎程序. S ...

  2. Java连接postgresql数据库

    1.下载驱动jar下载地址:https://jdbc.postgresql.org/download.html 2.导入jar包新建lib文件夹,将下载的jar驱动包拖到文件夹中.将jar驱动包添加到 ...

  3. sass 的使用心得

    //定义颜色 $c55:#; $c22:#; $c33:#; $c99:#; $c77:#; $c00:#; $cff:#fff; $caa:#aaa; $ccc:#ccc; $cf0:#f0f0f0 ...

  4. ASP.NET基础(一)

    ExecuteNonQuery()的用法 下面我们将详细讲解如何在Page_Load()中对数据库的增加.删除.修改,最后我们再来总结一下ExecuteNonQuery(),ExecuteScalar ...

  5. Entity Framework(三):使用特性(数据注解)创建表结构

    一.理解Code First及其约定和配置 传统设计应用的方式都是由下而上的,即我们习惯优先考虑数据库,然后使用这个以数据为中心的方法在数据之上构建应用程序.这种方法非常适合于数据密集的应用或者数据库 ...

  6. 001Maven_基本介绍及安装

    Maven入门教程 介绍.环境配置 Maven介绍 Maven是一个采用纯Java编写的开源项目管理工具, Maven采用了一种被称之为Project Object Model (POM)概念来管理项 ...

  7. Unity5 AssetBundle打包加载及服务器加载

    Assetbundle为资源包不是资源 打包1:通过脚本指定打包 AssetBundleBuild ab = new AssetBundleBuild                         ...

  8. Unity5.X 新版AssetBundle打包控制

    一.什么是AssetBundle 估计很多人只知道Unity的模型之类的东西可以导出成一种叫做AssetBundle的文件,然后打包后可以在Unity程序运行的时候再加载出来用.那么AssetBund ...

  9. c#方法生成mysql if方法(算工作日)

    public static string retunSQl(string s,string e){ return @"IF ( "+s+ ">" +e+ ...

  10. sublime text 2自定义代码片段

    本文引用   http://www.blogjava.net/Hafeyang/archive/2012/08/17/how_to_create_code_snippet_in_subline_tex ...