个人觉得akka提供的cluster工具中,sharding是最吸引人的。当我们需要把actor分布在不同的节点上时,Cluster sharding非常有用。我们可以使用actor的逻辑标识符与actor进行通信,而不用关心其物理位置。简单来说就是把actor的actorPath或actorRef进一步抽象,用一个字符串表示。









/**
 * Persistent counter entity used as the sharded actor in this example.
 *
 * The in-memory `count` is only changed by persisting [[CounterChanged]] events,
 * and is rebuilt from those events in `receiveRecover`.
 */
class Counter extends PersistentActor {
  import ShardRegion.Passivate

  // Ask for a ReceiveTimeout message after 120 seconds; it is handled in
  // receiveCommand by requesting passivation from the parent.
  context.setReceiveTimeout(120.seconds)

  // self.path.name is the entity identifier (utf-8 URL-encoded)
  override def persistenceId: String = "Counter-" + self.path.name

  var count = 0

  /** Applies a single event to the in-memory counter. */
  def updateState(event: CounterChanged): Unit =
    count += event.delta

  /** Replays persisted events to rebuild `count`. */
  override def receiveRecover: Receive = {
    case evt: CounterChanged ⇒ updateState(evt)
  }

  /** Handles commands; state changes go through `persist` before being applied. */
  override def receiveCommand: Receive = {
    case Increment ⇒ persist(CounterChanged(+1))(updateState)
    case Decrement ⇒ persist(CounterChanged(-1))(updateState)
    case Get(_) ⇒ sender() ! count
    // Idle: ask the parent to passivate this entity; it answers with the Stop message given here.
    case ReceiveTimeout ⇒ context.parent ! Passivate(stopMessage = Stop)
    case Stop ⇒ context.stop(self)
  }
}


    // Registers the "Counter" entity type with the sharding extension and keeps
    // the ActorRef returned by `start` for sending messages to the entities.
    val counterRegion: ActorRef = {
      val sharding = ClusterSharding(system)
      sharding.start(
        typeName = "Counter",
        entityProps = Props[Counter],
        settings = ClusterShardingSettings(system),
        extractEntityId = extractEntityId,
        extractShardId = extractShardId)
    }


    // Maps an incoming message to (entity id, message to deliver to the entity).
    val extractEntityId: ShardRegion.ExtractEntityId = {
      // The envelope is unwrapped: only its payload is delivered.
      case EntityEnvelope(id, payload) ⇒ (id.toString, payload)
      // Get carries its own id and is delivered unchanged.
      case msg @ Get(id) ⇒ (id.toString, msg)
    }

    val numberOfShards = 100

    // Maps an incoming message to its shard id: the entity id modulo numberOfShards.
    // NOTE(review): `%` keeps the sign of the dividend, so a negative id produces a
    // negative shard id string — confirm that ids are always non-negative.
    val extractShardId: ShardRegion.ExtractShardId = {
      case EntityEnvelope(id, _) ⇒ (id % numberOfShards).toString
      case Get(id) ⇒ (id % numberOfShards).toString
      case ShardRegion.StartEntity(id) ⇒
        // StartEntity is used by remembering entities feature
        (id.toLong % numberOfShards).toString
    }


/**
 * Interface of the partial function used by the [[ShardRegion]] to
 * extract the entity id and the message to send to the entity from an
 * incoming message. The implementation is application specific.
 * If the partial function does not match the message will be
 * `unhandled`, i.e. posted as `Unhandled` messages on the event stream.
 * Note that the extracted message does not have to be the same as the incoming
 * message to support wrapping in message envelope that is unwrapped before
 * sending to the entity actor.
 */
type ExtractEntityId = PartialFunction[Msg, (EntityId, Msg)]

  ExtractEntityId是一个偏函数(PartialFunction),它用来从收到的消息中提取实体ID和要发送给实体的消息。如果这个偏函数无法匹配某条消息,该消息就会被视为unhandled,并以Unhandled消息的形式发布到event stream。注意,提取出的消息不一定与输入的消息相同,这样就可以先用信封(envelope)包装消息,在转发给实体actor之前再解包。

/**
 * Interface of the function used by the [[ShardRegion]] to
 * extract the shard id from an incoming message.
 * Only messages that passed the [[ExtractEntityId]] will be used
 * as input to this function.
 */
type ExtractShardId = Msg ⇒ ShardId





/**
 * Scala API: Register a named entity type by defining the [[akka.actor.Props]] of the entity actor
 * and functions to extract entity and shard identifier from messages. The [[ShardRegion]] actor
 * for this type can later be retrieved with the [[shardRegion]] method.
 *
 * This method will start a [[ShardRegion]] in proxy mode in case if there is no match between the roles of
 * the current cluster node and the role specified in [[ClusterShardingSettings]] passed to this method.
 *
 * Some settings can be configured as described in the `akka.cluster.sharding` section
 * of the `reference.conf`.
 *
 * @return the actor ref of the [[ShardRegion]] that is to be responsible for the shard
 */
def start(
  typeName: String,
  entityProps: Props,
  settings: ClusterShardingSettings,
  extractEntityId: ShardRegion.ExtractEntityId,
  extractShardId: ShardRegion.ExtractShardId,
  allocationStrategy: ShardAllocationStrategy,
  handOffStopMessage: Any): ActorRef = {
  // internalStart expects a factory from String to Props; the same Props is used for every argument.
  internalStart(typeName, _ ⇒ entityProps, settings, extractEntityId, extractShardId, allocationStrategy, handOffStopMessage)
}


// Internal variant of `start` that takes a factory from String to Props
// instead of a fixed Props.
// NOTE(review): this excerpt is cut off inside the `startProxy(` call at the end,
// so the proxy branch and the returned value are not visible here.
@InternalApi private[akka] def internalStart(
typeName: String,
entityProps: String ⇒ Props,
settings: ClusterShardingSettings,
extractEntityId: ShardRegion.ExtractEntityId,
extractShardId: ShardRegion.ExtractShardId,
allocationStrategy: ShardAllocationStrategy,
handOffStopMessage: Any): ActorRef = { if (settings.shouldHostShard(cluster)) { implicit val timeout = system.settings.CreationTimeout
// Hosting branch: send a Start message to the guardian and block (Await.result)
// until it answers with Started(shardRegion).
val startMsg = Start(typeName, entityProps, settings,
extractEntityId, extractShardId, allocationStrategy, handOffStopMessage)
val Started(shardRegion) = Await.result(guardian ? startMsg, timeout.duration)
// Store the started region under its type name.
regions.put(typeName, shardRegion)
} else {
// Non-hosting branch: settings.shouldHostShard(cluster) is false, so a proxy is started instead.
log.debug("Starting Shard Region Proxy [{}] (no actors will be hosted on this node)...", typeName) startProxy(
dataCenter = None, // startProxy method must be used directly to start a proxy for another DC


 // Lazily created ClusterShardingGuardian, started as a system actor.
 // NOTE(review): the right-hand side of `guardianName` and the closing braces
 // are missing from this excerpt.
 private lazy val guardian: ActorRef = {
val guardianName: String =
// Dispatcher id comes from the `akka.cluster.sharding.use-dispatcher` setting;
// an empty string falls back to the default dispatcher.
val dispatcher = system.settings.config
.getString("akka.cluster.sharding.use-dispatcher") match {
case "" ⇒ Dispatchers.DefaultDispatcherId
case id ⇒ id
system.systemActorOf(Props[ClusterShardingGuardian].withDispatcher(dispatcher), guardianName)



// Fragment that produces the ShardRegion actor for `typeName` and replies with
// Started(shardRegion); presumably the guardian's handling of the Start message
// (the enclosing definition is not part of this excerpt — confirm).
// NOTE(review): several lines are missing below (an `else`, the call that wraps
// the ClusterSingletonManager props, the props call inside `context.actorOf(`,
// and closing braces/parentheses).
val rep = replicator(settings)
// The type name is URL-encoded before it is used to derive actor names and paths.
val encName = URLEncoder.encode(typeName, ByteString.UTF_8)
val cName = coordinatorSingletonManagerName(encName)
val cPath = coordinatorPath(encName)
// Reuse an existing child named `encName`; the block below only runs when there is none.
val shardRegion = context.child(encName).getOrElse {
// The coordinator setup is skipped when a child named `cName` already exists.
if (context.child(cName).isEmpty) {
val coordinatorProps =
// Two ShardCoordinator.props variants are visible: one selected for the persistence
// state-store mode, the other taking the replicator and majorityMinCap.
if (settings.stateStoreMode == ClusterShardingSettings.StateStoreModePersistence)
ShardCoordinator.props(typeName, settings, allocationStrategy)
ShardCoordinator.props(typeName, settings, allocationStrategy, rep, majorityMinCap)
// The coordinator props are wrapped in a BackoffSupervisor with backoff between
// coordinatorFailureBackoff and five times that value.
val singletonProps = BackoffSupervisor.props(
childProps = coordinatorProps,
childName = "coordinator",
minBackoff = coordinatorFailureBackoff,
maxBackoff = coordinatorFailureBackoff * 5,
randomFactor = 0.2)
val singletonSettings = settings.coordinatorSingletonSettings
// The supervised coordinator is run through ClusterSingletonManager props, terminated with PoisonPill.
ClusterSingletonManager.props(singletonProps, terminationMessage = PoisonPill, singletonSettings)
name = cName)
// Arguments for creating the region actor itself, named `encName`.
} context.actorOf(
typeName = typeName,
entityProps = entityProps,
settings = settings,
coordinatorPath = cPath,
extractEntityId = extractEntityId,
extractShardId = extractShardId,
handOffStopMessage = handOffStopMessage,
replicator = rep,
name = encName
// Reply to the requester with the region.
sender() ! Started(shardRegion)



/**
 * This actor creates children entity actors on demand for the shards that it is told to be
 * responsible for. It delegates messages targeted to other shards to the responsible
 * `ShardRegion` actor on other nodes.
 *
 * @see [[ClusterSharding$ ClusterSharding extension]]
 */
private[akka] class ShardRegion(
typeName: String,
entityProps: Option[String ⇒ Props],
dataCenter: Option[DataCenter],
settings: ClusterShardingSettings,
coordinatorPath: String,
extractEntityId: ShardRegion.ExtractEntityId,
extractShardId: ShardRegion.ExtractShardId,
handOffStopMessage: Any,
replicator: ActorRef,
majorityMinCap: Int) extends Actor with ActorLogging


 /**
  * Top-level message dispatch of the region.
  *
  * Internal message kinds are handed to their dedicated handlers first. Any other
  * message is delivered via `deliverMessage` if `extractEntityId` is defined for it;
  * otherwise it is logged with a warning and not processed further.
  */
 def receive: Receive = {
   case Terminated(ref) ⇒ receiveTerminated(ref)
   case ShardInitialized(shardId) ⇒ initializeShard(shardId, sender())
   case evt: ClusterDomainEvent ⇒ receiveClusterEvent(evt)
   case state: CurrentClusterState ⇒ receiveClusterState(state)
   case msg: CoordinatorMessage ⇒ receiveCoordinatorMessage(msg)
   case cmd: ShardRegionCommand ⇒ receiveCommand(cmd)
   case query: ShardRegionQuery ⇒ receiveQuery(query)
   case msg: RestartShard ⇒ deliverMessage(msg, sender())
   case msg: StartEntity ⇒ deliverStartEntity(msg, sender())
   // Application messages: only those the user-supplied extractor accepts.
   case msg if extractEntityId.isDefinedAt(msg) ⇒ deliverMessage(msg, sender())
   case unknownMsg ⇒ log.warning("Message does not have an extractor defined in shard [{}] so it was ignored: {}", typeName, unknownMsg)
 }


// Routes `msg` (originally sent by `snd`) towards the shard that owns it: to a
// local shard, to the region recorded for that shard, or into a per-shard buffer
// while the shard's home is still unknown.
// NOTE(review): this excerpt is missing lines (e.g. the body after `if (ref == self)`
// and several closing braces), so only the visible branches are described.
def deliverMessage(msg: Any, snd: ActorRef): Unit =
msg match {
// RestartShard carries the shard id itself, so extractShardId is not used for it.
case RestartShard(shardId) ⇒
regionByShard.get(shardId) match {
case Some(ref) ⇒
if (ref == self)
case None ⇒
// No region known for the shard: the home is requested from the coordinator only
// when nothing is buffered for this shard yet (the closing brace of this `if` is
// missing from the excerpt).
if (!shardBuffers.contains(shardId)) {
log.debug("Request shard [{}] home. Coordinator [{}]", shardId, coordinator)
coordinator.foreach(_ ! GetShardHome(shardId))
val buf = shardBuffers.getOrEmpty(shardId)
log.debug("Buffer message for shard [{}]. Total [{}] buffered messages.", shardId, buf.size + 1)
shardBuffers.append(shardId, msg, snd)
// Every other message: the shard id comes from the application's extractShardId.
} case _ ⇒
val shardId = extractShardId(msg)
regionByShard.get(shardId) match {
// The shard is recorded as living in this region.
case Some(ref) if ref == self ⇒
getShard(shardId) match {
case Some(shard) ⇒
if (shardBuffers.contains(shardId)) {
// Since now messages to a shard is buffered then those messages must be in right order
bufferMessage(shardId, msg, snd)
deliverBufferedMessages(shardId, shard)
} else shard.tell(msg, snd)
case None ⇒ bufferMessage(shardId, msg, snd)
// The shard is recorded for another region: pass the message on, keeping the original sender.
case Some(ref) ⇒
log.debug("Forwarding request for shard [{}] to [{}]", shardId, ref)
ref.tell(msg, snd)
// A null or empty shard id cannot be routed: warn and send the message to dead letters.
case None if shardId == null || shardId == "" ⇒
log.warning("Shard must not be empty, dropping message [{}]", msg.getClass.getName)
context.system.deadLetters ! msg
// Home unknown: request it (when nothing is buffered for the shard yet) and buffer the message.
case None ⇒
if (!shardBuffers.contains(shardId)) {
log.debug("Request shard [{}] home. Coordinator [{}]", shardId, coordinator)
coordinator.foreach(_ ! GetShardHome(shardId))
bufferMessage(shardId, msg, snd)




// Handling of a GetShardHome request, i.e. the message a region sends to its
// coordinator to ask where `shard` lives.
// NOTE(review): the excerpt ends inside `.recover { ... }`; what is done with the
// resulting future is not shown.
case GetShardHome(shard) ⇒
// Only continue here when handleGetShardHome reports false for this shard.
if (!handleGetShardHome(shard)) {
// location not know, yet
// Regions in graceful shutdown are excluded from the candidates.
val activeRegions = state.regions -- gracefulShutdownInProgress
if (activeRegions.nonEmpty) {
// sender() is captured in a val before it is used inside the future callbacks below.
val getShardHomeSender = sender()
val regionFuture = allocationStrategy.allocateShard(getShardHomeSender, shard, activeRegions)
regionFuture.value match {
// The future is already completed successfully: continue right away.
case Some(Success(region)) ⇒
continueGetShardHome(shard, region, getShardHomeSender)
case _ ⇒
// continue when future is completed
regionFuture.map { region ⇒
AllocateShardResult(shard, Some(region), getShardHomeSender)
}.recover {
// A failed allocation becomes a result without a region.
case _ ⇒ AllocateShardResult(shard, None, getShardHomeSender)


// Replaces `membersByAge` with `newMembers`. If the first member of the sorted
// set changed, the move is logged (as a coordinator move) and the cached
// `coordinator` reference is cleared.
// NOTE(review): the excerpt ends without closing braces; any statements that
// follow `coordinator = None` are not shown.
def changeMembers(newMembers: immutable.SortedSet[Member]): Unit = {
// Compare the head of the old and new sets; headOption tolerates empty sets.
val before = membersByAge.headOption
val after = newMembers.headOption
membersByAge = newMembers
if (before != after) {
if (log.isDebugEnabled)
log.debug("Coordinator moved from [{}] to [{}]", before.map(_.address).getOrElse(""), after.map(_.address).getOrElse(""))
coordinator = None


// Sends the registration message to the coordinator selection (if there is one).
// When messages are buffered and `retryCount` has reached 5, the situation is
// also reported (the message texts below describe a missing acknowledgement or a
// missing coordinator).
// NOTE(review): lines are missing from this excerpt (the logging call in the
// `Some` branch, the arguments of the last `log.warning(` and closing braces).
def register(): Unit = {
coordinatorSelection.foreach(_ ! registrationMessage)
if (shardBuffers.nonEmpty && retryCount >= 5) coordinatorSelection match {
case Some(actorSelection) ⇒
// Describe whether the first member in membersByAge is currently unreachable.
val coordinatorMessage =
if (cluster.state.unreachable(membersByAge.head)) s"Coordinator [${membersByAge.head}] is unreachable."
else s"Coordinator [${membersByAge.head}] is reachable."
"Trying to register to coordinator at [{}], but no acknowledgement. Total [{}] buffered messages. [{}]",
actorSelection, shardBuffers.totalSize, coordinatorMessage
case None ⇒ log.warning(
"No coordinator found to register. Probably, no seed-nodes configured and manual cluster join not performed? Total [{}] buffered messages.",


  // Message sent to the coordinator when registering: Register when this region
  // has entity props, RegisterProxy when it has none.
  def registrationMessage: Any =
    entityProps match {
      case Some(_) ⇒ Register(self)
      case None ⇒ RegisterProxy(self)
    }


// Handling of a region's Register message.
// NOTE(review): closing braces (and possibly further statements) are missing
// from this excerpt; only the visible statements are described.
case Register(region) ⇒
if (isMember(region)) {
log.debug("ShardRegion registered: [{}]", region)
aliveRegions += region
// Already part of the state: acknowledge immediately.
if (state.regions.contains(region)) {
region ! RegisterAck(self)
} else {
// New region: it is no longer treated as shutting down, and the acknowledgement
// is sent from the callback passed to `update`, after the state has been updated
// with the ShardRegionRegistered event.
gracefulShutdownInProgress -= region
update(ShardRegionRegistered(region)) { evt ⇒
state = state.updated(evt)
region ! RegisterAck(self)
} else {
// isMember(region) is false: the registration is not accepted and no acknowledgement is sent here.
log.debug("ShardRegion {} was not registered since the coordinator currently does not know about a node of that region", region)


 // The acknowledgement carries the coordinator's ActorRef, which is cached in `coordinator`.
 // NOTE(review): any further statements of this case are not visible in this excerpt.
 case RegisterAck(coord) ⇒
coordinator = Some(coord)


  我们回到GetShardHome处理的代码块,其实它最终调用了allocationStrategy.allocateShard(getShardHomeSender, shard, activeRegions)来决定这个Shard应该分配给哪个ShardRegion。

/**
 * Invoked when the location of a new shard is to be decided.
 * @param requester actor reference to the [[ShardRegion]] that requested the location of the
 *                  shard, can be returned if preference should be given to the node where the shard was first accessed
 * @param shardId the id of the shard to allocate
 * @param currentShardAllocations all actor refs to `ShardRegion` and their current allocated shards,
 *                                in the order they were allocated
 * @return a `Future` of the actor ref of the [[ShardRegion]] that is to be responsible for the shard, must be one of
 *         the references included in the `currentShardAllocations` parameter
 */
def allocateShard(requester: ActorRef, shardId: ShardId,
currentShardAllocations: Map[ActorRef, immutable.IndexedSeq[ShardId]]): Future[ActorRef]


/**
 * The default implementation of [[ShardCoordinator.LeastShardAllocationStrategy]]
 * allocates new shards to the `ShardRegion` with least number of previously allocated shards.
 * It picks shards for rebalancing handoff from the `ShardRegion` with most number of previously allocated shards.
 * They will then be allocated to the `ShardRegion` with least number of previously allocated shards,
 * i.e. new members in the cluster. There is a configurable threshold of how large the difference
 * must be to begin the rebalancing. The number of ongoing rebalancing processes can be limited.
 */
// NOTE(review): this excerpt is truncated — the class body's opening brace, the
// rest of allocateShard (which must produce a Future[ActorRef]) and the closing
// braces are not shown.
class LeastShardAllocationStrategy(rebalanceThreshold: Int, maxSimultaneousRebalance: Int)
extends ShardAllocationStrategy with Serializable
override def allocateShard(requester: ActorRef, shardId: ShardId,
currentShardAllocations: Map[ActorRef, immutable.IndexedSeq[ShardId]]): Future[ActorRef] = {
// Pick the region whose list of allocated shards is shortest. minBy throws on an
// empty map, so this relies on at least one region being passed in.
val (regionWithLeastShards, _) = currentShardAllocations.minBy { case (_, v) ⇒ v.size }


// Second half of answering a GetShardHome request, run once a candidate `region`
// for `shard` is known.
// NOTE(review): the logging call around the final message text and the closing
// braces are missing from this excerpt.
def continueGetShardHome(shard: ShardId, region: ActorRef, getShardHomeSender: ActorRef): Unit =
// While the shard is being rebalanced the request is deferred instead of answered.
if (rebalanceInProgress.contains(shard)) {
deferGetShardHomeRequest(shard, getShardHomeSender)
} else {
state.shards.get(shard) match {
// The shard already has a home in the state: answer with it and ignore `region`.
case Some(ref) ⇒ getShardHomeSender ! ShardHome(shard, ref)
case None ⇒
// Allocate only to a region that is registered and not shutting down gracefully.
if (state.regions.contains(region) && !gracefulShutdownInProgress.contains(region)) {
// Inside the callback passed to `update`: the state is updated with the
// ShardHomeAllocated event, the region is told to host the shard, and the
// requester gets the ShardHome reply.
update(ShardHomeAllocated(shard, region)) { evt ⇒
state = state.updated(evt)
log.debug("Shard [{}] allocated at [{}]", evt.shard, evt.region) sendHostShardMsg(evt.shard, evt.region)
getShardHomeSender ! ShardHome(evt.shard, evt.region)
} else
"Allocated region {} for shard [{}] is not (any longer) one of the registered regions: {}",
region, shard, state)


/**
 * Tells `region` to host `shard` and schedules a `ResendShardHost` message to
 * this actor after `shardStartTimeout`.
 *
 * The handle of the scheduled message is stored in `unAckedHostShards` under the
 * shard id, replacing any previous entry for that shard.
 */
def sendHostShardMsg(shard: ShardId, region: ActorRef): Unit = {
  region ! HostShard(shard)
  val cancel = context.system.scheduler.scheduleOnce(shardStartTimeout, self, ResendShardHost(shard, region))
  unAckedHostShards = unAckedHostShards.updated(shard, cancel)
}





/**
 * Message dispatch of the shard actor.
 *
 * Internal message kinds go to their dedicated handlers; any other message is
 * delivered via `deliverMessage` when `extractEntityId` is defined for it.
 * Messages matching none of the cases are not handled by this partial function.
 */
def receiveCommand: Receive = {
  case Terminated(ref) ⇒ receiveTerminated(ref)
  case msg: CoordinatorMessage ⇒ receiveCoordinatorMessage(msg)
  case msg: ShardCommand ⇒ receiveShardCommand(msg)
  case msg: ShardRegion.StartEntity ⇒ receiveStartEntity(msg)
  case msg: ShardRegion.StartEntityAck ⇒ receiveStartEntityAck(msg)
  case msg: ShardRegionCommand ⇒ receiveShardRegionCommand(msg)
  case msg: ShardQuery ⇒ receiveShardQuery(msg)
  case msg if extractEntityId.isDefinedAt(msg) ⇒ deliverMessage(msg, sender())
}


// Delivers `msg` to the entity it is addressed to, or buffers/drops it.
// NOTE(review): the body of the StartEntity branch and the closing braces are
// missing from this excerpt.
def deliverMessage(msg: Any, snd: ActorRef): Unit = {
// Split the incoming message into the entity id and the payload for the entity.
val (id, payload) = extractEntityId(msg)
// A null or empty entity id cannot be routed: warn and send the message to dead letters.
if (id == null || id == "") {
log.warning("Id must not be empty, dropping message [{}]", msg.getClass.getName)
context.system.deadLetters ! msg
} else if (payload.isInstanceOf[ShardRegion.StartEntity]) {
// in case it was wrapped, used in Typed
} else {
// Three outcomes depending on the buffer state for this entity id:
// no buffer -> deliver now; buffer exists but total size reached bufferSize ->
// drop to dead letters; otherwise -> append to the buffer.
messageBuffers.contains(id) match {
case false ⇒ deliverTo(id, msg, payload, snd) case true if messageBuffers.totalSize >= bufferSize ⇒
log.debug("Buffer is full, dropping message for entity [{}]", id)
context.system.deadLetters ! msg case true ⇒
log.debug("Message for entity [{}] buffered", id)
messageBuffers.append(id, msg, snd)


/**
 * Sends `payload` to the entity actor for `id`, keeping `snd` as the sender.
 *
 * The child is looked up by the URL-encoded id; when no such child exists the
 * entity is obtained through `getEntity`. The `msg` parameter is not used in
 * this body.
 */
def deliverTo(id: EntityId, msg: Any, payload: Msg, snd: ActorRef): Unit = {
  val name = URLEncoder.encode(id, "utf-8")
  context.child(name) match {
    case Some(actor) ⇒ actor.tell(payload, snd)
    case None ⇒ getEntity(id).tell(payload, snd)
  }
}


/**
 * Returns the child actor for entity `id`, starting it first when no child with
 * the URL-encoded id as its name exists.
 *
 * Starting an entity also watches the new child and records it in `idByRef`,
 * `refById` and the shard's `state`.
 */
def getEntity(id: EntityId): ActorRef = {
  val name = URLEncoder.encode(id, "utf-8")
  context.child(name).getOrElse {
    log.debug("Starting entity [{}] in shard [{}]", id, shardId)

    val a = context.watch(context.actorOf(entityProps(id), name))
    idByRef = idByRef.updated(a, id)
    refById = refById.updated(id, a)
    state = state.copy(state.entities + id)
    // The block has to evaluate to the new ActorRef; ending on the assignment
    // above would make it Unit and not satisfy the declared return type.
    a
  }
}



