1. The executor executes launchTask

    def launchTask(
        context: ExecutorBackend, taskId: Long, taskName: String, serializedTask: ByteBuffer) {
      val tr = new TaskRunner(context, taskId, taskName, serializedTask)
      runningTasks.put(taskId, tr)
      threadPool.execute(tr)
    }
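
The pattern here is simply "register the runner, then hand it to a thread pool"; launchTask returns as soon as the Runnable is queued. Below is a minimal, self-contained sketch of that pattern (the object name LaunchSketch and the body parameter are mine, not Spark's):

    import java.util.concurrent.{ConcurrentHashMap, Executors}

    object LaunchSketch {
      // Tracks in-flight tasks so they can be looked up (e.g. to kill them) later.
      private val runningTasks = new ConcurrentHashMap[Long, Runnable]()
      private val threadPool = Executors.newCachedThreadPool()

      def launch(taskId: Long, body: () => Unit): Unit = {
        val runner = new Runnable {
          override def run(): Unit =
            try body() finally runningTasks.remove(taskId)
        }
        runningTasks.put(taskId, runner) // register first, so a kill can find it
        threadPool.execute(runner)       // queue it; this call does not block
      }

      def main(args: Array[String]): Unit = {
        launch(1L, () => println("task 1 on " + Thread.currentThread().getName))
        threadPool.shutdown()
      }
    }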

2. The executor runs TaskRunner.run

    class TaskRunner(
        execBackend: ExecutorBackend, val taskId: Long, taskName: String, serializedTask: ByteBuffer)
      extends Runnable {

      @volatile private var killed = false
      @volatile var task: Task[Any] = _
      @volatile var attemptedTask: Option[Task[Any]] = None

      def kill(interruptThread: Boolean) {
        logInfo(s"Executor is trying to kill $taskName (TID $taskId)")
        killed = true
        if (task != null) {
          task.kill(interruptThread)
        }
      }

      override def run() {
        val startTime = System.currentTimeMillis()
        SparkEnv.set(env)
        Thread.currentThread.setContextClassLoader(replClassLoader)
        val ser = SparkEnv.get.closureSerializer.newInstance()
        logInfo(s"Running $taskName (TID $taskId)")
        execBackend.statusUpdate(taskId, TaskState.RUNNING, EMPTY_BYTE_BUFFER)
        var taskStart: Long = 0
        def gcTime = ManagementFactory.getGarbageCollectorMXBeans.map(_.getCollectionTime).sum
        val startGCTime = gcTime

        try {
          SparkEnv.set(env)
          Accumulators.clear()
          val (taskFiles, taskJars, taskBytes) = Task.deserializeWithDependencies(serializedTask) // deserialize taskFiles, taskJars and taskBytes
          updateDependencies(taskFiles, taskJars)
          task = ser.deserialize[Task[Any]](taskBytes, Thread.currentThread.getContextClassLoader) // deserialize the Task object

          // If this task has been killed before we deserialized it, let's quit now. Otherwise,
          // continue executing the task.
          if (killed) {
            // Throw an exception rather than returning, because returning within a try{} block
            // causes a NonLocalReturnControl exception to be thrown. The NonLocalReturnControl
            // exception will be caught by the catch block, leading to an incorrect ExceptionFailure
            // for the task.
            throw new TaskKilledException
          }

          attemptedTask = Some(task)
          logDebug("Task " + taskId + "'s epoch is " + task.epoch)
          env.mapOutputTracker.updateEpoch(task.epoch)

          // Run the actual task and measure its runtime.
          taskStart = System.currentTimeMillis()
          val value = task.run(taskId.toInt)
          val taskFinish = System.currentTimeMillis()

          // If the task has been killed, let's fail it.
          if (task.killed) {
            throw new TaskKilledException
          }
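
The kill mechanism is cooperative: kill() only flips a @volatile flag (and optionally interrupts the thread), while run() checks the flag at safe points and throws rather than returns, so the surrounding try block reports the right failure. A stripped-down sketch of that flag handshake (KillSketch and its printed messages are mine, not Spark's):

    object KillSketch {
      class KillableRunner(body: () => Unit) extends Runnable {
        // Written by the killer thread, read by the worker thread.
        @volatile private var killed = false

        def kill(): Unit = killed = true

        override def run(): Unit = {
          if (killed) {                 // checked before the expensive work starts
            println("killed before start; would be reported as TASK_KILLED")
            return
          }
          body()
          if (killed)                   // a kill that raced with the work
            println("killed during run; the result is discarded")
        }
      }

      def main(args: Array[String]): Unit = {
        val runner = new KillableRunner(() => println("working"))
        runner.kill()                   // the kill arrives before the worker thread starts
        new Thread(runner).start()
      }
    }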

3. task.run

    private[spark] abstract class Task[T](val stageId: Int, var partitionId: Int) extends Serializable {

      final def run(attemptId: Long): T = {
        context = new TaskContext(stageId, partitionId, attemptId, runningLocally = false)
        context.taskMetrics.hostname = Utils.localHostName()
        taskThread = Thread.currentThread()
        if (_killed) {
          kill(interruptThread = false)
        }
        runTask(context)
      }
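
Note that run is final while runTask is abstract: the base class owns the per-attempt bookkeeping and each subclass supplies only the actual work. A minimal sketch of that template-method shape (MiniTask is my name, not Spark's):

    abstract class MiniTask[T](val partitionId: Int) {
      @volatile protected var killed = false

      // The common skeleton: every task type goes through the same checks.
      final def run(attemptId: Long): T = {
        if (killed) throw new RuntimeException(s"attempt $attemptId was killed")
        runTask(attemptId)             // the only subclass-specific step
      }

      def runTask(attemptId: Long): T
    }

    object MiniTaskDemo {
      def main(args: Array[String]): Unit = {
        val task = new MiniTask[Int](partitionId = 0) {
          def runTask(attemptId: Long): Int = 42
        }
        println(task.run(0L))          // prints 42
      }
    }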

4. Task is an abstract class; each concrete subclass (ResultTask and ShuffleMapTask) runs its own runTask implementation.

a. ResultTask

    override def runTask(context: TaskContext): U = {
      // Deserialize the RDD and the func using the broadcast variables.
      val ser = SparkEnv.get.closureSerializer.newInstance()
      val (rdd, func) = ser.deserialize[(RDD[T], (TaskContext, Iterator[T]) => U)](
        ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)

      metrics = Some(context.taskMetrics)
      try {
        func(context, rdd.iterator(partition, context))
      } finally {
        context.markTaskCompleted()
      }
    }
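
Stripped of serialization and metrics, a ResultTask is just "apply the job's function to the iterator of one partition and hand the value back". A toy illustration (names are mine; a Seq stands in for the RDD partition):

    object ResultSketch {
      def main(args: Array[String]): Unit = {
        // Stands in for rdd.iterator(partition, context).
        val partition: Iterator[Int] = Seq(1, 2, 3).iterator
        // Stands in for the deserialized func shipped from the driver.
        val func: Iterator[Int] => Int = _.sum
        println(func(partition)) // 6 -- in Spark this value is returned to the driver
      }
    }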

b. ShuffleMapTask

    override def runTask(context: TaskContext): MapStatus = {
      // Deserialize the RDD using the broadcast variable.
      val ser = SparkEnv.get.closureSerializer.newInstance()
      val (rdd, dep) = ser.deserialize[(RDD[_], ShuffleDependency[_, _, _])](
        ByteBuffer.wrap(taskBinary.value), Thread.currentThread.getContextClassLoader)

      metrics = Some(context.taskMetrics)
      var writer: ShuffleWriter[Any, Any] = null
      try {
        val manager = SparkEnv.get.shuffleManager
        writer = manager.getWriter[Any, Any](dep.shuffleHandle, partitionId, context)
        writer.write(rdd.iterator(partition, context).asInstanceOf[Iterator[_ <: Product2[Any, Any]]])
        return writer.stop(success = true).get
      } catch {
        case e: Exception =>
          if (writer != null) {
            writer.stop(success = false)
          }
          throw e
      } finally {
        context.markTaskCompleted()
      }
    }
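
Note the writer lifecycle: write all records, then stop(success = true) to commit and obtain the MapStatus, or stop(success = false) to abort when anything throws. A self-contained sketch of that commit/abort protocol with a toy writer (ToyWriter is mine, not Spark's ShuffleWriter):

    object WriterProtocolSketch {
      trait ToyWriter {
        def write(records: Iterator[(String, Int)]): Unit
        def stop(success: Boolean): Option[String] // Some(status) on commit, None on abort
      }

      def main(args: Array[String]): Unit = {
        val writer = new ToyWriter {
          private val buf = scala.collection.mutable.ArrayBuffer[(String, Int)]()
          def write(records: Iterator[(String, Int)]): Unit = buf ++= records
          def stop(success: Boolean): Option[String] =
            if (success) Some(s"committed ${buf.size} records") else { buf.clear(); None }
        }
        try {
          writer.write(Iterator("a" -> 1, "b" -> 2))
          println(writer.stop(success = true).get)  // commit path, like the MapStatus above
        } catch {
          case e: Exception =>
            writer.stop(success = false)            // abort path
            throw e
        }
      }
    }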

The write call above dispatches to the shuffle writer (the hash-based writer is the default in 1.1.0), which optionally does a map-side combine and then routes each record to the bucket chosen by the partitioner:

    /** Write a bunch of records to this task's output */
    override def write(records: Iterator[_ <: Product2[K, V]]): Unit = {
      val iter = if (dep.aggregator.isDefined) {
        if (dep.mapSideCombine) {
          dep.aggregator.get.combineValuesByKey(records, context)
        } else {
          records
        }
      } else if (dep.aggregator.isEmpty && dep.mapSideCombine) {
        throw new IllegalStateException("Aggregator is empty for map-side combine")
      } else {
        records
      }

      for (elem <- iter) {
        val bucketId = dep.partitioner.getPartition(elem._1)
        shuffle.writers(bucketId).write(elem)
      }
    }
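
The routing step is just "hash the key into one of numBuckets". A sketch of that bucket selection, mirroring what a hash partitioner does (names are mine):

    object BucketSketch {
      val numBuckets = 3

      // Non-negative modulo, as a hash partitioner needs (hashCode can be negative).
      def getPartition(key: Any): Int = {
        val mod = key.hashCode % numBuckets
        if (mod < 0) mod + numBuckets else mod
      }

      def main(args: Array[String]): Unit = {
        for ((k, v) <- Iterator("a" -> 1, "b" -> 2, "c" -> 3))
          println(s"record ($k, $v) -> bucket ${getPartition(k)}")
      }
    }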

shuffle.writers comes from ShuffleBlockManager.forMapTask, which opens one disk writer per bucket, either into a shared file group (when shuffle file consolidation is on) or into one file per (shuffle, map, bucket):

    /**
     * Get a ShuffleWriterGroup for the given map task, which will register it as complete
     * when the writers are closed successfully
     */
    def forMapTask(shuffleId: Int, mapId: Int, numBuckets: Int, serializer: Serializer,
        writeMetrics: ShuffleWriteMetrics) = {
      new ShuffleWriterGroup {
        shuffleStates.putIfAbsent(shuffleId, new ShuffleState(numBuckets))
        private val shuffleState = shuffleStates(shuffleId)
        private var fileGroup: ShuffleFileGroup = null

        val writers: Array[BlockObjectWriter] = if (consolidateShuffleFiles) {
          fileGroup = getUnusedFileGroup()
          Array.tabulate[BlockObjectWriter](numBuckets) { bucketId =>
            val blockId = ShuffleBlockId(shuffleId, mapId, bucketId)
            blockManager.getDiskWriter(blockId, fileGroup(bucketId), serializer, bufferSize,
              writeMetrics)
          }
        } else {
          Array.tabulate[BlockObjectWriter](numBuckets) { bucketId =>
            val blockId = ShuffleBlockId(shuffleId, mapId, bucketId)
            val blockFile = blockManager.diskBlockManager.getFile(blockId)
            // Because of previous failures, the shuffle file may already exist on this machine.
            // If so, remove it.
            if (blockFile.exists) {
              if (blockFile.delete()) {
                logInfo(s"Removed existing shuffle file $blockFile")
              } else {
                logWarning(s"Failed to remove existing shuffle file $blockFile")
              }
            }
            blockManager.getDiskWriter(blockId, blockFile, serializer, bufferSize, writeMetrics)
          }
        }
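
The practical difference between the two branches is file count: without consolidation each shuffle produces numMaps * numBuckets files, while consolidation lets map tasks running on the same core reuse a file group. A sketch that just enumerates the unconsolidated layout (the naming follows the shuffle_<shuffleId>_<mapId>_<bucketId> block ids above; the helper is mine):

    object ShuffleFilesSketch {
      // One file per (map task, bucket) pair, as in the else-branch above.
      def unconsolidatedFiles(shuffleId: Int, numMaps: Int, numBuckets: Int): Seq[String] =
        for (mapId <- 0 until numMaps; bucketId <- 0 until numBuckets)
          yield s"shuffle_${shuffleId}_${mapId}_${bucketId}"

      def main(args: Array[String]): Unit = {
        // 2 map tasks * 3 buckets = 6 files; consolidation would cap this near
        // (concurrent map slots) * numBuckets instead.
        unconsolidatedFiles(shuffleId = 0, numMaps = 2, numBuckets = 3).foreach(println)
      }
    }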
