小记--------spark的worker原理分析及源码分析
/**
*启动driver的源码分析
*/
case LaunchDriver(driverId, driverDesc) =>
  logInfo(s"Asked to launch driver $driverId")
  // Build the DriverRunner that will run and manage this driver.
  val driver = new DriverRunner(
    conf,
    driverId,
    workDir,
    sparkHome,
    driverDesc.copy(command = Worker.maybeUpdateSSLSettings(driverDesc.command, conf)),
    self,
    workerUri,
    securityMgr)
  // Register the runner in `drivers`, keyed by driver id.
  drivers(driverId) = driver
  // Start the driver; see listing 1 (代码1).
  driver.start()

  // Account for the cores and memory requested by the driver description.
  coresUsed += driverDesc.cores
  memoryUsed += driverDesc.mem

// Listing 1 (代码1)
/** Starts a thread to run and manage the driver. */
private[worker] def start(): Unit = {
  // All of the work below happens on a dedicated JVM thread.
  new Thread("DriverRunner for " + driverId) {
    override def run(): Unit = {
      var shutdownHook: AnyRef = null
      try {
        shutdownHook = ShutdownHookManager.addShutdownHook { () =>
          logInfo(s"Worker shutting down, killing driver $driverId")
          kill()
        }

        // prepare driver jars and run driver
        // prepareAndRunDriver creates the working directory, downloads the user jar,
        // builds the ProcessBuilder and runs the driver; see listing 2 (代码2).
        val exitCode = prepareAndRunDriver()

        // set final state depending on if forcibly killed and process exit code
        finalState = if (exitCode == 0) {
          Some(DriverState.FINISHED)
        } else if (killed) {
          Some(DriverState.KILLED)
        } else {
          Some(DriverState.FAILED)
        }
      } catch {
        case e: Exception =>
          kill()
          finalState = Some(DriverState.ERROR)
          finalException = Some(e)
      } finally {
        if (shutdownHook != null) {
          ShutdownHookManager.removeShutdownHook(shutdownHook)
        }
      }

      // notify worker of final driver state, possible exception
      // The worker receives this as a DriverStateChanged message; see listing 3 (代码3).
      worker.send(DriverStateChanged(driverId, finalState.get, finalException))
    }
  }.start()
}

// Listing 2 (代码2)
/**
 * Prepares the driver's working directory and user jar, builds the launch command and
 * runs the driver.
 *
 * @return the value returned by `runDriver` (compared against 0 by the caller to decide
 *         whether the driver finished successfully)
 */
private[worker] def prepareAndRunDriver(): Int = {
  // Step 1: create the working directory for this driver.
  val driverDir = createWorkingDirectory()
  // Step 2: download the user-supplied jar into that directory.
  val localJarFilename = downloadUserJar(driverDir)

  // Replaces the placeholder arguments of the driver command with their concrete values;
  // any other argument is passed through unchanged.
  def substituteVariables(argument: String): String = argument match {
    case "{{WORKER_URL}}" => workerUrl
    case "{{USER_JAR}}" => localJarFilename
    case other => other
  }

  // TODO: If we add ability to submit multiple jars they should also be added here
  // Step 3: build the ProcessBuilder from the driver's launch command, its memory
  // requirement and the Spark home directory.
  val builder = CommandUtils.buildProcessBuilder(driverDesc.command, securityManager,
    driverDesc.mem, sparkHome.getAbsolutePath, substituteVariables)

  runDriver(builder, driverDir, driverDesc.supervise)
}

// Listing 3 (代码3)
// Once a driver has finished, its runner thread sends the final state to the worker.
// The worker hands the message to handleDriverStateChanged, which in turn forwards it
// to the master; see listing 4 (代码4).
case stateChange: DriverStateChanged =>
  handleDriverStateChanged(stateChange)

// Listing 4 (代码4)
/**
 * Handles a driver state change: logs it, forwards it to the master, and releases the
 * driver's bookkeeping (runner entry, memory and cores) held by this worker.
 *
 * @param driverStateChanged the state-change message for the driver
 */
private[worker] def handleDriverStateChanged(driverStateChanged: DriverStateChanged): Unit = {
  val driverId = driverStateChanged.driverId
  val exception = driverStateChanged.exception
  val state = driverStateChanged.state
  state match {
    case DriverState.ERROR =>
      // Do not assume an exception is attached: an empty Option must not abort handling
      // before the master has been notified.
      val cause = exception.map(_.toString).getOrElse("<no exception reported>")
      logWarning(s"Driver $driverId failed with unrecoverable exception: $cause")
    case DriverState.FAILED =>
      logWarning(s"Driver $driverId exited with failure")
    case DriverState.FINISHED =>
      logInfo(s"Driver $driverId exited successfully")
    case DriverState.KILLED =>
      logInfo(s"Driver $driverId was killed by user")
    case _ =>
      logDebug(s"Driver $driverId changed state to $state")
  }

  // Forward the change to the master so it can update its own view of the driver.
  sendToMaster(driverStateChanged)

  // Drop the runner from the active set; only a driver that was actually tracked has
  // resources to give back.
  drivers.remove(driverId) match {
    case Some(driver) =>
      // Keep it in the finished-driver collection.
      finishedDrivers(driverId) = driver
      trimFinishedDriversIfNecessary()
      // Release the memory and cores that were reserved for the driver.
      memoryUsed -= driver.driverDesc.mem
      coresUsed -= driver.driverDesc.cores
    case None =>
      logWarning(s"Unknown driver $driverId changed state to $state; nothing to release")
  }
}
/**
*启动Executor的源码分析
*/
case LaunchExecutor(masterUrl, appId, execId, appDesc, cores_, memory_) =>
  if (masterUrl != activeMasterUrl) {
    logWarning("Invalid Master (" + masterUrl + ") attempted to launch executor.")
  } else {
    // Key under which the runner is tracked in `executors`.
    val fullId = appId + "/" + execId
    try {
      logInfo("Asked to launch executor %s/%d for %s".format(appId, execId, appDesc.name))

      // Create the executor's working directory
      val executorDir = new File(workDir, fullId)
      if (!executorDir.mkdirs()) {
        throw new IOException("Failed to create directory " + executorDir)
      }

      // Create local dirs for the executor. These are passed to the executor via the
      // SPARK_EXECUTOR_DIRS environment variable, and deleted by the Worker when the
      // application finishes.
      val appLocalDirs = appDirectories.getOrElse(appId,
        Utils.getOrCreateLocalRootDirs(conf).map { dir =>
          val appDir = Utils.createDirectory(dir, namePrefix = "executor")
          Utils.chmod700(appDir)
          appDir.getAbsolutePath()
        }.toSeq)
      appDirectories(appId) = appLocalDirs

      // Create the ExecutorRunner for this executor.
      val manager = new ExecutorRunner(
        appId,
        execId,
        appDesc.copy(command = Worker.maybeUpdateSSLSettings(appDesc.command, conf)),
        cores_,
        memory_,
        self,
        workerId,
        host,
        webUi.boundPort,
        publicAddress,
        sparkHome,
        executorDir,
        workerUri,
        conf,
        appLocalDirs, ExecutorState.RUNNING)
      // Track the runner locally, then start it; see listing 5 (代码5).
      executors(fullId) = manager
      manager.start()

      // Account for the cores and memory handed to the executor.
      coresUsed += cores_
      memoryUsed += memory_

      // Report the executor's state to the master so it can update its own records.
      sendToMaster(ExecutorStateChanged(appId, execId, manager.state, None, None))
    } catch {
      case e: Exception =>
        logError(s"Failed to launch executor $appId/$execId for ${appDesc.name}.", e)
        // If the runner was already registered, kill it and stop tracking it.
        executors.get(fullId).foreach { runner =>
          runner.kill()
          executors -= fullId
        }
        sendToMaster(ExecutorStateChanged(appId, execId, ExecutorState.FAILED,
          Some(e.toString), None))
    }
  }

// Listing 5 (代码5)
/**
 * Starts the thread that runs the executor and registers a shutdown hook that kills the
 * executor process.
 */
private[worker] def start(): Unit = {
  // Run the executor on its own JVM thread; see listing 6 (代码6).
  workerThread = new Thread("ExecutorRunner for " + fullId) {
    override def run(): Unit = { fetchAndRunExecutor() }
  }
  workerThread.start()
  // Shutdown hook that kills the executor process on shutdown.
  shutdownHook = ShutdownHookManager.addShutdownHook { () =>
    // It's possible that we arrive here before calling `fetchAndRunExecutor`, then `state` will
    // be `ExecutorState.RUNNING`. In this case, we should set `state` to `FAILED`.
    if (state == ExecutorState.RUNNING) {
      state = ExecutorState.FAILED
    }
    killProcess(Some("Worker shutting down"))
  }
}

// Listing 6 (代码6)
/**
 * Download and run the executor described in our ApplicationDescription
 */
private def fetchAndRunExecutor(): Unit = {
  try {
    // Launch the process: wrap the executor command in a ProcessBuilder.
    val builder = CommandUtils.buildProcessBuilder(appDesc.command, new SecurityManager(conf),
      memory, sparkHome.getAbsolutePath, substituteVariables)
    val command = builder.command()
    val formattedCommand = command.asScala.mkString("\"", "\" \"", "\"")
    logInfo(s"Launch command: $formattedCommand")

    builder.directory(executorDir)
    builder.environment.put("SPARK_EXECUTOR_DIRS", appLocalDirs.mkString(File.pathSeparator))
    // In case we are running this from within the Spark Shell, avoid creating a "scala"
    // parent process for the executor command
    builder.environment.put("SPARK_LAUNCH_WITH_SCALA", "")

    // Add webUI log urls
    val baseUrl =
      if (conf.getBoolean("spark.ui.reverseProxy", false)) {
        s"/proxy/$workerId/logPage/?appId=$appId&executorId=$execId&logType="
      } else {
        s"http://$publicAddress:$webUiPort/logPage/?appId=$appId&executorId=$execId&logType="
      }
    builder.environment.put("SPARK_LOG_URL_STDERR", s"${baseUrl}stderr")
    builder.environment.put("SPARK_LOG_URL_STDOUT", s"${baseUrl}stdout")

    // This call is what actually launches the executor process.
    process = builder.start()
    // NOTE(review): the repeat count of the "=" separator was lost in this copy of the
    // code; 40 is believed to match upstream Spark -- confirm against the Spark source.
    val header = "Spark Executor Command: %s\n%s\n\n".format(
      formattedCommand, "=" * 40)

    // Redirect its stdout and stderr to files
    // (the `stdout` and `stderr` files inside the executor's working directory).
    val stdout = new File(executorDir, "stdout")
    stdoutAppender = FileAppender(process.getInputStream, stdout, conf)

    val stderr = new File(executorDir, "stderr")
    Files.write(header, stderr, StandardCharsets.UTF_8)
    stderrAppender = FileAppender(process.getErrorStream, stderr, conf)

    // Wait for it to exit; executor may exit with code 0 (when driver instructs it to shutdown)
    // or with nonzero exit code
    // waitFor() blocks until the already-started process terminates and yields its exit code.
    val exitCode = process.waitFor()
    state = ExecutorState.EXITED
    val message = "Command exited with code " + exitCode
    // Tell the owning worker that the executor has exited; see listing 7 (代码7).
    worker.send(ExecutorStateChanged(appId, execId, state, Some(message), Some(exitCode)))
  } catch {
    case interrupted: InterruptedException =>
      logInfo("Runner thread for executor " + fullId + " interrupted")
      state = ExecutorState.KILLED
      killProcess(None)
    case e: Exception =>
      logError("Error running executor", e)
      state = ExecutorState.FAILED
      killProcess(Some(e.toString))
  }
}

// Listing 7 (代码7)
// Executor state changes received by the worker are handed to
// handleExecutorStateChanged, which forwards them to the master; see listing 8 (代码8).
case stateChange: ExecutorStateChanged =>
  handleExecutorStateChanged(stateChange)

// Listing 8 (代码8)
/**
 * Forwards an executor state change to the master and, when the new state is a finished
 * one, moves the executor to the finished set and releases its cores and memory.
 *
 * @param executorStateChanged the state-change message for the executor
 */
private[worker] def handleExecutorStateChanged(executorStateChanged: ExecutorStateChanged):
    Unit = {
  // The master is always told about the change first.
  sendToMaster(executorStateChanged)
  val state = executorStateChanged.state
  if (ExecutorState.isFinished(state)) {
    val appId = executorStateChanged.appId
    val fullId = appId + "/" + executorStateChanged.execId

    // Common tail of both log lines below; a `def` so it is only built when logged.
    def outcome: String =
      " finished with state " + state +
        executorStateChanged.message.map(" message " + _).getOrElse("") +
        executorStateChanged.exitStatus.map(" exitStatus " + _).getOrElse("")

    executors.get(fullId) match {
      case None =>
        logInfo("Unknown Executor " + fullId + outcome)
      case Some(executor) =>
        logInfo("Executor " + fullId + outcome)
        // Move the runner from the active set to the finished set.
        executors -= fullId
        finishedExecutors(fullId) = executor
        trimFinishedExecutorsIfNecessary()
        // Give back the cores and memory the executor was using.
        coresUsed -= executor.cores
        memoryUsed -= executor.memory
    }
    maybeCleanupApplication(appId)
  }
}
小记--------spark的worker原理分析及源码分析的更多相关文章
- 65、Spark Streaming:数据接收原理剖析与源码分析
一.数据接收原理 二.源码分析 入口包org.apache.spark.streaming.receiver下ReceiverSupervisorImpl类的onStart()方法 ### overr ...
- SpringMVC关于json、xml自动转换的原理研究[附带源码分析] --转
SpringMVC关于json.xml自动转换的原理研究[附带源码分析] 原文地址:http://www.cnblogs.com/fangjian0423/p/springMVC-xml-json-c ...
- k8s client-go源码分析 informer源码分析(2)-初始化与启动分析
k8s client-go源码分析 informer源码分析(2)-初始化与启动分析 前面一篇文章对k8s informer做了概要分析,本篇文章将对informer的初始化与启动进行分析. info ...
- k8s client-go源码分析 informer源码分析(3)-Reflector源码分析
k8s client-go源码分析 informer源码分析(3)-Reflector源码分析 1.Reflector概述 Reflector从kube-apiserver中list&watc ...
- 66、Spark Streaming:数据处理原理剖析与源码分析(block与batch关系透彻解析)
一.数据处理原理剖析 每隔我们设置的batch interval 的time,就去找ReceiverTracker,将其中的,从上次划分batch的时间,到目前为止的这个batch interval ...
- 64、Spark Streaming:StreamingContext初始化与Receiver启动原理剖析与源码分析
一.StreamingContext源码分析 ###入口 org.apache.spark.streaming/StreamingContext.scala /** * 在创建和完成StreamCon ...
- 19、Executor原理剖析与源码分析
一.原理图解 二.源码分析 1.Executor注册机制 worker中为Application启动的executor,实际上是启动了这个CoarseGrainedExecutorBackend进程: ...
- 18、TaskScheduler原理剖析与源码分析
一.源码分析 ###入口 ###org.apache.spark.scheduler/DAGScheduler.scala // 最后,针对stage的task,创建TaskSet对象,调用taskS ...
- 22、BlockManager原理剖析与源码分析
一.原理 1.图解 Driver上,有BlockManagerMaster,它的功能,就是负责对各个节点上的BlockManager内部管理的数据的元数据进行维护, 比如Block的增删改等操作,都会 ...
随机推荐
- 物联网是前端工程师的新蓝海吗? | Live笔记
物联网是继 Web .无线之后的又一次重大技术变革,在变革的大潮中,程序员的知识体系和思维方式将面临全面更新. 前端开发的历史 在准备这个live的过程中,我回顾了前端开发短暂的历史,有几次我认为非常 ...
- Django-视图函数/模板渲染/过滤器
一.Django的视图函数 一个视图函数(类),简称视图,是一个简单的Python 函数(类),它接受Web请求并且返回Web响应. 响应可以是一张网页的HTML内容,一个重定向,一个404错误,一个 ...
- LG2216 理想的正方形
题意 有一个\(a \times b\)的整数组成的矩阵,现请你从中找出一个\(n \times n\)的正方形区域,使得该区域所有数中的最大值和最小值的差最小 思路 对于每一列,都用两个单调队列维护 ...
- HTML5调用本地摄像头画面,拍照,上传服务器
实现功能和适用业务 采集本地摄像头获取摄像头画面,拍照保存,上传服务器: 前端上传图片处理,展示,缩小,裁剪,上传服务器 实现步骤 调取本地摄像头(getUserMedia)/上传图片,将图片/视频显 ...
- Java写入的常用技巧(二)
在一般从流接收数据写入介质的场景中,大部分存在每批次数据较小,导致小文件较多的问题. 一般考虑设置一个缓冲池,将多个批次的数据先缓冲进去,达到一定大小,再一次性批量写入 //公共缓冲池和缓冲池大小,如 ...
- A*算法解决15数码问题_Python实现
1问题描述 数码问题常被用来演示如何在状态空间中生成动作序列.一个典型的例子是15数码问题,它是由放在一个4×4的16宫格棋盘中的15个数码(1-15)构成,棋盘中的一个单元是空的,它的邻接单元中的数 ...
- react 闲谈
从事前端一段时间了,公司用的框架都是vue,但是不知为何对react却情有独钟,这是不是所谓的吃着碗里的看着锅里的 哈哈哈 从头好好总结下react吧 小白一个 大神勿喷 瞎说一 react是由两部分 ...
- Android解决AVD Hardware Buttons 和DPAD无法使用问题
如图所示按键用鼠标点击时无法响应. 解决方案: 以我创建的AVD名为Tablet为例 1.找到用户目录(我的用户目录yummy),然后进入如下目录 mac: ~/yummy/.android/avd/ ...
- 从数组中找出所有组合为s的数
java版本 package numCombine; /** * 从数组中找出所有组合为s的数 * @author root * */ public class NumComberAll { publ ...
- LC 969. Pancake Sorting
Given an array A, we can perform a pancake flip: We choose some positive integer k <= A.length, t ...