Jobmanager的submitJob逻辑,

/**
* Submits a job to the job manager. The job is registered at the libraryCacheManager which
* creates the job's class loader. The job graph is appended to the corresponding execution
* graph and the execution vertices are queued for scheduling.
*
* @param jobGraph representing the Flink job
* @param jobInfo the job info
* @param isRecovery Flag indicating whether this is a recovery or initial submission
*/
private def submitJob(jobGraph: JobGraph, jobInfo: JobInfo, isRecovery: Boolean = false): Unit = {
if (jobGraph == null) {
jobInfo.notifyClients(
decorateMessage(JobResultFailure(
new SerializedThrowable(
new JobSubmissionException(null, "JobGraph must not be null.")))))
}
else {
val jobId = jobGraph.getJobID
val jobName = jobGraph.getName
var executionGraph: ExecutionGraph = null try {
// Important: We need to make sure that the library registration is the first action,
// because this makes sure that the uploaded jar files are removed in case of
// unsuccessful
try {
libraryCacheManager.registerJob(jobGraph.getJobID, jobGraph.getUserJarBlobKeys,
jobGraph.getClasspaths)
}
var userCodeLoader = libraryCacheManager.getClassLoader(jobGraph.getJobID) //加载Jar val restartStrategy = //加载重启策略
Option(jobGraph.getSerializedExecutionConfig()
.deserializeValue(userCodeLoader)
.getRestartStrategy())
.map(RestartStrategyFactory.createRestartStrategy)
.filter(p => p != null) match {
case Some(strategy) => strategy
case None => restartStrategyFactory.createRestartStrategy()
} val jobMetrics = jobManagerMetricGroup match { //生成job manager metric group
case Some(group) =>
group.addJob(jobGraph) match {
case (jobGroup:Any) => jobGroup
case null => new UnregisteredMetricsGroup()
}
case None =>
new UnregisteredMetricsGroup()
} val numSlots = scheduler.getTotalNumberOfSlots() //现有的slots数目 // see if there already exists an ExecutionGraph for the corresponding job ID
val registerNewGraph = currentJobs.get(jobGraph.getJobID) match {
case Some((graph, currentJobInfo)) =>
executionGraph = graph
currentJobInfo.setLastActive()
false
case None =>
true
} executionGraph = ExecutionGraphBuilder.buildGraph( //build ExecutionGraph
executionGraph,
jobGraph,
flinkConfiguration,
futureExecutor,
ioExecutor,
userCodeLoader,
checkpointRecoveryFactory,
Time.of(timeout.length, timeout.unit),
restartStrategy,
jobMetrics,
numSlots,
log.logger) if (registerNewGraph) { //如果是新的JobGraph,注册到currentJobs
currentJobs.put(jobGraph.getJobID, (executionGraph, jobInfo))
} // get notified about job status changes
executionGraph.registerJobStatusListener( //jobmananger加到通知listeners
new StatusListenerMessenger(self, leaderSessionID.orNull)) jobInfo.clients foreach { //client加到通知listeners
// the sender wants to be notified about state changes
case (client, ListeningBehaviour.EXECUTION_RESULT_AND_STATE_CHANGES) =>
val listener = new StatusListenerMessenger(client, leaderSessionID.orNull)
executionGraph.registerExecutionListener(listener)
executionGraph.registerJobStatusListener(listener)
case _ => // do nothing
} } catch { //失败
case t: Throwable =>
log.error(s"Failed to submit job $jobId ($jobName)", t) libraryCacheManager.unregisterJob(jobId)
currentJobs.remove(jobId) if (executionGraph != null) {
executionGraph.fail(t) //fail executionGraph
} val rt: Throwable = if (t.isInstanceOf[JobExecutionException]) {
t
} else {
new JobExecutionException(jobId, s"Failed to submit job $jobId ($jobName)", t)
} jobInfo.notifyClients(
decorateMessage(JobResultFailure(new SerializedThrowable(rt)))) //通知提交失败
return
} //上面是准备executionGraph,下面是异步提交
// execute the recovery/writing the jobGraph into the SubmittedJobGraphStore asynchronously
// because it is a blocking operation
future {
try {
if (isRecovery) {
// this is a recovery of a master failure (this master takes over)
executionGraph.restoreLatestCheckpointedState(false, false) //加载checkpoint状态
}
else {
// load a savepoint only if this is not starting from a newer checkpoint
// as part of an master failure recovery
val savepointSettings = jobGraph.getSavepointRestoreSettings
if (savepointSettings.restoreSavepoint()) { //处理savePoint
try {
val savepointPath = savepointSettings.getRestorePath()
val allowNonRestored = savepointSettings.allowNonRestoredState() log.info(s"Starting job from savepoint '$savepointPath'" +
(if (allowNonRestored) " (allowing non restored state)" else "") + ".") // load the savepoint as a checkpoint into the system
val savepoint: CompletedCheckpoint = SavepointLoader.loadAndValidateSavepoint(
jobId,
executionGraph.getAllVertices,
savepointPath,
executionGraph.getUserClassLoader,
allowNonRestored) executionGraph.getCheckpointCoordinator.getCheckpointStore
.addCheckpoint(savepoint) // Reset the checkpoint ID counter
val nextCheckpointId: Long = savepoint.getCheckpointID + 1
log.info(s"Reset the checkpoint ID to $nextCheckpointId")
executionGraph.getCheckpointCoordinator.getCheckpointIdCounter
.setCount(nextCheckpointId) executionGraph.restoreLatestCheckpointedState(true, allowNonRestored)
} catch {
case e: Exception =>
jobInfo.notifyClients(
decorateMessage(JobResultFailure(new SerializedThrowable(e))))
throw new SuppressRestartsException(e)
}
} try {
submittedJobGraphs.putJobGraph(new SubmittedJobGraph(jobGraph, jobInfo)) //存储该JobGraph到zk,ZooKeeperSubmittedJobGraphStore
} catch {
case t: Throwable =>
// Don't restart the execution if this fails. Otherwise, the
// job graph will skip ZooKeeper in case of HA.
jobInfo.notifyClients(
decorateMessage(JobResultFailure(new SerializedThrowable(t))))
throw new SuppressRestartsException(t)
}
} jobInfo.notifyClients(
decorateMessage(JobSubmitSuccess(jobGraph.getJobID))) //通知clients提交成功 if (leaderElectionService.hasLeadership) {
// There is a small chance that multiple job managers schedule the same job after if
// they try to recover at the same time. This will eventually be noticed, but can not be
// ruled out from the beginning. // NOTE: Scheduling the job for execution is a separate action from the job submission.
// The success of submitting the job must be independent from the success of scheduling
// the job.
log.info(s"Scheduling job $jobId ($jobName).") executionGraph.scheduleForExecution(scheduler) //开始调度
} else {
// Remove the job graph. Otherwise it will be lingering around and possibly removed from
// ZooKeeper by this JM.
self ! decorateMessage(RemoveJob(jobId, removeJobFromStateBackend = false)) log.warn(s"Submitted job $jobId, but not leader. The other leader needs to recover " +
"this. I am not scheduling the job for execution.")
}
} catch {
case t: Throwable => try {
executionGraph.fail(t)
} catch {
case tt: Throwable =>
log.error("Error while marking ExecutionGraph as failed.", tt)
}
}
}(context.dispatcher)
}
}

可以看到executionGraph在调度前就已经通知用户提交成功

 

当job发生问题,需要调用到tryRestartOrFail

private boolean tryRestartOrFail() {
JobStatus currentState = state; if (currentState == JobStatus.FAILING || currentState == JobStatus.RESTARTING) {
synchronized (progressLock) { //锁 final boolean isFailureCauseAllowingRestart = !(failureCause instanceof SuppressRestartsException);
final boolean isRestartStrategyAllowingRestart = restartStrategy.canRestart(); //重启策略是否允许重启
boolean isRestartable = isFailureCauseAllowingRestart && isRestartStrategyAllowingRestart; if (isRestartable && transitionState(currentState, JobStatus.RESTARTING)) {
restartStrategy.restart(this); return true;
} else if (!isRestartable && transitionState(currentState, JobStatus.FAILED, failureCause)) { //如果不允许重启,就failed
final List<String> reasonsForNoRestart = new ArrayList<>(2);
if (!isFailureCauseAllowingRestart) {
reasonsForNoRestart.add("a type of SuppressRestartsException was thrown");
}
if (!isRestartStrategyAllowingRestart) {
reasonsForNoRestart.add("the restart strategy prevented it");
} LOG.info("Could not restart the job {} ({}) because {}.", getJobName(), getJobID(),
StringUtils.join(reasonsForNoRestart, " and "), failureCause);
postRunCleanup(); return true;
} else {
// we must have changed the state concurrently, thus we cannot complete this operation
return false;
}
}
} else {
// this operation is only allowed in the state FAILING or RESTARTING
return false;
}
}

 

有两处会调用到tryRestartOrFail

1. ExecutionGraph.jobVertexInFinalState

void jobVertexInFinalState() {
synchronized (progressLock) {
if (numFinishedJobVertices >= verticesInCreationOrder.size()) {
throw new IllegalStateException("All vertices are already finished, cannot transition vertex to finished.");
} numFinishedJobVertices++; if (numFinishedJobVertices == verticesInCreationOrder.size()) { //当所有的vertices都已经finished // we are done, transition to the final state
JobStatus current;
while (true) {
current = this.state; if (current == JobStatus.RUNNING) {
if (transitionState(current, JobStatus.FINISHED)) {
postRunCleanup();
break;
}
}
else if (current == JobStatus.CANCELLING) {
if (transitionState(current, JobStatus.CANCELED)) {
postRunCleanup();
break;
}
}
else if (current == JobStatus.FAILING) {
if (tryRestartOrFail()) { //如果failing,调用tryRestartOrFail
break;
}
// concurrent job status change, let's check again
}

2. 显式的调用到ExecutionGraph.fail

} else if (current == JobStatus.RESTARTING) {
this.failureCause = t; if (tryRestartOrFail()) {
return;
}
// concurrent job status change, let's check again
}

 

上面调用到restartStrategy.restart(this);

restartStrategy有很多种,我们先看看

FixedDelayRestartStrategy

 

@Override
public void restart(final ExecutionGraph executionGraph) {
currentRestartAttempt++;
FlinkFuture.supplyAsync(ExecutionGraphRestarter.restartWithDelay(executionGraph, delayBetweenRestartAttempts), executionGraph.getFutureExecutor());
}

异步的调用,ExecutionGraphRestarter.restartWithDelay

最终调用到

executionGraph.restart();
public void restart() {
try {
synchronized (progressLock) {
this.currentExecutions.clear(); Collection<CoLocationGroup> colGroups = new HashSet<>(); for (ExecutionJobVertex jv : this.verticesInCreationOrder) { CoLocationGroup cgroup = jv.getCoLocationGroup();
if(cgroup != null && !colGroups.contains(cgroup)){
cgroup.resetConstraints();
colGroups.add(cgroup);
} jv.resetForNewExecution();
} for (int i = 0; i < stateTimestamps.length; i++) {
if (i != JobStatus.RESTARTING.ordinal()) {
// Only clear the non restarting state in order to preserve when the job was
// restarted. This is needed for the restarting time gauge
stateTimestamps[i] = 0;
}
}
numFinishedJobVertices = 0;
transitionState(JobStatus.RESTARTING, JobStatus.CREATED); // if we have checkpointed state, reload it into the executions
if (checkpointCoordinator != null) {
checkpointCoordinator.restoreLatestCheckpointedState(getAllVertices(), false, false);
}
} scheduleForExecution(slotProvider); //加入schedule
}
catch (Throwable t) {
LOG.warn("Failed to restart the job.", t);
fail(t);
}
}

 

关于重启策略,

参考https://ci.apache.org/projects/flink/flink-docs-release-1.2/dev/restart_strategies.html

If checkpointing is not enabled, the “no restart” strategy is used. If checkpointing is activated and the restart strategy has not been configured, the fixed-delay strategy is used with Integer.MAX_VALUE restart attempts.

 

StreamingJobGraphGenerator
private void configureCheckpointing() {
CheckpointConfig cfg = streamGraph.getCheckpointConfig(); long interval = cfg.getCheckpointInterval();
if (interval > 0) {
// check if a restart strategy has been set, if not then set the FixedDelayRestartStrategy
if (streamGraph.getExecutionConfig().getRestartStrategy() == null) {
// if the user enabled checkpointing, the default number of exec retries is infinite.
streamGraph.getExecutionConfig().setRestartStrategy(
RestartStrategies.fixedDelayRestart(Integer.MAX_VALUE, DEFAULT_RESTART_DELAY));
}
}

当打开checkpoint的时候,默认是使用fixedDelayRestart,并Integer.MAX_VALUE次重启

Flink – submitJob的更多相关文章

  1. Flink – JobManager.submitJob

    JobManager作为actor, case SubmitJob(jobGraph, listeningBehaviour) => val client = sender() val jobI ...

  2. Flink 源码解析 —— JobManager 处理 SubmitJob 的过程

    JobManager 处理 SubmitJob https://t.zsxq.com/3JQJMzZ 博客 1.Flink 从0到1学习 -- Apache Flink 介绍 2.Flink 从0到1 ...

  3. Flink 源码解析 —— TaskManager 处理 SubmitJob 的过程

    TaskManager 处理 SubmitJob 的过程 https://t.zsxq.com/eu7mQZj 博客 1.Flink 从0到1学习 -- Apache Flink 介绍 2.Flink ...

  4. Flink 1.1 – ResourceManager

    Flink resource manager的作用如图,   FlinkResourceManager /** * * <h1>Worker allocation steps</h1 ...

  5. Flink - Checkpoint

    Flink在流上最大的特点,就是引入全局snapshot,   CheckpointCoordinator 做snapshot的核心组件为, CheckpointCoordinator /** * T ...

  6. Apache 流框架 Flink,Spark Streaming,Storm对比分析(一)

    本文由  网易云发布. 1.Flink架构及特性分析 Flink是个相当早的项目,开始于2008年,但只在最近才得到注意.Flink是原生的流处理系统,提供high level的API.Flink也提 ...

  7. Apache Flink:特性、概念、组件栈、架构及原理分析

     2016-04-30 22:24:39    Yanjun Apache Flink是一个面向分布式数据流处理和批量数据处理的开源计算平台,它能够基于同一个Flink运行时(Flink Runtim ...

  8. 追源索骥:透过源码看懂Flink核心框架的执行流程

    li,ol.inline>li{display:inline-block;padding-right:5px;padding-left:5px}dl{margin-bottom:20px}dt, ...

  9. Flink(二)CentOS7.5搭建Flink1.6.1分布式集群

    一. Flink的下载 安装包下载地址:http://flink.apache.org/downloads.html  ,选择对应Hadoop的Flink版本下载 [admin@node21 soft ...

随机推荐

  1. Linux下库打桩机制分析 function Interposition

    [时间:2017-08] [状态:Open] [关键词:linux, libray,打桩,interposition,函数替换,链接器,gcc,malloc,free] 0 引言 本文主要参考< ...

  2. 修改/dev/shm的大小

    修改/dev/shm的大小 修改/etc/fstab的这行: 默认的:tmpfs /dev/shm tmpfs defaults 0 0改成:tmpfs /dev/shm tmpfs defaults ...

  3. 【bootstrap组件】几个常用的好用bs组件

    这次开发了个小TRS系统,虽然是很小,但是作为初心者,第一次用到了很多看起来洋气使用起来有相对简单的各种前端(主要是和bootstrap配合使用)组件.包括bootstrap-select2,boot ...

  4. Java如何获取URL连接的日期?

    Java编程中,如何获取URL连接的日期? 以下示例演示如何使用HttpURLConnection类的httpCon.getDate()方法获取URL连接的日期. package com.yiibai ...

  5. Java如何验证电子邮件地址格式?

    在Java编程中,如何验证电子邮件地址格式? 以下示例演示如何使用String类的matches()方法来验证电子邮件地址. package com.yiibai; public class Vali ...

  6. mycat 9066管理端口

    1.连接mycat 9066管理端口 命令:mysql -uroot -proot -P9066 -h127.0.0.1 -u:用户名 -p:密码 -P:端口 -h:ip地址例:linux路径切换到m ...

  7. 基于【CentOS-7+ Ambari 2.7.0 + HDP 3.0】搭建HAWQ数据仓库——安装配置NTP服务,保证集群时间保持同步

    一.所有节点上使用yum安装配置NTP服务yum install ntp -y 二.选定一台节点作为NTP server, 192.168.58.11修改/etc/ntp.conf vim /etc/ ...

  8. [JS] ECMAScript 6 - Destructuring

    C#里没有这种变态的方法. 虽然变态,但看起来不错的样子. 变量的解构赋值 完全解构:本质上,这种写法属于“模式匹配”,: 不完全解构:同时支持“不完全解构” let [x, y] = [1, 2, ...

  9. [Node.js] 02 - Read Eval Print Loop

    视频课程:带你入门Nodejs,提及了非常多的后端知识点 发布时间: 2017年10月7日 课程时长:193 分钟 类别:后端 课时:22 npm Resource: npm模块管理器[阮一峰] np ...

  10. [Node.js] 03 - Buffer, Stream and File IO

    fs 模块,视频教学 os 模块,视频教学,api doc Buffer类 创建 Buffer 类 // 创建一个长度为 10.且用 0 填充的 Buffer. const buf1 = Buffer ...