yarn作业提交过程源码
记录源码细节,内部有中文注释
Client 端:
//最终通过ApplicationClientProtocol协议提交到RM端的ClientRMService内
package org.apache.hadoop.mapred;
jobclient包内
YarnRunner
public JobStatus submitJob(JobID jobId, String jobSubmitDir, Credentials ts)
throws IOException, InterruptedException { addHistoryToken(ts); // Construct necessary information to start the MR AM
ApplicationSubmissionContext appContext =
createApplicationSubmissionContext(conf, jobSubmitDir, ts); // Submit to ResourceManager
try {
ApplicationId applicationId =
resMgrDelegate.submitApplication(appContext); //提交作业 ApplicationReport appMaster = resMgrDelegate
.getApplicationReport(applicationId); ResourceMgrDelegate类 public ApplicationId
submitApplication(ApplicationSubmissionContext appContext)
throws YarnException, IOException {
return client.submitApplication(appContext);
} public ResourceMgrDelegate(YarnConfiguration conf) {
super(ResourceMgrDelegate.class.getName());
this.conf = conf;
this.client = YarnClient.createYarnClient(); //该方法会创建YarnClientImpl,具体提交逻辑在该类里
init(conf);
start();
} YarnClientImpl类 public ApplicationId
submitApplication(ApplicationSubmissionContext appContext)
throws YarnException, IOException {
ApplicationId applicationId = appContext.getApplicationId();
appContext.setApplicationId(applicationId);
SubmitApplicationRequest request =
Records.newRecord(SubmitApplicationRequest.class);
request.setApplicationSubmissionContext(appContext);
rmClient.submitApplication(request); //ApplicationClientProtocol rmClient
RM端:
//提交只是往中央异步处理器加入RMAppEventType.START事件,异步处理,之后不等待处理结果,直接返回个简单的respone
ClientRMService内: public SubmitApplicationResponse submitApplication(
SubmitApplicationRequest request) throws YarnException {
ApplicationSubmissionContext submissionContext = request
.getApplicationSubmissionContext();
ApplicationId applicationId = submissionContext.getApplicationId();
.....
}
} try {
// call RMAppManager to submit application directly
rmAppManager.submitApplication(submissionContext,
System.currentTimeMillis(), false, user); //作业提交,调用的是RMAppManager中方法 LOG.info("Application with id " + applicationId.getId() +
" submitted by user " + user);
RMAuditLogger.logSuccess(user, AuditConstants.SUBMIT_APP_REQUEST,
"ClientRMService", applicationId);
} catch (YarnException e) {
LOG.info("Exception in submitting application with id " +
applicationId.getId(), e);
RMAuditLogger.logFailure(user, AuditConstants.SUBMIT_APP_REQUEST,
e.getMessage(), "ClientRMService",
"Exception in submitting application", applicationId);
throw e;
}
...
SubmitApplicationResponse response = recordFactory
.newRecordInstance(SubmitApplicationResponse.class);
return response; protected void submitApplication(
ApplicationSubmissionContext submissionContext, long submitTime,
boolean isRecovered, String user) throws YarnException {
...... // Create RMApp
RMApp application =
new RMAppImpl(applicationId, rmContext, this.conf,
submissionContext.getApplicationName(), user,
submissionContext.getQueue(),
submissionContext, this.scheduler, this.masterService,
submitTime, submissionContext.getApplicationType()); .... } // All done, start the RMApp
this.rmContext.getDispatcher().getEventHandler().handle(
new RMAppEvent(applicationId, isRecovered ? RMAppEventType.RECOVER:
RMAppEventType.START)); //往异步处理器增加个RMAppEvent事件,类型枚值RMAppEventType.START
//在RM内部会注册该类型的事件会用什么处理器来处理
} 在RM内部
// Register event handler for RmAppEvents
this.rmDispatcher.register(RMAppEventType.class,
new ApplicationEventDispatcher(this.rmContext));
... //ApplicationEventDispatcher,最终会调用到RMAPPImpl来处理这个事件
public void handle(RMAppEvent event) { this.writeLock.lock();
MAppEventType.START
try {
ApplicationId appID = event.getApplicationId();
LOG.debug("Processing event for " + appID + " of type "
+ event.getType());
final RMAppState oldState = getState();
try {
/* keep the master in sync with the state machine */
this.stateMachine.doTransition(event.getType(), event); //stateMachine通过状态工厂创建,状态工厂核心addTransition
//各种状态转变对应的处理器,有个submit应该是对应到MAppEventType.START
} catch (InvalidStateTransitonException e) {
LOG.error("Can't handle this event at current state", e); private static final class StartAppAttemptTransition extends RMAppTransition {
public void transition(RMAppImpl app, RMAppEvent event) {
if (event.getType().equals(RMAppEventType.APP_SAVED)) {
assert app.getState().equals(RMAppState.NEW_SAVING);
RMAppStoredEvent storeEvent = (RMAppStoredEvent) event;
if(storeEvent.getStoredException() != null) {
// For HA this exception needs to be handled by giving up
// master status if we got fenced
LOG.error("Failed to store application: "
+ storeEvent.getApplicationId(),
storeEvent.getStoredException());
ExitUtil.terminate(1, storeEvent.getStoredException());
}
} app.createNewAttempt(true); //
};
} private void createNewAttempt(boolean startAttempt) {
ApplicationAttemptId appAttemptId =
ApplicationAttemptId.newInstance(applicationId, attempts.size() + 1);
RMAppAttempt attempt =
new RMAppAttemptImpl(appAttemptId, rmContext, scheduler, masterService,
submissionContext, conf, user); //新建个RMAppAttemptImpl
attempts.put(appAttemptId, attempt);
currentAttempt = attempt;
if(startAttempt) {
handler.handle(
new RMAppAttemptEvent(appAttemptId, RMAppAttemptEventType.START));//此处是RMAppAttemptEvent加入异步处理器的队列
//RM register可以看到其对应的处理器,最终调用的是RMAppAttemptImpl的handle方法 } RMAppAttemptImpl类:
public void handle(RMAppAttemptEvent event) { this.writeLock.lock(); try {
ApplicationAttemptId appAttemptID = event.getApplicationAttemptId();
LOG.debug("Processing event for " + appAttemptID + " of type "
+ event.getType());
final RMAppAttemptState oldState = getAppAttemptState();
try {
/* keep the master in sync with the state machine */
this.stateMachine.doTransition(event.getType(), event); //
} catch (InvalidStateTransitonException e) {
..
其中状态机有 .addTransition(RMAppAttemptState.NEW, RMAppAttemptState.SUBMITTED,
RMAppAttemptEventType.START, new AttemptStartedTransition()) AttemptStartedTransition的 Transition方法
...
// Add the application to the scheduler
appAttempt.eventHandler.handle(
new AppAddedSchedulerEvent(appAttempt.applicationAttemptId,
appAttempt.submissionContext.getQueue(), appAttempt.user)) //该事件即是schedulerEventType,会交给schedulerDispatcher
//该对象赋值SchedulerEventDispatcher,它在内部又维护了个类中央异步处理,run方法内都统一通过scheduler处理事件 //查看FIFO Scheduler的handle方法: case APP_ADDED:
{
AppAddedSchedulerEvent appAddedEvent = (AppAddedSchedulerEvent) event;
addApplication(appAddedEvent.getApplicationAttemptId(), appAddedEvent
.getUser()); //
} private synchronized void addApplication(ApplicationAttemptId appAttemptId,
String user) {
// TODO: Fix store
FiCaSchedulerApp schedulerApp =
new FiCaSchedulerApp(appAttemptId, user, DEFAULT_QUEUE, activeUsersManager,
this.rmContext);
applications.put(appAttemptId, schedulerApp);
metrics.submitApp(user, appAttemptId.getAttemptId());
LOG.info("Application Submission: " + appAttemptId.getApplicationId() +
" from " + user + ", currently active: " + applications.size());
rmContext.getDispatcher().getEventHandler().handle(
new RMAppAttemptEvent(appAttemptId,
RMAppAttemptEventType.APP_ACCEPTED)); //又是个新的状态,最终RM的ApplicationMasterLauncher与NM通信
//启动AM,AM又向RM注册,那AM实始化各个map task,reduce task是怎么做的呢
} //该事件会ApplicationAttemptEventDispatcher来处理,在register里注册,会调用RMAppAttempImpl.handle来处理
public void handle(RMAppAttemptEvent event) { this.writeLock.lock(); try {
ApplicationAttemptId appAttemptID = event.getApplicationAttemptId();
LOG.debug("Processing event for " + appAttemptID + " of type "
+ event.getType());
final RMAppAttemptState oldState = getAppAttemptState();
try {
/* keep the master in sync with the state machine */
this.stateMachine.doTransition(event.getType(), event); // RMAppAttemptEventType.APP_ACCEPTED会激发从什么状态到什么状态,然后执行什么事件.addTransition定义
//会到schedulered状态,再通过CONTAINER_ALLOCATED事件到ALLOCATED_SAVING状态,再通过CONTAINER_ACQURIED到
//ALLOCATED状态,再通过LAUNCHED事件到LAUNCHED状态 比如:
.addTransition(RMAppAttemptState.SCHEDULED,
RMAppAttemptState.ALLOCATED_SAVING,
RMAppAttemptEventType.CONTAINER_ALLOCATED,
new AMContainerAllocatedTransition()) //CONTAINER_ALLOCATED会激动SCHEDULED到ALLOCATED_SAVING状态,并执行CONTAINER_ALLOCATED
//最后会在nm端启动appmaster,appmaster会初始化一系列map,reduce task,再向RM注册,向RM发送heartbeat
//为task请求资源,注意心跳可能没有新的请求资源信息,再从RM内存结构里已经分配好取
//注意NM心跳到,也会执行资源分配,保留在内存结构,等appmaster来取 关键是状态机RMAPPImpl RMAppAttempImpl,内部会定义一系列的状态到状态的转换及对应的处理类
yarn作业提交过程源码的更多相关文章
- Hadoop2.x Yarn作业提交(客户端)
转自:http://blog.csdn.net/lihm0_1/article/details/22186833 YARN作业提交的客户端仍然使用RunJar类,和MR1一样,可参考 http://b ...
- YARN作业提交流程剖析
YARN(MapReduce2) Yet Another Resource Negotiator / YARN Application Resource Negotiator对于节点数超出4000的大 ...
- Spark(四十九):Spark On YARN启动流程源码分析(一)
引导: 该篇章主要讲解执行spark-submit.sh提交到将任务提交给Yarn阶段代码分析. spark-submit的入口函数 一般提交一个spark作业的方式采用spark-submit来提交 ...
- MapReduce源码分析之新API作业提交(二):连接集群
MapReduce作业提交时连接集群是通过Job的connect()方法实现的,它实际上是构造集群Cluster实例cluster,代码如下: private synchronized void co ...
- Spark(五十一):Spark On YARN(Yarn-Cluster模式)启动流程源码分析(二)
上篇<Spark(四十九):Spark On YARN启动流程源码分析(一)>我们讲到启动SparkContext初始化,ApplicationMaster启动资源中,讲解的内容明显不完整 ...
- Spark On YARN启动流程源码分析(一)
本文主要参考: a. https://www.cnblogs.com/yy3b2007com/p/10934090.html 0. 说明 a. 关于spark源码会不定期的更新与补充 b. 对于spa ...
- Spark作业执行流程源码解析
目录 相关概念 概述 源码解析 作业提交 划分&提交调度阶段 提交任务 执行任务 结果处理 Reference 本文梳理一下Spark作业执行的流程. Spark作业和任务调度系统是其核心,通 ...
- Spark作业提交至Yarn上执行的 一个异常
(1)控制台Yarn(Cluster模式)打印的异常日志: client token: N/A diagnostics: Application application_1584359 ...
- Spark On YARN(Yarn-Cluster模式)启动流程源码分析(二)
转自:https://www.cnblogs.com/yy3b2007com/p/11087180.html 本章将针对yarn-cluster(--master yarn –deploy-mode ...
随机推荐
- 前端基础------JS
JS中的语句要以分号 ; 为结束符. JS语言基础: 1, JS 的变量名可以使用 下划线, 数字, 字母, $ 组成. 不可以是数字开头 2, 声明变量使用var 变量名. 的格式来进行声明. v ...
- JavaScript中的对象类型详解
To be finished 摘要 1.什么是对象? 2.引用类型和原始类型 3.对象数据属性拥有的特性(Attributes) 4.如何创建对象 a.直接定义 var mango={color:&q ...
- EasyNVR智能云终端硬件使用说明(EasyNVR无插件直播服务硬件的具体使用方法)
问题背景 随着EasyNVR硬件版本(EasyNVR硬件云终端)的发布不少客户选择了EasyNVR云终端作为产品选择,在客户收到EasyNVR云终端的时候肯定都有一个疑问,那就是如何使用手头上的这个小 ...
- 基于HttpClient、Jsoup的爬虫获取指定网页内容
不断尝试,发现越来越多有趣的东西,刚刚接触Jsoup感觉比正则表达式用起来方便,但也有局限只适用HTML的解析. 不能尝试运用到四则运算中(工作室刚开始联系的小程序). 在原来写的HttpClient ...
- ftp 协议分析
File Transfer Protocol(文件传输协议) 使用SOCKET实现 FTP的客户端协议规则: .h #pragma once #include <string> #incl ...
- Django中_Meta 部分用法
周一了,就不长篇大论了,给大家分享一个很实用的知识点,希望大家周末过得开心,愉快,诗和远方在等着你们.而我还在苦逼的撸代码,只为了应付眼前的苟且! model.UserInfo._meta.app_l ...
- Linux用户相关文件之组文件
组信息文件: 1.文件地址: /etc/group -rw-r--r--. 1 root root 492 10月 6 21:56 /etc/group 2.文件内容: xiaol:x:500: 3. ...
- win7与win server 2008防火墙设置
转自:http://blog.51cto.com/jimshu/590411 Windows 防火墙通过阻止未授权用户通过 Internet 或网络访问您的计算机来帮助保护计算机. Windows 2 ...
- JavaScript你所不知道的困惑(3)
版权声明:本文出自水寒的原创文章.未经博主同意不得转载. https://blog.csdn.net/lxq_xsyu/article/details/25600011 困惑一: window.col ...
- 《Python机器学习》笔记(五)
通过降维压缩数据 在前面已经介绍了几种不同的特征选择技术对数据集进行降维的方法.另一种常用于降维的特征选择方法就是特征抽取.数据压缩也是机器学习领域中的一个重要内容.数据压缩技术可以帮助我们对数据及逆 ...