Analysis of the Hadoop Job-Launching Script
```bash
#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script runs the hadoop core commands.

# Resolve the directory this script lives in.
bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin"; pwd`

DEFAULT_LIBEXEC_DIR="$bin"/../libexec
HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
# Source the common environment setup ($JAVA, $CLASSPATH, $HADOOP_OPTS, ...).
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

function print_usage(){
  echo "Usage: hadoop [--config confdir] [COMMAND | CLASSNAME]"
  echo "  CLASSNAME            run the class named CLASSNAME"
  echo " or"
  echo "  where COMMAND is one of:"
  echo "  fs                   run a generic filesystem user client"
  echo "  version              print the version"
  echo "  jar <jar>            run a jar file"
  echo "                       note: please use \"yarn jar\" to launch"
  echo "                             YARN applications, not this command."
  echo "  checknative [-a|-h]  check native hadoop and compression libraries availability"
  echo "  distcp <srcurl> <desturl> copy file or directories recursively"
  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
  echo "  classpath            prints the class path needed to get the"
  echo "                       Hadoop jar and the required libraries"
  echo "  credential           interact with credential providers"
  echo "  daemonlog            get/set the log level for each daemon"
  echo "  trace                view and modify Hadoop tracing settings"
  echo ""
  echo "Most commands print help when invoked w/o parameters."
}

if [ $# = 0 ]; then
  print_usage
  exit
fi

COMMAND=$1
case $COMMAND in
  # usage flags
  --help|-help|-h)
    print_usage
    exit
    ;;

  #hdfs commands
  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
    echo "Instead use the hdfs command for it." 1>&2
    echo "" 1>&2
    #try to locate hdfs and if present, delegate to it.
    shift
    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
      exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
      exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
    else
      echo "HADOOP_HDFS_HOME not found!"
      exit 1
    fi
    ;;

  #mapred commands for backwards compatibility
  pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
    echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
    echo "Instead use the mapred command for it." 1>&2
    echo "" 1>&2
    #try to locate mapred and if present, delegate to it.
    shift
    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
      exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
      exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
    else
      echo "HADOOP_MAPRED_HOME not found!"
      exit 1
    fi
    ;;

  #core commands
  *)
    # the core commands: map each COMMAND name to the Java class to run
    if [ "$COMMAND" = "fs" ] ; then
      CLASS=org.apache.hadoop.fs.FsShell
    elif [ "$COMMAND" = "version" ] ; then
      CLASS=org.apache.hadoop.util.VersionInfo
    elif [ "$COMMAND" = "jar" ] ; then
      CLASS=org.apache.hadoop.util.RunJar
      if [[ -n "${YARN_OPTS}" ]] || [[ -n "${YARN_CLIENT_OPTS}" ]]; then
        echo "WARNING: Use \"yarn jar\" to launch YARN applications." 1>&2
      fi
    elif [ "$COMMAND" = "key" ] ; then
      CLASS=org.apache.hadoop.crypto.key.KeyShell
    elif [ "$COMMAND" = "checknative" ] ; then
      CLASS=org.apache.hadoop.util.NativeLibraryChecker
    elif [ "$COMMAND" = "distcp" ] ; then
      CLASS=org.apache.hadoop.tools.DistCp
      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
    elif [ "$COMMAND" = "daemonlog" ] ; then
      CLASS=org.apache.hadoop.log.LogLevel
    elif [ "$COMMAND" = "archive" ] ; then
      CLASS=org.apache.hadoop.tools.HadoopArchives
      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
    elif [ "$COMMAND" = "credential" ] ; then
      CLASS=org.apache.hadoop.security.alias.CredentialShell
    elif [ "$COMMAND" = "trace" ] ; then
      CLASS=org.apache.hadoop.tracing.TraceAdmin
    elif [ "$COMMAND" = "classpath" ] ; then
      if [ "$#" -gt 1 ]; then
        CLASS=org.apache.hadoop.util.Classpath
      else
        # No need to bother starting up a JVM for this simple case.
        if $cygwin; then
          CLASSPATH=$(cygpath -p -w "$CLASSPATH" 2>/dev/null)
        fi
        echo $CLASSPATH
        exit
      fi
    elif [[ "$COMMAND" = -* ]] ; then
      # class and package names cannot begin with a -
      echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
      exit 1
    else
      CLASS=$COMMAND
    fi

    # cygwin path translation
    if $cygwin; then
      CLASSPATH=$(cygpath -p -w "$CLASSPATH" 2>/dev/null)
      HADOOP_LOG_DIR=$(cygpath -w "$HADOOP_LOG_DIR" 2>/dev/null)
      HADOOP_PREFIX=$(cygpath -w "$HADOOP_PREFIX" 2>/dev/null)
      HADOOP_CONF_DIR=$(cygpath -w "$HADOOP_CONF_DIR" 2>/dev/null)
      HADOOP_COMMON_HOME=$(cygpath -w "$HADOOP_COMMON_HOME" 2>/dev/null)
      HADOOP_HDFS_HOME=$(cygpath -w "$HADOOP_HDFS_HOME" 2>/dev/null)
      HADOOP_YARN_HOME=$(cygpath -w "$HADOOP_YARN_HOME" 2>/dev/null)
      HADOOP_MAPRED_HOME=$(cygpath -w "$HADOOP_MAPRED_HOME" 2>/dev/null)
    fi

    shift

    # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
    HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

    #make sure security appender is turned off
    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"

    export CLASSPATH=$CLASSPATH
    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
    ;;
esac
```
As the final `case` branch shows, when the hadoop script is asked to run a jar it sets `CLASS=org.apache.hadoop.util.RunJar` and ultimately execs the JVM on the `org.apache.hadoop.util.RunJar` class.
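To make the hand-off concrete, here is a sketch of what a typical submission expands to. The jar name, main class, and paths below are made-up examples, and the exact JVM flags depend on what hadoop-config.sh and your environment put into `$JAVA`, `$JAVA_HEAP_MAX`, and `$HADOOP_OPTS`:

```bash
# Hypothetical submission (jar name, class name, and paths are examples):
hadoop jar wordcount.jar org.example.WordCount /input /output

# After "shift" removes the "jar" token, the last line of the script
# roughly expands to:
#   exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS \
#       org.apache.hadoop.util.RunJar wordcount.jar org.example.WordCount /input /output
```

Everything after the jar file name is passed through untouched; it is RunJar, not the shell script, that decides which of those arguments is the main class. The decompiled source of RunJar follows.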
```java
package org.apache.hadoop.util;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.lang.reflect.Array;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.jar.Manifest;
import java.util.regex.Pattern;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.io.IOUtils;

@InterfaceAudience.Private
@InterfaceStability.Unstable
public class RunJar {

  /** Pattern that matches any string. */
  public static final Pattern MATCH_ANY = Pattern.compile(".*");

  /** Priority of the RunJar shutdown hook. */
  public static final int SHUTDOWN_HOOK_PRIORITY = 10;

  /** Environment key for using the client classloader. */
  public static final String HADOOP_USE_CLIENT_CLASSLOADER = "HADOOP_USE_CLIENT_CLASSLOADER";

  /** Environment key for the user-supplied hadoop classpath. */
  public static final String HADOOP_CLASSPATH = "HADOOP_CLASSPATH";

  /** Environment key for the system classes of the client classloader. */
  public static final String HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES = "HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES";

  /** Unpack a jar file into a directory. */
  public static void unJar(File jarFile, File toDir) throws IOException {
    unJar(jarFile, toDir, MATCH_ANY);
  }

  /** Unpack matching entries from a jar; non-matching entries are skipped. */
  public static void unJar(File jarFile, File toDir, Pattern unpackRegex)
      throws IOException {
    JarFile jar = new JarFile(jarFile);
    try {
      Enumeration<JarEntry> entries = jar.entries();
      while (entries.hasMoreElements()) {
        final JarEntry entry = entries.nextElement();
        if (!entry.isDirectory() &&
            unpackRegex.matcher(entry.getName()).matches()) {
          InputStream in = jar.getInputStream(entry);
          try {
            File file = new File(toDir, entry.getName());
            ensureDirectory(file.getParentFile());
            OutputStream out = new FileOutputStream(file);
            try {
              IOUtils.copyBytes(in, out, 8192);
            } finally {
              out.close();
            }
          } finally {
            in.close();
          }
        }
      }
    } finally {
      jar.close();
    }
  }

  /** Ensure the existence of a given directory. */
  private static void ensureDirectory(File dir) throws IOException {
    if (!dir.mkdirs() && !dir.isDirectory()) {
      throw new IOException("Mkdirs failed to create " + dir.toString());
    }
  }

  /** Run a Hadoop job jar. */
  public static void main(String[] args) throws Throwable {
    new RunJar().run(args);
  }

  public void run(String[] args) throws Throwable {
    String usage = "RunJar jarFile [mainClass] args...";

    if (args.length < 1) {
      System.err.println(usage);
      System.exit(-1);
    }

    int firstArg = 0;
    String fileName = args[firstArg++];
    File file = new File(fileName);
    if (!file.exists() || !file.isFile()) {
      System.err.println("Not a valid JAR: " + file.getCanonicalPath());
      System.exit(-1);
    }

    // Prefer the Main-Class attribute of the jar's manifest; otherwise the
    // main class must be given as the second command-line argument.
    String mainClassName = null;
    JarFile jarFile;
    try {
      jarFile = new JarFile(fileName);
    } catch (IOException io) {
      throw new IOException("Error opening job jar: " + fileName).initCause(io);
    }

    Manifest manifest = jarFile.getManifest();
    if (manifest != null) {
      mainClassName = manifest.getMainAttributes().getValue("Main-Class");
    }
    jarFile.close();

    if (mainClassName == null) {
      if (args.length < 2) {
        System.err.println(usage);
        System.exit(-1);
      }
      mainClassName = args[firstArg++];
    }
    mainClassName = mainClassName.replaceAll("/", ".");

    // Unpack the jar into a fresh temp directory, deleted again on JVM exit.
    File tmpDir = new File(System.getProperty("java.io.tmpdir"));
    ensureDirectory(tmpDir);

    final File workDir;
    try {
      workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
    } catch (IOException ioe) {
      System.err.println("Error creating temp dir in java.io.tmpdir "
          + tmpDir + " due to " + ioe.getMessage());
      System.exit(-1);
      return;
    }

    if (!workDir.delete()) {
      System.err.println("Delete failed for " + workDir);
      System.exit(-1);
    }
    ensureDirectory(workDir);

    ShutdownHookManager.get().addShutdownHook(
        new Runnable() {
          @Override
          public void run() {
            FileUtil.fullyDelete(workDir);
          }
        }, SHUTDOWN_HOOK_PRIORITY);

    unJar(file, workDir);

    // Build a classloader over the unpacked contents, install it as the
    // thread context classloader, then reflectively invoke main(String[]).
    ClassLoader loader = createClassLoader(file, workDir);

    Thread.currentThread().setContextClassLoader(loader);
    Class<?> mainClass = Class.forName(mainClassName, true, loader);
    Method main = mainClass.getMethod("main", new Class[] {
        Array.newInstance(String.class, 0).getClass() });
    String[] newArgs = Arrays.asList(args)
        .subList(firstArg, args.length).toArray(new String[0]);
    try {
      main.invoke(null, new Object[] { newArgs });
    } catch (InvocationTargetException e) {
      throw e.getTargetException();
    }
  }

  /**
   * Create the classloader for the user code: either an isolating
   * ApplicationClassLoader (when HADOOP_USE_CLIENT_CLASSLOADER is set) or a
   * plain URLClassLoader over the jar, its classes/ dir, and its lib/ jars.
   */
  private ClassLoader createClassLoader(File file, final File workDir)
      throws MalformedURLException {
    ClassLoader loader;
    if (useClientClassLoader()) {
      StringBuilder sb = new StringBuilder();
      sb.append(workDir + "/")
          .append(File.pathSeparator).append(file)
          .append(File.pathSeparator).append(workDir + "/classes/")
          .append(File.pathSeparator).append(workDir + "/lib/*");
      // HADOOP_CLASSPATH is added to the client classpath
      String hadoopClasspath = getHadoopClasspath();
      if (hadoopClasspath != null && !hadoopClasspath.isEmpty()) {
        sb.append(File.pathSeparator).append(hadoopClasspath);
      }
      String clientClasspath = sb.toString();
      String systemClasses = getSystemClasses();
      List<String> systemClassesList = systemClasses == null ?
          null :
          Arrays.asList(StringUtils.getTrimmedStrings(systemClasses));
      // Create an isolated classloader
      loader = new ApplicationClassLoader(clientClasspath,
          getClass().getClassLoader(), systemClassesList);
    } else {
      List<URL> classPath = new ArrayList<URL>();
      classPath.add(new File(workDir + "/").toURI().toURL());
      classPath.add(file.toURI().toURL());
      classPath.add(new File(workDir, "classes/").toURI().toURL());
      File[] libs = new File(workDir, "lib").listFiles();
      if (libs != null) {
        for (int i = 0; i < libs.length; i++) {
          classPath.add(libs[i].toURI().toURL());
        }
      }
      loader = new URLClassLoader(classPath.toArray(new URL[0]));
    }
    return loader;
  }

  boolean useClientClassLoader() {
    return Boolean.parseBoolean(System.getenv(HADOOP_USE_CLIENT_CLASSLOADER));
  }

  String getHadoopClasspath() {
    return System.getenv(HADOOP_CLASSPATH);
  }

  String getSystemClasses() {
    return System.getenv(HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES);
  }
}
```
As the source shows, RunJar unpacks the job jar into a temporary working directory, builds a classpath over the unpacked contents (the jar itself plus its `classes/` and `lib/` entries), resolves the main class from the manifest's Main-Class attribute or from the command line, and invokes its `main` method through reflection.
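That core flow can be demonstrated without any of the Hadoop-specific pieces (ShutdownHookManager, ApplicationClassLoader, the unpack step). Below is a minimal, self-contained sketch; the class name `MiniRunJar` is invented for illustration, and error handling for missing arguments is omitted:

```java
import java.io.File;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Arrays;
import java.util.jar.JarFile;
import java.util.jar.Manifest;

// Minimal sketch of the RunJar flow: resolve the entry point from the
// manifest (or the CLI), load the jar via a classloader, invoke main().
public class MiniRunJar {
    public static void main(String[] args) throws Throwable {
        String jarPath = args[0];

        // 1. Prefer the Main-Class attribute from the jar's manifest.
        String mainClassName = null;
        try (JarFile jar = new JarFile(jarPath)) {
            Manifest manifest = jar.getManifest();
            if (manifest != null) {
                mainClassName = manifest.getMainAttributes().getValue("Main-Class");
            }
        }
        int firstArg = 1;
        if (mainClassName == null) {
            mainClassName = args[firstArg++]; // fall back to the CLI argument
        }
        mainClassName = mainClassName.replaceAll("/", ".");

        // 2. Load the jar through a dedicated classloader, as RunJar does.
        URLClassLoader loader = new URLClassLoader(
                new URL[] { new File(jarPath).toURI().toURL() },
                MiniRunJar.class.getClassLoader());
        Thread.currentThread().setContextClassLoader(loader);

        // 3. Look up static main(String[]) and invoke it with remaining args.
        Class<?> mainClass = Class.forName(mainClassName, true, loader);
        Method main = mainClass.getMethod("main", String[].class);
        String[] rest = Arrays.copyOfRange(args, firstArg, args.length);
        main.invoke(null, (Object) rest);
    }
}
```

With a jar whose manifest declares a Main-Class, `java MiniRunJar app.jar arg1 arg2` behaves like a stripped-down `hadoop jar app.jar arg1 arg2`: the same manifest lookup, the same reflective dispatch, just without the unpacking, shutdown hook, and classloader isolation that RunJar adds on top.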
赋值操作符 类赋值操作符接受类类型形参,通常,该形参是对类类型的 const 引用,但也可以是类类型或对类类型的非 const 引用.如果没有定义这个操作符,则编译器将合成它.类赋值操作符必须是类的成 ...