#!/usr/bin/env bash

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script runs the hadoop core commands.

bin=`which $0`
bin=`dirname ${bin}`
bin=`cd "$bin"; pwd`

DEFAULT_LIBEXEC_DIR="$bin"/../libexec

HADOOP_LIBEXEC_DIR=${HADOOP_LIBEXEC_DIR:-$DEFAULT_LIBEXEC_DIR}
. $HADOOP_LIBEXEC_DIR/hadoop-config.sh

function print_usage(){
  echo "Usage: hadoop [--config confdir] [COMMAND | CLASSNAME]"
  echo "  CLASSNAME            run the class named CLASSNAME"
  echo " or"
  echo "  where COMMAND is one of:"
  echo "  fs                   run a generic filesystem user client"
  echo "  version              print the version"
  echo "  jar <jar>            run a jar file"
  echo "                       note: please use \"yarn jar\" to launch"
  echo "                             YARN applications, not this command."
  echo "  checknative [-a|-h]  check native hadoop and compression libraries availability"
  echo "  distcp <srcurl> <desturl> copy file or directories recursively"
  echo "  archive -archiveName NAME -p <parent path> <src>* <dest> create a hadoop archive"
  echo "  classpath            prints the class path needed to get the"
  echo "                       Hadoop jar and the required libraries"
  echo "  credential           interact with credential providers"
  echo "  daemonlog            get/set the log level for each daemon"
  echo "  trace                view and modify Hadoop tracing settings"
  echo ""
  echo "Most commands print help when invoked w/o parameters."
}

if [ $# = 0 ]; then
  print_usage
  exit
fi

COMMAND=$1
case $COMMAND in
  # usage flags
  --help|-help|-h)
    print_usage
    exit
    ;;

  #hdfs commands
  namenode|secondarynamenode|datanode|dfs|dfsadmin|fsck|balancer|fetchdt|oiv|dfsgroups|portmap|nfs3)
    echo "DEPRECATED: Use of this script to execute hdfs command is deprecated." 1>&2
    echo "Instead use the hdfs command for it." 1>&2
    echo "" 1>&2
    #try to locate hdfs and if present, delegate to it.
    shift
    if [ -f "${HADOOP_HDFS_HOME}"/bin/hdfs ]; then
      exec "${HADOOP_HDFS_HOME}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/hdfs ]; then
      exec "${HADOOP_PREFIX}"/bin/hdfs ${COMMAND/dfsgroups/groups} "$@"
    else
      echo "HADOOP_HDFS_HOME not found!"
      exit 1
    fi
    ;;

  #mapred commands for backwards compatibility
  pipes|job|queue|mrgroups|mradmin|jobtracker|tasktracker)
    echo "DEPRECATED: Use of this script to execute mapred command is deprecated." 1>&2
    echo "Instead use the mapred command for it." 1>&2
    echo "" 1>&2
    #try to locate mapred and if present, delegate to it.
    shift
    if [ -f "${HADOOP_MAPRED_HOME}"/bin/mapred ]; then
      exec "${HADOOP_MAPRED_HOME}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
    elif [ -f "${HADOOP_PREFIX}"/bin/mapred ]; then
      exec "${HADOOP_PREFIX}"/bin/mapred ${COMMAND/mrgroups/groups} "$@"
    else
      echo "HADOOP_MAPRED_HOME not found!"
      exit 1
    fi
    ;;

  #core commands
  *)
    # the core commands
    if [ "$COMMAND" = "fs" ] ; then
      CLASS=org.apache.hadoop.fs.FsShell
    elif [ "$COMMAND" = "version" ] ; then
      CLASS=org.apache.hadoop.util.VersionInfo
    elif [ "$COMMAND" = "jar" ] ; then
      CLASS=org.apache.hadoop.util.RunJar
      if [[ -n "${YARN_OPTS}" ]] || [[ -n "${YARN_CLIENT_OPTS}" ]]; then
        echo "WARNING: Use \"yarn jar\" to launch YARN applications." 1>&2
      fi
    elif [ "$COMMAND" = "key" ] ; then
      CLASS=org.apache.hadoop.crypto.key.KeyShell
    elif [ "$COMMAND" = "checknative" ] ; then
      CLASS=org.apache.hadoop.util.NativeLibraryChecker
    elif [ "$COMMAND" = "distcp" ] ; then
      CLASS=org.apache.hadoop.tools.DistCp
      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
    elif [ "$COMMAND" = "daemonlog" ] ; then
      CLASS=org.apache.hadoop.log.LogLevel
    elif [ "$COMMAND" = "archive" ] ; then
      CLASS=org.apache.hadoop.tools.HadoopArchives
      CLASSPATH=${CLASSPATH}:${TOOL_PATH}
    elif [ "$COMMAND" = "credential" ] ; then
      CLASS=org.apache.hadoop.security.alias.CredentialShell
    elif [ "$COMMAND" = "trace" ] ; then
      CLASS=org.apache.hadoop.tracing.TraceAdmin
    elif [ "$COMMAND" = "classpath" ] ; then
      if [ "$#" -gt 1 ]; then
        CLASS=org.apache.hadoop.util.Classpath
      else
        # No need to bother starting up a JVM for this simple case.
        if $cygwin; then
          CLASSPATH=$(cygpath -p -w "$CLASSPATH" 2>/dev/null)
        fi
        echo $CLASSPATH
        exit
      fi
    elif [[ "$COMMAND" = -* ]] ; then
      # class and package names cannot begin with a -
      echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
      exit 1
    else
      CLASS=$COMMAND
    fi

    # cygwin path translation
    if $cygwin; then
      CLASSPATH=$(cygpath -p -w "$CLASSPATH" 2>/dev/null)
      HADOOP_LOG_DIR=$(cygpath -w "$HADOOP_LOG_DIR" 2>/dev/null)
      HADOOP_PREFIX=$(cygpath -w "$HADOOP_PREFIX" 2>/dev/null)
      HADOOP_CONF_DIR=$(cygpath -w "$HADOOP_CONF_DIR" 2>/dev/null)
      HADOOP_COMMON_HOME=$(cygpath -w "$HADOOP_COMMON_HOME" 2>/dev/null)
      HADOOP_HDFS_HOME=$(cygpath -w "$HADOOP_HDFS_HOME" 2>/dev/null)
      HADOOP_YARN_HOME=$(cygpath -w "$HADOOP_YARN_HOME" 2>/dev/null)
      HADOOP_MAPRED_HOME=$(cygpath -w "$HADOOP_MAPRED_HOME" 2>/dev/null)
    fi

    shift

    # Always respect HADOOP_OPTS and HADOOP_CLIENT_OPTS
    HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"

    #make sure security appender is turned off
    HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"

    export CLASSPATH=$CLASSPATH
    exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
    ;;

esac

As the script shows, when the hadoop script is asked to run a jar (the jar command), it ultimately execs java with org.apache.hadoop.util.RunJar as the main class.
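Before walking through RunJar itself, here is a minimal standalone sketch — not part of Hadoop, class name and command line are hypothetical — of reading a jar's Main-Class manifest attribute with the standard java.util.jar API, which is the same lookup RunJar performs when no main class is passed on the command line:

import java.util.jar.JarFile;
import java.util.jar.Manifest;

// Hypothetical helper: prints the Main-Class attribute of the jar
// whose path is passed as the first argument.
public class ManifestPeek {
    public static void main(String[] args) throws Exception {
        try (JarFile jar = new JarFile(args[0])) {
            Manifest manifest = jar.getManifest(); // null if the jar has no manifest
            String mainClass = (manifest == null)
                ? null
                : manifest.getMainAttributes().getValue("Main-Class");
            System.out.println("Main-Class: " + mainClass);
        }
    }
}

For example, "java ManifestPeek myapp.jar" would print the entry point baked into the jar at build time. With that in mind, here is the decompiled source of RunJar: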

package org.apache.hadoop.util;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintStream;
import java.lang.reflect.Array;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.jar.Attributes;
import java.util.jar.JarEntry;
import java.util.jar.JarFile;
import java.util.jar.Manifest;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.io.IOUtils;

/** Run a Hadoop job jar. */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class RunJar {
  /** Pattern that matches any string. */
  public static final Pattern MATCH_ANY = Pattern.compile(".*");
  /** Priority of the shutdown hook that deletes the unjar directory. */
  public static final int SHUTDOWN_HOOK_PRIORITY = 10;
  /** Environment key for enabling the client classloader. */
  public static final String HADOOP_USE_CLIENT_CLASSLOADER = "HADOOP_USE_CLIENT_CLASSLOADER";
  /** Environment key for the user-provided hadoop classpath. */
  public static final String HADOOP_CLASSPATH = "HADOOP_CLASSPATH";
  /** Environment key for the system classes exposed by the client classloader. */
  public static final String HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES = "HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES";

  /** Unpack a jar file into a directory. */
  public static void unJar(File jarFile, File toDir) throws IOException {
    unJar(jarFile, toDir, MATCH_ANY);
  }

  /** Unpack matching entries from a jar file into a directory;
   *  entries whose names do not match the pattern are skipped. */
  public static void unJar(File jarFile, File toDir, Pattern unpackRegex)
      throws IOException {
    JarFile jar = new JarFile(jarFile);
    try {
      Enumeration<JarEntry> entries = jar.entries();
      while (entries.hasMoreElements()) {
        JarEntry entry = entries.nextElement();
        if (!entry.isDirectory() &&
            unpackRegex.matcher(entry.getName()).matches()) {
          InputStream in = jar.getInputStream(entry);
          try {
            File file = new File(toDir, entry.getName());
            ensureDirectory(file.getParentFile());
            OutputStream out = new FileOutputStream(file);
            try {
              IOUtils.copyBytes(in, out, 8192);
            } finally {
              out.close();
            }
          } finally {
            in.close();
          }
        }
      }
    } finally {
      jar.close();
    }
  }

  private static void ensureDirectory(File dir) throws IOException {
    if (!dir.mkdirs() && !dir.isDirectory()) {
      throw new IOException("Mkdirs failed to create " + dir.toString());
    }
  }

  public static void main(String[] args) throws Throwable {
    new RunJar().run(args);
  }

  public void run(String[] args) throws Throwable {
    String usage = "RunJar jarFile [mainClass] args...";

    if (args.length < 1) {
      System.err.println(usage);
      System.exit(-1);
    }

    // the first argument is the jar file to run
    int firstArg = 0;
    String fileName = args[firstArg++];
    File file = new File(fileName);
    if (!file.exists() || !file.isFile()) {
      System.err.println("Not a valid JAR: " + file.getCanonicalPath());
      System.exit(-1);
    }

    // prefer the Main-Class manifest attribute; otherwise the next
    // command-line argument names the class to run
    String mainClassName = null;
    JarFile jarFile;
    try {
      jarFile = new JarFile(fileName);
    } catch (IOException io) {
      throw new IOException("Error opening job jar: " + fileName).initCause(io);
    }

    Manifest manifest = jarFile.getManifest();
    if (manifest != null) {
      mainClassName = manifest.getMainAttributes().getValue("Main-Class");
    }
    jarFile.close();

    if (mainClassName == null) {
      if (args.length < 2) {
        System.err.println(usage);
        System.exit(-1);
      }
      mainClassName = args[firstArg++];
    }
    mainClassName = mainClassName.replaceAll("/", ".");

    // unpack the jar into a unique working directory under java.io.tmpdir
    File tmpDir = new File(System.getProperty("java.io.tmpdir"));
    ensureDirectory(tmpDir);

    final File workDir;
    try {
      workDir = File.createTempFile("hadoop-unjar", "", tmpDir);
    } catch (IOException ioe) {
      System.err.println("Error creating temp dir in java.io.tmpdir "
          + tmpDir + " due to " + ioe.getMessage());
      System.exit(-1);
      return;
    }

    if (!workDir.delete()) {
      System.err.println("Delete failed for " + workDir);
      System.exit(-1);
    }
    ensureDirectory(workDir);

    // clean up the working directory when the JVM exits
    ShutdownHookManager.get().addShutdownHook(new Runnable() {
      public void run() {
        FileUtil.fullyDelete(workDir);
      }
    }, SHUTDOWN_HOOK_PRIORITY);

    unJar(file, workDir);

    ClassLoader loader = createClassLoader(file, workDir);

    // invoke the user's main method via reflection under the new classloader
    Thread.currentThread().setContextClassLoader(loader);
    Class<?> mainClass = Class.forName(mainClassName, true, loader);
    Method main = mainClass.getMethod("main", new Class[] {
        Array.newInstance(String.class, 0).getClass() });

    String[] newArgs = Arrays.asList(args)
        .subList(firstArg, args.length).toArray(new String[0]);
    try {
      main.invoke(null, new Object[] { newArgs });
    } catch (InvocationTargetException e) {
      // unwrap so the caller sees the user program's original exception
      throw e.getTargetException();
    }
  }

  private ClassLoader createClassLoader(File file, File workDir)
      throws MalformedURLException {
    ClassLoader loader;
    // see if the client classloader is enabled
    if (useClientClassLoader()) {
      StringBuilder sb = new StringBuilder();
      sb.append(workDir + "/")
          .append(File.pathSeparator).append(file)
          .append(File.pathSeparator).append(workDir + "/classes/")
          .append(File.pathSeparator).append(workDir + "/lib/*");
      // HADOOP_CLASSPATH is added to the client classpath
      String hadoopClasspath = getHadoopClasspath();
      if (hadoopClasspath != null && !hadoopClasspath.isEmpty()) {
        sb.append(File.pathSeparator).append(hadoopClasspath);
      }
      String clientClasspath = sb.toString();
      // get the system classes
      String systemClasses = getSystemClasses();
      List<String> systemClassesList = systemClasses == null ?
          null : Arrays.asList(StringUtils.getTrimmedStrings(systemClasses));
      // create an application classloader that could be restricted
      loader = new ApplicationClassLoader(clientClasspath,
          getClass().getClassLoader(), systemClassesList);
    } else {
      List<URL> classPath = new ArrayList<URL>();
      classPath.add(new File(workDir + "/").toURI().toURL());
      classPath.add(file.toURI().toURL());
      classPath.add(new File(workDir, "classes/").toURI().toURL());
      File[] libs = new File(workDir, "lib").listFiles();
      if (libs != null) {
        for (int i = 0; i < libs.length; i++) {
          classPath.add(libs[i].toURI().toURL());
        }
      }
      // create a normal parent-delegating classloader
      loader = new URLClassLoader(classPath.toArray(new URL[0]));
    }
    return loader;
  }

  boolean useClientClassLoader() {
    return Boolean.parseBoolean(System.getenv(HADOOP_USE_CLIENT_CLASSLOADER));
  }

  String getHadoopClasspath() {
    return System.getenv(HADOOP_CLASSPATH);
  }

  String getSystemClasses() {
    return System.getenv(HADOOP_CLIENT_CLASSLOADER_SYSTEM_CLASSES);
  }
}

As the code shows, this class unpacks the loaded jar into a temporary working directory, adds the unpacked contents (plus the jar itself) to the classpath of a new classloader, resolves the jar's main class, and invokes that class's main method via reflection.
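To make that control flow concrete, here is a stripped-down sketch of the same pattern — hypothetical class name, and it omits RunJar's unpacking, client classloader, and shutdown hook — load a jar through a URLClassLoader, install it as the thread's context classloader, and reflectively invoke the target class's static main:

import java.io.File;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.Arrays;

// Hypothetical mini RunJar: args[0] = jar path, args[1] = main class,
// remaining arguments are passed through to the target program.
public class MiniRunJar {
    public static void main(String[] args) throws Throwable {
        File jar = new File(args[0]);
        URLClassLoader loader = new URLClassLoader(
            new URL[] { jar.toURI().toURL() },
            MiniRunJar.class.getClassLoader());
        // libraries that consult the context classloader will see the jar too
        Thread.currentThread().setContextClassLoader(loader);
        Class<?> mainClass = Class.forName(args[1], true, loader);
        Method main = mainClass.getMethod("main", String[].class);
        String[] rest = Arrays.copyOfRange(args, 2, args.length);
        try {
            main.invoke(null, (Object) rest);
        } catch (InvocationTargetException e) {
            throw e.getTargetException(); // surface the program's real exception
        }
    }
}

Note the (Object) cast on the invoke argument: without it, reflection would spread the array as individual varargs instead of passing it as the single String[] parameter — the same reason RunJar wraps newArgs in an Object[].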
