A Hadoop job may consist of many map tasks and reduce tasks. Therefore, debugging a
Hadoop job is often a complicated process. It is a good practice to first test a Hadoop job
using unit tests by running it with a subset of the data.
However, sometimes it is necessary to debug a Hadoop job in a distributed mode. To support
such cases, Hadoop provides a mechanism called debug scripts. This recipe explains how to
use debug scripts.

A debug script is a shell script that Hadoop executes whenever a task encounters
an error. The script has access to the $script, $stdout, $stderr, $syslog, and
$jobconf properties, which Hadoop populates as environment variables. You can find a
sample script in resources/chapter3/debugscript. We can use debug scripts
to copy all the logfiles to a single location, e-mail them to a single e-mail account, or perform
some analysis.
# Debug script: appends a marker line followed by the values of $script,
# $stdout, $stderr, $syslog and $jobconf to the file named by $LOG_FILE.
#
# LOG_FILE is not assigned anywhere in this snippet; fall back to a file under
# /tmp so the redirection below still has a valid target when it is unset.
LOG_FILE="${LOG_FILE:-/tmp/hadoop-debug-script.log}"

# Quote every expansion so values containing spaces or glob characters are
# written verbatim, and open the log file once for the whole group.
printf '%s\n' \
  "Run the script" \
  "$script" \
  "$stdout" \
  "$stderr" \
  "$syslog" \
  "$jobconf" >> "$LOG_FILE"

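When you run this program, pay attention to the directory you execute it from: the debug script is referenced by the relative path resources/chapter3/debugscript, so it will not be found if the working directory is wrong.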

  1. package chapter3;
  3. import java.net.URI;
  5. import org.apache.hadoop.filecache.DistributedCache;
  6. import org.apache.hadoop.fs.FileStatus;
  7. import org.apache.hadoop.fs.FileSystem;
  8. import org.apache.hadoop.fs.Path;
  9. import org.apache.hadoop.io.IntWritable;
  10. import org.apache.hadoop.io.Text;
  11. import org.apache.hadoop.mapred.JobConf;
  12. import org.apache.hadoop.mapreduce.Job;
  13. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  14. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  16. public class WordcountWithDebugScript {
  17. private static final String scriptFileLocation = "resources/chapter3/debugscript";
  18. private static final String HDFS_ROOT = "/debug";
  20. public static void setupFailedTaskScript(JobConf conf) throws Exception {
  22. // create a directory on HDFS where we'll upload the fail scripts
  23. FileSystem fs = FileSystem.get(conf);
  24. // Path debugDir = new Path("/debug");
  25. Path debugDir = new Path(HDFS_ROOT);
  27. // who knows what's already in this directory; let's just clear it.
  28. if (fs.exists(debugDir)) {
  29. fs.delete(debugDir, true);
  30. }
  32. // ...and then make sure it exists again
  33. fs.mkdirs(debugDir);
  35. // upload the local scripts into HDFS
  36. fs.copyFromLocalFile(new Path(scriptFileLocation), new Path(HDFS_ROOT
  37. + "/fail-script"));
  39. FileStatus[] list = fs.listStatus(new Path(HDFS_ROOT));
  40. if (list == null || list.length == 0) {
  41. System.out.println("No File found");
  42. } else {
  43. for (FileStatus f : list) {
  44. System.out.println("File found " + f.getPath());
  45. }
  46. }
  48. conf.setMapDebugScript("./fail-script");
  49. conf.setReduceDebugScript("./fail-script");
  50. // this create a simlink from the job directory to cache directory of
  51. // the mapper node
  52. DistributedCache.createSymlink(conf);
  54. URI fsUri = fs.getUri();
  56. String mapUriStr = fsUri.toString() + HDFS_ROOT
  57. + "/fail-script#fail-script";
  58. System.out.println("added " + mapUriStr + "to distributed cache 1");
  59. URI mapUri = new URI(mapUriStr);
  60. // Following copy the map uri to the cache directory of the job node
  61. DistributedCache.addCacheFile(mapUri, conf);
  62. }
  64. public static void main(String[] args) throws Exception {
  65. JobConf conf = new JobConf();
  66. setupFailedTaskScript(conf);
  67. Job job = new Job(conf, "word count");
  69. job.setJarByClass(FaultyWordCount.class);
  70. job.setMapperClass(FaultyWordCount.TokenizerMapper.class);
  71. job.setReducerClass(FaultyWordCount.IntSumReducer.class);
  72. job.setOutputKeyClass(Text.class);
  73. job.setOutputValueClass(IntWritable.class);
  74. FileSystem.get(conf).delete(new Path(args[1]), true);
  75. FileInputFormat.addInputPath(job, new Path(args[0]));
  76. FileOutputFormat.setOutputPath(job, new Path(args[1]));
  77. job.waitForCompletion(true);
  78. }
  80. }

