Some questions:
    1. For a total (global) sort, the job should finish with sortJob.setNumReduceTasks(1); with more than one reduce task, each reducer only sorts its own partition. (A multi-reducer alternative is sketched right after this list.)
    2. If multiple reduce tasks all updated one static IntWritable counter, wouldn't the ranks come out scrambled? Note, though, that each reduce task normally runs in its own JVM, so a static field is never actually shared between tasks; the real problem is that every reducer would restart its numbering over just its own partition.
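A hedged sketch of the multi-reducer alternative mentioned in question 1: a custom range Partitioner (the class name and bucket bounds below are assumptions, not part of the original post) can route small keys to reducer 0, larger keys to reducer 1, and so on. Each reducer's output is then internally sorted, and concatenating part-r-00000, part-r-00001, ... in partition order yields a globally sorted result.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Partitioner;

// Hypothetical range partitioner: assumes keys are non-negative and below 100000.
// Reducer i receives the i-th slice of the key range, so the sorted output
// files can simply be concatenated in partition order.
public class IntRangePartitioner extends Partitioner<IntWritable, NullWritable> {
    @Override
    public int getPartition(IntWritable key, NullWritable value, int numPartitions) {
        int bucket = key.get() / (100000 / numPartitions);
        return Math.min(bucket, numPartitions - 1);
    }
}

It would be enabled with sortJob.setPartitionerClass(IntRangePartitioner.class) together with setNumReduceTasks(n). Hadoop also ships TotalOrderPartitioner for exactly this purpose, driven by a sampled partition file.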
    Input data:
    file1
    2
    32
    654
    32
    15
    756
    65223
    file2
    5956
    22
    650
    92
    file3
    26
    54
    6
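For reference, a minimal sketch of getting these three files into HDFS from Java (the local file names and the /in_sort path are assumptions matching the job configuration below; hdfs dfs -put does the same thing from the shell):

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class UploadInput {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Connect to the same namenode the sort job uses.
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        for (String name : new String[]{"file1", "file2", "file3"}) {
            // Copies ./file1 etc. into /in_sort on HDFS.
            fs.copyFromLocalFile(new Path(name), new Path("/in_sort/" + name));
        }
    }
}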
The first version of the code (several literal 1s were lost when the post was scraped; they are restored below, e.g. in setNumReduceTasks(1)):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MySort {

    public static class IntSortMapper extends Mapper<Object, Text, IntWritable, NullWritable> {

        private IntWritable val = new IntWritable();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) return; // guard against blank lines, which would otherwise throw NumberFormatException
            val.set(Integer.parseInt(line));
            context.write(val, NullWritable.get());
        }
    }

    public static class IntSortReducer extends Reducer<IntWritable, NullWritable, IntWritable, IntWritable> {
        private IntWritable k = new IntWritable();

        public void reduce(IntWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            k.set(1); // the literal was lost in scraping; 1 matches the pattern elsewhere.
                      // Note k is never incremented, so every line gets the same rank; fixed in the final version below.
            for (NullWritable value : values) {
                context.write(k, key);
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String dir_in = "hdfs://localhost:9000/in_sort";
        String dir_out = "hdfs://localhost:9000/out_sort";

        Path in = new Path(dir_in);
        Path out = new Path(dir_out);

        Configuration conf = new Configuration();
        Job sortJob = new Job(conf, "my_sort");

        sortJob.setJarByClass(MySort.class);

        sortJob.setInputFormatClass(TextInputFormat.class);
        sortJob.setMapperClass(IntSortMapper.class);
        //sortJob.setCombinerClass(SortReducer.class);
        //countJob.setPartitionerClass(HashPartitioner.class);
        sortJob.setMapOutputKeyClass(IntWritable.class);
        sortJob.setMapOutputValueClass(NullWritable.class);

        FileInputFormat.addInputPath(sortJob, in);

        sortJob.setReducerClass(IntSortReducer.class);
        sortJob.setNumReduceTasks(1); // a single reducer guarantees a total order
        sortJob.setOutputKeyClass(IntWritable.class);
        sortJob.setOutputValueClass(IntWritable.class);
        //countJob.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileOutputFormat.setOutputPath(sortJob, out);

        sortJob.waitForCompletion(true);
    }
}
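Before looking at the result, an aside on where the sorting actually happens: the reducer never sorts anything itself. The MapReduce shuffle sorts keys with the key type's registered comparator, which for IntWritable is ascending numeric order. As a hedged sketch (this class is not in the original post), descending order could be obtained by plugging a reversed comparator into the same job:

import org.apache.hadoop.io.IntWritable;

// Hypothetical descending comparator: negates IntWritable's built-in
// byte-level comparison, so the shuffle orders keys from largest to smallest.
public class DescendingIntComparator extends IntWritable.Comparator {
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        return -super.compare(b1, s1, l1, b2, s2, l2);
    }
}

It would be registered with sortJob.setSortComparatorClass(DescendingIntComparator.class); everything else in the job stays the same.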
Result: (the output screenshot is missing from the original post; since k is never incremented, every value would presumably appear with the same rank)
Modified the reduce function (taking a single NullWritable instead of an Iterable):

public static class IntSortReducer extends Reducer<IntWritable, NullWritable, IntWritable, IntWritable> {
    private IntWritable k = new IntWritable();

    public void reduce(IntWritable key, NullWritable value, Context context) throws IOException, InterruptedException {
        k.set(1);
        //for (NullWritable value : values) {
        context.write(k, key);
        //}
    }
}
Result: (I don't quite understand this: after dropping the Iterable, only a single column of values is printed. Where did the key go?)

The reason is that reduce(IntWritable key, NullWritable value, Context context) no longer matches the signature of Reducer.reduce(KEYIN key, Iterable<VALUEIN> values, Context context), so it overloads the method instead of overriding it, and the framework never calls it. What runs instead is the inherited default reduce, an identity pass-through that writes every (key, value) pair unchanged. Since the value is NullWritable, TextOutputFormat prints only the sorted keys, which is why a single column appears and the rank column is gone. Annotating the method with @Override would have turned this silent mistake into a compile-time error.
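For reference, the inherited default implementation looks roughly like this (paraphrased from org.apache.hadoop.mapreduce.Reducer):

protected void reduce(KEYIN key, Iterable<VALUEIN> values, Context context)
        throws IOException, InterruptedException {
    // Identity reduce: pass every (key, value) pair straight through.
    for (VALUEIN value : values) {
        context.write((KEYOUT) key, (VALUEOUT) value);
    }
}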
The final, working version (again with the scraped-away 1s restored; the initial rank of 1 and the increment match the output below):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MySort {

    public static class IntSortMapper extends Mapper<Object, Text, IntWritable, NullWritable> {

        private IntWritable val = new IntWritable();

        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            String line = value.toString().trim();
            if (line.isEmpty()) return; // guard against blank lines, which would otherwise throw NumberFormatException
            val.set(Integer.parseInt(line));
            context.write(val, NullWritable.get());
        }
    }

    public static class IntSortReducer extends Reducer<IntWritable, NullWritable, IntWritable, IntWritable> {
        // Global rank counter; safe as a static here because the job runs exactly
        // one reduce task (see question 2 at the top).
        private static IntWritable num = new IntWritable(1);

        public void reduce(IntWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
            // One iteration per occurrence of the key, so duplicates (e.g. 32) keep separate ranks.
            for (NullWritable value : values) {
                context.write(num, key);
                num = new IntWritable(num.get() + 1);
            }
        }
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        String dir_in = "hdfs://localhost:9000/in_sort";
        String dir_out = "hdfs://localhost:9000/out_sort";

        Path in = new Path(dir_in);
        Path out = new Path(dir_out);

        Configuration conf = new Configuration();
        Job sortJob = new Job(conf, "my_sort");

        sortJob.setJarByClass(MySort.class);

        sortJob.setInputFormatClass(TextInputFormat.class);
        sortJob.setMapperClass(IntSortMapper.class);
        //sortJob.setCombinerClass(SortReducer.class);
        //countJob.setPartitionerClass(HashPartitioner.class);
        sortJob.setMapOutputKeyClass(IntWritable.class);
        sortJob.setMapOutputValueClass(NullWritable.class);

        FileInputFormat.addInputPath(sortJob, in);

        sortJob.setReducerClass(IntSortReducer.class);
        sortJob.setNumReduceTasks(1); // a single reducer guarantees a total order
        sortJob.setOutputKeyClass(IntWritable.class);
        sortJob.setOutputValueClass(IntWritable.class);
        //countJob.setOutputFormatClass(SequenceFileOutputFormat.class);

        FileOutputFormat.setOutputPath(sortJob, out);

        sortJob.waitForCompletion(true);
    }
}

Result:
    1    2
    2    6
    3    15
    4    22
    5    26
    6    32
    7    32
    8    54
    9    92
    10    650
    11    654
    12    756
    13    5956
    14    65223
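As a final hedged sketch (the file name follows the default part-r-00000 convention, which the original post does not show explicitly), the single output file can be read back from HDFS to confirm the global order:

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class PrintSorted {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(URI.create("hdfs://localhost:9000"), conf);
        // With one reduce task there is exactly one output file.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                fs.open(new Path("/out_sort/part-r-00000"))))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line); // "rank<TAB>value", ranks 1..14
            }
        }
    }
}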
