1、概念

2、代码示例

FlowSort

  package com.ares.hadoop.mr.flowsort;

  import java.io.IOException;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.conf.Configured;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.LongWritable;
  import org.apache.hadoop.io.NullWritable;
  import org.apache.hadoop.io.Text;
  import org.apache.hadoop.mapreduce.Job;
  import org.apache.hadoop.mapreduce.Mapper;
  import org.apache.hadoop.mapreduce.Reducer;
  import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  import org.apache.hadoop.util.StringUtils;
  import org.apache.hadoop.util.Tool;
  import org.apache.hadoop.util.ToolRunner;
  import org.apache.log4j.Logger;

  import com.ares.hadoop.mr.exception.LineException;

  23. public class FlowSort extends Configured implements Tool {
  24. private static final Logger LOGGER = Logger.getLogger(FlowSort.class);
  25. enum Counter {
  26. LINESKIP
  27. }
  28.  
  29. public static class FlowSortMapper extends Mapper<LongWritable, Text,
  30. FlowBean, NullWritable> {
  31. private String line;
  32. private int length;
  33. private final static char separator = '\t';
  34.  
  35. private String phoneNum;
  36. private long upFlow;
  37. private long downFlow;
  38. private long sumFlow;
  39.  
  40. private FlowBean flowBean = new FlowBean();
  41. private NullWritable nullWritable = NullWritable.get();
  42.  
  43. @Override
  44. protected void map(
  45. LongWritable key,
  46. Text value,
  47. Mapper<LongWritable, Text, FlowBean, NullWritable>.Context context)
  48. throws IOException, InterruptedException {
  49. // TODO Auto-generated method stub
  50. //super.map(key, value, context);
  51. String errMsg;
  52. try {
  53. line = value.toString();
  54. String[] fields = StringUtils.split(line, separator);
  55. length = fields.length;
  56. if (length != ) {
  57. throw new LineException(key.get() + ", " + line + " LENGTH INVALID, IGNORE...");
  58. }
  59.  
  60. phoneNum = fields[];
  61. upFlow = Long.parseLong(fields[]);
  62. downFlow = Long.parseLong(fields[]);
  63. sumFlow = Long.parseLong(fields[]);
  64.  
  65. flowBean.setPhoneNum(phoneNum);
  66. flowBean.setUpFlow(upFlow);
  67. flowBean.setDownFlow(downFlow);
  68. flowBean.setSumFlow(sumFlow);
  69.  
  70. context.write(flowBean, nullWritable);
  71. } catch (LineException e) {
  72. // TODO: handle exception
  73. LOGGER.error(e);
  74. System.out.println(e);
  75. context.getCounter(Counter.LINESKIP).increment();
  76. return;
  77. } catch (NumberFormatException e) {
  78. // TODO: handle exception
  79. errMsg = key.get() + ", " + line + " FLOW DATA INVALID, IGNORE...";
  80. LOGGER.error(errMsg);
  81. System.out.println(errMsg);
  82. context.getCounter(Counter.LINESKIP).increment();
  83. return;
  84. } catch (Exception e) {
  85. // TODO: handle exception
  86. LOGGER.error(e);
  87. System.out.println(e);
  88. context.getCounter(Counter.LINESKIP).increment();
  89. return;
  90. }
  91. }
  92. }
  93.  
  94. public static class FlowSortReducer extends Reducer<FlowBean, NullWritable,
  95. FlowBean, NullWritable> {
  96. @Override
  97. protected void reduce(
  98. FlowBean key,
  99. Iterable<NullWritable> values,
  100. Reducer<FlowBean, NullWritable, FlowBean, NullWritable>.Context context)
  101. throws IOException, InterruptedException {
  102. // TODO Auto-generated method stub
  103. //super.reduce(arg0, arg1, arg2);
  104. context.write(key, NullWritable.get());
  105. }
  106. }
  107.  
  108. @Override
  109. public int run(String[] args) throws Exception {
  110. // TODO Auto-generated method stub
  111. String errMsg = "FlowSort: TEST STARTED...";
  112. LOGGER.debug(errMsg);
  113. System.out.println(errMsg);
  114.  
  115. Configuration conf = new Configuration();
  116. //FOR Eclipse JVM Debug
  117. //conf.set("mapreduce.job.jar", "flowsum.jar");
  118. Job job = Job.getInstance(conf);
  119.  
  120. // JOB NAME
  121. job.setJobName("FlowSort");
  122.  
  123. // JOB MAPPER & REDUCER
  124. job.setJarByClass(FlowSort.class);
  125. job.setMapperClass(FlowSortMapper.class);
  126. job.setReducerClass(FlowSortReducer.class);
  127.  
  128. // MAP & REDUCE
  129. job.setOutputKeyClass(FlowBean.class);
  130. job.setOutputValueClass(NullWritable.class);
  131. // MAP
  132. job.setMapOutputKeyClass(FlowBean.class);
  133. job.setMapOutputValueClass(NullWritable.class);
  134.  
  135. // JOB INPUT & OUTPUT PATH
  136. //FileInputFormat.addInputPath(job, new Path(args[0]));
  137. FileInputFormat.setInputPaths(job, args[]);
  138. FileOutputFormat.setOutputPath(job, new Path(args[]));
  139.  
  140. // VERBOSE OUTPUT
  141. if (job.waitForCompletion(true)) {
  142. errMsg = "FlowSort: TEST SUCCESSFULLY...";
  143. LOGGER.debug(errMsg);
  144. System.out.println(errMsg);
  145. return ;
  146. } else {
  147. errMsg = "FlowSort: TEST FAILED...";
  148. LOGGER.debug(errMsg);
  149. System.out.println(errMsg);
  150. return ;
  151. }
  152.  
  153. }
  154.  
  155. public static void main(String[] args) throws Exception {
  156. if (args.length != ) {
  157. String errMsg = "FlowSort: ARGUMENTS ERROR";
  158. LOGGER.error(errMsg);
  159. System.out.println(errMsg);
  160. System.exit(-);
  161. }
  162.  
  163. int result = ToolRunner.run(new Configuration(), new FlowSort(), args);
  164. System.exit(result);
  165. }
  166. }

FlowBean

  package com.ares.hadoop.mr.flowsort;

  import java.io.DataInput;
  import java.io.DataOutput;
  import java.io.IOException;

  import org.apache.hadoop.io.WritableComparable;

  9. public class FlowBean implements WritableComparable<FlowBean>{
  10. private String phoneNum;
  11. private long upFlow;
  12. private long downFlow;
  13. private long sumFlow;
  14.  
  15. public FlowBean() {
  16. // TODO Auto-generated constructor stub
  17. }
  18. // public FlowBean(String phoneNum, long upFlow, long downFlow, long sumFlow) {
  19. // super();
  20. // this.phoneNum = phoneNum;
  21. // this.upFlow = upFlow;
  22. // this.downFlow = downFlow;
  23. // this.sumFlow = sumFlow;
  24. // }
  25.  
  26. public String getPhoneNum() {
  27. return phoneNum;
  28. }
  29.  
  30. public void setPhoneNum(String phoneNum) {
  31. this.phoneNum = phoneNum;
  32. }
  33.  
  34. public long getUpFlow() {
  35. return upFlow;
  36. }
  37.  
  38. public void setUpFlow(long upFlow) {
  39. this.upFlow = upFlow;
  40. }
  41.  
  42. public long getDownFlow() {
  43. return downFlow;
  44. }
  45.  
  46. public void setDownFlow(long downFlow) {
  47. this.downFlow = downFlow;
  48. }
  49.  
  50. public long getSumFlow() {
  51. return sumFlow;
  52. }
  53.  
  54. public void setSumFlow(long sumFlow) {
  55. this.sumFlow = sumFlow;
  56. }
  57.  
  58. @Override
  59. public void readFields(DataInput in) throws IOException {
  60. // TODO Auto-generated method stub
  61. phoneNum = in.readUTF();
  62. upFlow = in.readLong();
  63. downFlow = in.readLong();
  64. sumFlow = in.readLong();
  65. }
  66.  
  67. @Override
  68. public void write(DataOutput out) throws IOException {
  69. // TODO Auto-generated method stub
  70. out.writeUTF(phoneNum);
  71. out.writeLong(upFlow);
  72. out.writeLong(downFlow);
  73. out.writeLong(sumFlow);
  74. }
  75.  
  76. @Override
  77. public String toString() {
  78. return "" + phoneNum + "\t" + upFlow + "\t" + downFlow + "\t" + sumFlow;
  79. }
  80.  
  81. @Override
  82. public int compareTo(FlowBean o) {
  83. // TODO Auto-generated method stub
  84. return sumFlow>o.getSumFlow()?-:;
  85. }
  86.  
  87. }

LineException

  package com.ares.hadoop.mr.exception;

  /**
   * Unchecked exception signalling that an input line is malformed and should
   * be ignored by the caller.
   */
  public class LineException extends RuntimeException {
    private static final long serialVersionUID = 2536144005398058435L;

    /** Creates an exception with neither a detail message nor a cause. */
    public LineException() {
      super();
    }

    /**
     * Creates an exception with a detail message and a cause.
     *
     * @param message description of what is wrong with the line
     * @param cause the underlying failure
     */
    public LineException(String message, Throwable cause) {
      super(message, cause);
    }

    /**
     * Creates an exception with a detail message.
     *
     * @param message description of what is wrong with the line
     */
    public LineException(String message) {
      super(message);
    }

    /**
     * Creates an exception that wraps another failure.
     *
     * @param cause the underlying failure
     */
    public LineException(Throwable cause) {
      super(cause);
    }
  }

【Hadoop】Hadoop MR 自定义排序的更多相关文章

  1. hadoop提交作业自定义排序和分组

    现有数据如下: 3 3 3 2 3 1 2 2 2 1 1 1 要求为: 先按第一列从小到大排序,如果第一列相同,按第二列从小到大排序 如果是hadoop默认的排序方式,只能比较key,也就是第一列, ...

  2. 2 weekend110的hadoop的自定义排序实现 + mr程序中自定义分组的实现

    我想得到按流量来排序,而且还是倒序,怎么达到实现呢? 达到下面这种效果, 默认是根据key来排, 我想根据value里的某个排, 解决思路:将value里的某个,放到key里去,然后来排 下面,开始w ...

  3. Hadoop学习之自定义二次排序

    一.概述    MapReduce框架对处理结果的输出会根据key值进行默认的排序,这个默认排序可以满足一部分需求,但是也是十分有限的.在我们实际的需求当中,往 往有要对reduce输出结果进行二次排 ...

  4. 自定义排序及Hadoop序列化

    自定义排序 将两列数据进行排序,第一列按照升序排列,当第一列相同时,第二列升序排列. 在map和reduce阶段进行排序时,比较的是k2.v2是不参与排序比较的.如果要想让v2也进行排序,需要把k2和 ...

  5. Hadoop学习之路(7)MapReduce自定义排序

    本文测试文本: tom 20 8000 nancy 22 8000 ketty 22 9000 stone 19 10000 green 19 11000 white 39 29000 socrate ...

  6. Hadoop【MR的分区、排序、分组】

    [toc] 一.分区 问题:按照条件将结果输出到不同文件中 自定义分区步骤 1.自定义继承Partitioner类,重写getPartition()方法 2.在job驱动Driver中设置自定义的Pa ...

  7. Hadoop MapReduce 二次排序原理及其应用

    关于二次排序主要涉及到这么几个东西: 在0.20.0 以前使用的是 setPartitionerClass setOutputkeyComparatorClass setOutputValueGrou ...

  8. Hadoop【MR开发规范、序列化】

    Hadoop[MR开发规范.序列化] 目录 Hadoop[MR开发规范.序列化] 一.MapReduce编程规范 1.Mapper阶段 2.Reducer阶段 3.Driver阶段 二.WordCou ...

  9. Hadoop基础-MapReduce的排序

    Hadoop基础-MapReduce的排序 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任. 一.MapReduce的排序分类 1>.部分排序 部分排序是对单个分区进行排序,举个 ...

随机推荐

  1. bzoj 2618 半平面交模板+学习笔记

    题目大意 给你n个凸多边形,求多边形的交的面积 分析 题意\(=\)给你一堆边,让你求半平面交的面积 做法 半平面交模板 1.定义半平面为向量的左侧 2.将所有向量的起点放到一个中心,以中心参照进行逆 ...

  2. VSM and VEM Modules

    Information About Modules Cisco Nexus 1000V manages a data center defined by a VirtualCenter. Each s ...

  3. 过河(DP)

    原题传送门 这道题要用到压缩的思想(原来DP还能这么用...) 其实很简单,假如我们要到某一个位置w 如果我们原位置为Q 很显然,如果(W-Q>=s*t)那么我们一定能到达W 换言之,就是如果我 ...

  4. CString::GetLength()获得字节数

    按照MSDN的说明,在选用MBCS多字节字符串编码时,该方法会得到正确的字节数.此时没有问题. For multibyte character sets (MBCS), GetLength count ...

  5. hihocoder1236(2015长春网赛J题) Scores(bitset && 分块)

    题意:给你50000个五维点(a1,a2,a3,a4,a5),50000个询问(q1,q2,q3,q4,q5),问已知点里有多少个点(x1,x2,x3,x4,x5)满足(xi<=qi,i=1,2 ...

  6. [BZOJ1052][HAOI2007]覆盖问题 二分+贪心

    1052: [HAOI2007]覆盖问题 Time Limit: 10 Sec  Memory Limit: 162 MB Submit: 2053  Solved: 959 [Submit][Sta ...

  7. (29)C#多线程

    使用线程的原因 1.不希望用户界面停止响应. 2.所有需要等待的操作,如文件.数据库或网络访问需要一定的时间. 一个进程的多个线程可以同时运行不同cpu或多核cpu的不同内核上 注意多线程访问相同的数 ...

  8. Codeforces 954I Yet Another String Matching Problem(并查集 + FFT)

    题目链接  Educational Codeforces Round 40  Problem I 题意  定义两个长度相等的字符串之间的距离为:   把两个字符串中所有同一种字符变成另外一种,使得两个 ...

  9. 在CentOS 7上安装Node.js

    一.安装1.进入官网下载最新版本https://nodejs.org/en/ 选择下载后上传或直接使用wget下载 wget https://nodejs.org/dist/v8.11.2/node- ...

  10. jcl sort comp3 to 表示型

    Lets say your packed data is at 10th column and is of length 6, S9(4)V99 You could try the following ...