【Hadoop】Hadoop MR 自定义排序
1、概念
2、代码示例
FlowSort
package com.ares.hadoop.mr.flowsort; import java.io.IOException; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger; import com.ares.hadoop.mr.exception.LineException; public class FlowSort extends Configured implements Tool {
private static final Logger LOGGER = Logger.getLogger(FlowSort.class);
enum Counter {
LINESKIP
} public static class FlowSortMapper extends Mapper<LongWritable, Text,
FlowBean, NullWritable> {
private String line;
private int length;
private final static char separator = '\t'; private String phoneNum;
private long upFlow;
private long downFlow;
private long sumFlow; private FlowBean flowBean = new FlowBean();
private NullWritable nullWritable = NullWritable.get(); @Override
protected void map(
LongWritable key,
Text value,
Mapper<LongWritable, Text, FlowBean, NullWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.map(key, value, context);
String errMsg;
try {
line = value.toString();
String[] fields = StringUtils.split(line, separator);
length = fields.length;
if (length != ) {
throw new LineException(key.get() + ", " + line + " LENGTH INVALID, IGNORE...");
} phoneNum = fields[];
upFlow = Long.parseLong(fields[]);
downFlow = Long.parseLong(fields[]);
sumFlow = Long.parseLong(fields[]); flowBean.setPhoneNum(phoneNum);
flowBean.setUpFlow(upFlow);
flowBean.setDownFlow(downFlow);
flowBean.setSumFlow(sumFlow); context.write(flowBean, nullWritable);
} catch (LineException e) {
// TODO: handle exception
LOGGER.error(e);
System.out.println(e);
context.getCounter(Counter.LINESKIP).increment();
return;
} catch (NumberFormatException e) {
// TODO: handle exception
errMsg = key.get() + ", " + line + " FLOW DATA INVALID, IGNORE...";
LOGGER.error(errMsg);
System.out.println(errMsg);
context.getCounter(Counter.LINESKIP).increment();
return;
} catch (Exception e) {
// TODO: handle exception
LOGGER.error(e);
System.out.println(e);
context.getCounter(Counter.LINESKIP).increment();
return;
}
}
} public static class FlowSortReducer extends Reducer<FlowBean, NullWritable,
FlowBean, NullWritable> {
@Override
protected void reduce(
FlowBean key,
Iterable<NullWritable> values,
Reducer<FlowBean, NullWritable, FlowBean, NullWritable>.Context context)
throws IOException, InterruptedException {
// TODO Auto-generated method stub
//super.reduce(arg0, arg1, arg2);
context.write(key, NullWritable.get());
}
} @Override
public int run(String[] args) throws Exception {
// TODO Auto-generated method stub
String errMsg = "FlowSort: TEST STARTED...";
LOGGER.debug(errMsg);
System.out.println(errMsg); Configuration conf = new Configuration();
//FOR Eclipse JVM Debug
//conf.set("mapreduce.job.jar", "flowsum.jar");
Job job = Job.getInstance(conf); // JOB NAME
job.setJobName("FlowSort"); // JOB MAPPER & REDUCER
job.setJarByClass(FlowSort.class);
job.setMapperClass(FlowSortMapper.class);
job.setReducerClass(FlowSortReducer.class); // MAP & REDUCE
job.setOutputKeyClass(FlowBean.class);
job.setOutputValueClass(NullWritable.class);
// MAP
job.setMapOutputKeyClass(FlowBean.class);
job.setMapOutputValueClass(NullWritable.class); // JOB INPUT & OUTPUT PATH
//FileInputFormat.addInputPath(job, new Path(args[0]));
FileInputFormat.setInputPaths(job, args[]);
FileOutputFormat.setOutputPath(job, new Path(args[])); // VERBOSE OUTPUT
if (job.waitForCompletion(true)) {
errMsg = "FlowSort: TEST SUCCESSFULLY...";
LOGGER.debug(errMsg);
System.out.println(errMsg);
return ;
} else {
errMsg = "FlowSort: TEST FAILED...";
LOGGER.debug(errMsg);
System.out.println(errMsg);
return ;
} } public static void main(String[] args) throws Exception {
if (args.length != ) {
String errMsg = "FlowSort: ARGUMENTS ERROR";
LOGGER.error(errMsg);
System.out.println(errMsg);
System.exit(-);
} int result = ToolRunner.run(new Configuration(), new FlowSort(), args);
System.exit(result);
}
}
FlowBean
package com.ares.hadoop.mr.flowsort; import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException; import org.apache.hadoop.io.WritableComparable; public class FlowBean implements WritableComparable<FlowBean>{
private String phoneNum;
private long upFlow;
private long downFlow;
private long sumFlow; public FlowBean() {
// TODO Auto-generated constructor stub
}
// public FlowBean(String phoneNum, long upFlow, long downFlow, long sumFlow) {
// super();
// this.phoneNum = phoneNum;
// this.upFlow = upFlow;
// this.downFlow = downFlow;
// this.sumFlow = sumFlow;
// } public String getPhoneNum() {
return phoneNum;
} public void setPhoneNum(String phoneNum) {
this.phoneNum = phoneNum;
} public long getUpFlow() {
return upFlow;
} public void setUpFlow(long upFlow) {
this.upFlow = upFlow;
} public long getDownFlow() {
return downFlow;
} public void setDownFlow(long downFlow) {
this.downFlow = downFlow;
} public long getSumFlow() {
return sumFlow;
} public void setSumFlow(long sumFlow) {
this.sumFlow = sumFlow;
} @Override
public void readFields(DataInput in) throws IOException {
// TODO Auto-generated method stub
phoneNum = in.readUTF();
upFlow = in.readLong();
downFlow = in.readLong();
sumFlow = in.readLong();
} @Override
public void write(DataOutput out) throws IOException {
// TODO Auto-generated method stub
out.writeUTF(phoneNum);
out.writeLong(upFlow);
out.writeLong(downFlow);
out.writeLong(sumFlow);
} @Override
public String toString() {
return "" + phoneNum + "\t" + upFlow + "\t" + downFlow + "\t" + sumFlow;
} @Override
public int compareTo(FlowBean o) {
// TODO Auto-generated method stub
return sumFlow>o.getSumFlow()?-:;
} }
LineException
package com.ares.hadoop.mr.exception; public class LineException extends RuntimeException {
private static final long serialVersionUID = 2536144005398058435L; public LineException() {
super();
// TODO Auto-generated constructor stub
} public LineException(String message, Throwable cause) {
super(message, cause);
// TODO Auto-generated constructor stub
} public LineException(String message) {
super(message);
// TODO Auto-generated constructor stub
} public LineException(Throwable cause) {
super(cause);
// TODO Auto-generated constructor stub
}
}
【Hadoop】Hadoop MR 自定义排序的更多相关文章
- hadoop提交作业自定义排序和分组
现有数据如下: 3 3 3 2 3 1 2 2 2 1 1 1 要求为: 先按第一列从小到大排序,如果第一列相同,按第二列从小到大排序 如果是hadoop默认的排序方式,只能比较key,也就是第一列, ...
- 2 weekend110的hadoop的自定义排序实现 + mr程序中自定义分组的实现
我想得到按流量来排序,而且还是倒序,怎么达到实现呢? 达到下面这种效果, 默认是根据key来排, 我想根据value里的某个排, 解决思路:将value里的某个,放到key里去,然后来排 下面,开始w ...
- Hadoop学习之自定义二次排序
一.概述 MapReduce框架对处理结果的输出会根据key值进行默认的排序,这个默认排序可以满足一部分需求,但是也是十分有限的.在我们实际的需求当中,往 往有要对reduce输出结果进行二次排 ...
- 自定义排序及Hadoop序列化
自定义排序 将两列数据进行排序,第一列按照升序排列,当第一列相同时,第二列升序排列. 在map和reduce阶段进行排序时,比较的是k2.v2是不参与排序比较的.如果要想让v2也进行排序,需要把k2和 ...
- Hadoop学习之路(7)MapReduce自定义排序
本文测试文本: tom 20 8000 nancy 22 8000 ketty 22 9000 stone 19 10000 green 19 11000 white 39 29000 socrate ...
- Hadoop【MR的分区、排序、分组】
[toc] 一.分区 问题:按照条件将结果输出到不同文件中 自定义分区步骤 1.自定义继承Partitioner类,重写getPartition()方法 2.在job驱动Driver中设置自定义的Pa ...
- Hadoop MapReduce 二次排序原理及其应用
关于二次排序主要涉及到这么几个东西: 在0.20.0 以前使用的是 setPartitionerClass setOutputkeyComparatorClass setOutputValueGrou ...
- Hadoop【MR开发规范、序列化】
Hadoop[MR开发规范.序列化] 目录 Hadoop[MR开发规范.序列化] 一.MapReduce编程规范 1.Mapper阶段 2.Reducer阶段 3.Driver阶段 二.WordCou ...
- Hadoop基础-MapReduce的排序
Hadoop基础-MapReduce的排序 作者:尹正杰 版权声明:原创作品,谢绝转载!否则将追究法律责任. 一.MapReduce的排序分类 1>.部分排序 部分排序是对单个分区进行排序,举个 ...
随机推荐
- jenkins 自定义主题
一.概述 jenkins更新后,页面css布局都已改变,我现在用的jenkins.css, ( png图片需自定义) #page-body { background-image:url(http:// ...
- atan 和 atan2
转自http://blog.csdn.net/chinabinlang/article/details/6802686 atan函数与atan2函数的一点区别 . atan 和 atan2 都是求反 ...
- vue2.0 v-tap简洁(漏)版 (只解决300ms问题)
Vue.directive('tap',{ bind:function(el,binding){ var startTx, startTy, endTx, endTy, startTime, endT ...
- LinuxMint下JDK+Tomcat+Mysql+Eclipse javaEE安装
网上查了很多方法,总结下比较简单的做法. 本人使用的系统版本为64位LinuxMint18,cinnamon桌面环境,预装Openjdk1.8.开发使用版本为oracle的1.7版本jdk. 1 jd ...
- BAT脚本编写教程入门提高篇
BAT脚本编写教程入门提高篇 批处理文件的参数 批处理文件还可以像C语言的函数一样使用参数(相当于DOS命令的命令行参数),这需要用到一个参数表示符“%”. %[1-9]表示参数,参数是指在运行批处理 ...
- VS MFC RADIO控件 选择
我们假设有两个RADIO控件:IDC_RADIO_SINGLE和IDC_RADIO_RANGE,我们的目的是默认选种IDC_RADIO_SINGLE控件. 方法一: CheckRadioButton( ...
- dpdk 代码分析一 : 内存初始化
一 前言 http://www.dpdk.org/ dpdk 是 intel 开发的x86芯片上用于高性能网络处理的基础库,业内比较常用的模式是linux-app模式,即 利用该基础库,在用户层空 ...
- 移动web开发问题和优化小结
之前在微信公众号上看到的一篇文章,直接给拷过来了....原文链接http://mp.weixin.qq.com/s/0LwTz-Mw2WumSztIrHucdQ 2.Meta标签 页面在手机上显示时, ...
- tomcat 异常:Caused by: org.apache.catalina.LifecycleException: The connector cannot start since the specified port value of [-1] is invalid
启动tomcat时出现异常: org.apache.catalina.LifecycleException: Failed to start component [Connector[AJP/1.3- ...
- 【转载】无需图片,使用CSS3实现圆角按钮
原文地址:http://www.open-open.com/home/space-37924-do-blog-id-5789.html 首先来看看效果: 事例HTML代码: <a href=&q ...