1. Basic Concepts

Hadoop MapReduce does not use Java's built-in Serializable mechanism for the keys and values that flow between the map and reduce phases. Instead it uses its own, more compact and faster Writable serialization: a custom value type implements write() to serialize its fields to a binary stream and readFields() to deserialize them, and the two methods must handle the fields in exactly the same order. Keys additionally need WritableComparable so they can be sorted; the FlowBean below is used only as a value, so plain Writable is enough.
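For reference, the whole contract a custom type must fulfil is the org.apache.hadoop.io.Writable interface that FlowBean implements below:

public interface Writable {
    void write(DataOutput out) throws IOException;
    void readFields(DataInput in) throws IOException;
}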

2. Mapper Code

package com.ares.hadoop.mr.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
import org.apache.log4j.Logger;

import com.ares.hadoop.mr.wordcount.MRTest;

// Long, String, String, Long --> LongWritable, Text, Text, FlowBean
public class FlowSumMapper extends Mapper<LongWritable, Text, Text, FlowBean> {
    private static final Logger LOGGER = Logger.getLogger(MRTest.class);

    private String line;
    private int length;
    private final static char separator = '\t';
    private String phoneNum;
    private long upFlow;
    private long downFlow;
    //private long sumFlow;
    private Text text = new Text();
    private FlowBean flowBean = new FlowBean();

    @Override
    protected void map(LongWritable key, Text value,
            Mapper<LongWritable, Text, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        line = value.toString();
        String[] fields = StringUtils.split(line, separator);
        length = fields.length;
        // NOTE: the expected field count (11) and the column positions below are
        // assumptions based on the widely used HTTP traffic sample data for this
        // exercise; adjust them to your own input layout.
        if (length != 11) {
            LOGGER.error(key.get() + ", " + line + " LENGTH INVALID, IGNORE...");
            return;
        }
        phoneNum = fields[1];
        try {
            upFlow = Long.parseLong(fields[length - 3]);
            downFlow = Long.parseLong(fields[length - 2]);
            //sumFlow = upFlow + downFlow;
        } catch (Exception e) {
            LOGGER.error(key.get() + ", " + line + " FLOW DATA INVALID, IGNORE...");
            return;
        }
        flowBean.setPhoneNum(phoneNum);
        flowBean.setUpFlow(upFlow);
        flowBean.setDownFlow(downFlow);
        //flowBean.setSumFlow(sumFlow);
        text.set(phoneNum);
        context.write(text, flowBean);
    }
}
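To make the indexing concrete, here is a hypothetical tab-separated record in the 11-field layout assumed above (the exact columns of real data may differ):

1363157985066	13726230503	00-FD-07-A4-72-B8:CMCC	120.196.100.82	i02.c.aliimg.com	portal	24	27	2481	24681	200

The mapper keys the record by fields[1] (the phone number, 13726230503) and parses the third- and second-to-last columns as the up flow (2481) and down flow (24681), so it emits (13726230503, FlowBean{upFlow=2481, downFlow=24681}).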

3. Reducer Code

package com.ares.hadoop.mr.flowsum;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowSumReducer extends Reducer<Text, FlowBean, Text, FlowBean> {
    //private static final Logger LOGGER = Logger.getLogger(MRTest.class);

    private FlowBean flowBean = new FlowBean();

    @Override
    protected void reduce(Text key, Iterable<FlowBean> values,
            Reducer<Text, FlowBean, Text, FlowBean>.Context context)
            throws IOException, InterruptedException {
        long upFlowCounter = 0;
        long downFlowCounter = 0;
        // Accumulate the up/down flow of every record for this phone number.
        for (FlowBean bean : values) {
            upFlowCounter += bean.getUpFlow();
            downFlowCounter += bean.getDownFlow();
        }
        flowBean.setPhoneNum(key.toString());
        flowBean.setUpFlow(upFlowCounter);
        flowBean.setDownFlow(downFlowCounter);
        flowBean.setSumFlow(upFlowCounter + downFlowCounter);
        context.write(key, flowBean);
    }
}
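One subtlety worth noting: inside reduce(), Hadoop reuses a single FlowBean instance for every element of values and simply overwrites its fields on each iteration. The loop above is safe because it copies the primitive values out immediately, but caching the beans themselves would silently corrupt the result. A minimal sketch of the pitfall and its fix, as it would appear inside reduce() (java.util.List and java.util.ArrayList would need importing):

// BROKEN: every list entry ends up aliasing the same reused instance.
List<FlowBean> cached = new ArrayList<>();
for (FlowBean bean : values) {
    cached.add(bean);
}

// SAFE: copy the fields into a fresh bean before storing it.
for (FlowBean bean : values) {
    FlowBean copy = new FlowBean();
    copy.setPhoneNum(bean.getPhoneNum());
    copy.setUpFlow(bean.getUpFlow());
    copy.setDownFlow(bean.getDownFlow());
    copy.setSumFlow(bean.getSumFlow());
    cached.add(copy);
}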

4. Serialization Bean Code

package com.ares.hadoop.mr.flowsum;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

public class FlowBean implements Writable {
    private String phoneNum;
    private long upFlow;
    private long downFlow;
    private long sumFlow;

    // The MR framework creates beans by reflection, so a no-arg
    // constructor is required.
    public FlowBean() {
    }

    // public FlowBean(String phoneNum, long upFlow, long downFlow, long sumFlow) {
    //     super();
    //     this.phoneNum = phoneNum;
    //     this.upFlow = upFlow;
    //     this.downFlow = downFlow;
    //     this.sumFlow = sumFlow;
    // }

    public String getPhoneNum() {
        return phoneNum;
    }

    public void setPhoneNum(String phoneNum) {
        this.phoneNum = phoneNum;
    }

    public long getUpFlow() {
        return upFlow;
    }

    public void setUpFlow(long upFlow) {
        this.upFlow = upFlow;
    }

    public long getDownFlow() {
        return downFlow;
    }

    public void setDownFlow(long downFlow) {
        this.downFlow = downFlow;
    }

    public long getSumFlow() {
        return sumFlow;
    }

    public void setSumFlow(long sumFlow) {
        this.sumFlow = sumFlow;
    }

    // Deserialization: fields must be read in exactly the order they were written.
    @Override
    public void readFields(DataInput in) throws IOException {
        phoneNum = in.readUTF();
        upFlow = in.readLong();
        downFlow = in.readLong();
        sumFlow = in.readLong();
    }

    // Serialization: write each field to the binary stream.
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(phoneNum);
        out.writeLong(upFlow);
        out.writeLong(downFlow);
        out.writeLong(sumFlow);
    }

    // TextOutputFormat calls toString() on the value when writing results.
    @Override
    public String toString() {
        return "" + upFlow + "\t" + downFlow + "\t" + sumFlow;
    }
}
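Because write() and readFields() must mirror each other exactly, a quick local round-trip check catches field-order mistakes without launching a job. A minimal sketch (this test class is not part of the original code):

package com.ares.hadoop.mr.flowsum;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FlowBeanRoundTripTest {
    public static void main(String[] args) throws IOException {
        FlowBean original = new FlowBean();
        original.setPhoneNum("13726230503");
        original.setUpFlow(2481L);
        original.setDownFlow(24681L);
        original.setSumFlow(2481L + 24681L);

        // Serialize with write(), exactly as the framework would.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        original.write(new DataOutputStream(bytes));

        // Deserialize into a fresh instance with readFields().
        FlowBean restored = new FlowBean();
        restored.readFields(new DataInputStream(
                new ByteArrayInputStream(bytes.toByteArray())));

        // Both lines should print: 2481<TAB>24681<TAB>27162
        System.out.println(original);
        System.out.println(restored);
    }
}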

5. TestRunner Code

package com.ares.hadoop.mr.flowsum;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.log4j.Logger;

public class FlowSumRunner extends Configured implements Tool {
    private static final Logger LOGGER = Logger.getLogger(FlowSumRunner.class);

    @Override
    public int run(String[] args) throws Exception {
        LOGGER.debug("MRTest: MRTest STARTED...");

        // Expect exactly two arguments: the input path and the output path.
        if (args.length != 2) {
            LOGGER.error("MRTest: ARGUMENTS ERROR");
            System.exit(-1);
        }

        Configuration conf = new Configuration();
        //FOR Eclipse JVM Debug
        //conf.set("mapreduce.job.jar", "flowsum.jar");
        Job job = Job.getInstance(conf);

        // JOB NAME
        job.setJobName("flowsum");

        // JOB MAPPER & REDUCER
        job.setJarByClass(FlowSumRunner.class);
        job.setMapperClass(FlowSumMapper.class);
        job.setReducerClass(FlowSumReducer.class);

        // REDUCE OUTPUT
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        // MAP OUTPUT
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        // JOB INPUT & OUTPUT PATH
        //FileInputFormat.addInputPath(job, new Path(args[0]));
        FileInputFormat.setInputPaths(job, args[0]);
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // VERBOSE OUTPUT
        if (job.waitForCompletion(true)) {
            LOGGER.debug("MRTest: MRTest SUCCEEDED...");
            return 0;
        } else {
            LOGGER.debug("MRTest: MRTest FAILED...");
            return 1;
        }
    }

    public static void main(String[] args) throws Exception {
        int result = ToolRunner.run(new Configuration(), new FlowSumRunner(), args);
        System.exit(result);
    }
}
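Packaged into a jar, the job runs through ToolRunner in the usual way; the jar name and HDFS paths below are placeholders for your own (note that the output directory must not already exist, or FileOutputFormat will fail the job):

hadoop jar flowsum.jar com.ares.hadoop.mr.flowsum.FlowSumRunner /flowsum/input /flowsum/output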

References:

http://www.cnblogs.com/robert-blue/p/4157768.html

http://www.cnblogs.com/qlee/archive/2011/05/18/2049610.html

http://blog.163.com/lzm07@126/blog/static/25705468201331611857190/

http://blog.csdn.net/lastsweetop/article/details/9193907
