MapReduce Demo

功能：统计公司员工一个月内手机上网上行流量、下行流量及总流量。

测试数据如下（每行三个字段，以制表符分隔，依次为：手机号、上行流量、下行流量）：

13612345678 6000 1000

13612345678 2000 3000

13812345678 2000 100

13812345678 1500 300

13512345678 9000 200

13512345678 500 200

13112345678 1000 200

13112345678 800 200

代码：

程序入口类：DataCount

package cn.terry.mr;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.sun.jersey.core.impl.provider.entity.XMLJAXBElementProvider.Text;

public class DataCount {



public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

Configuration conf=new Configuration();

Job job=Job.getInstance(conf);

job.setJarByClass(DataCount.class);

job.setMapperClass(MRMap.class);

FileInputFormat.setInputPaths(job, new Path(args[0]));



job.setReducerClass(MRReduce.class);

job.setMapOutputKeyClass(Text.class);

job.setMapOutputValueClass(DataBean.class);

FileOutputFormat.setOutputPath(job, new Path(args[1]));

job.waitForCompletion(true);

}

}

数据实体类： DataBean.java

package cn.terry.mr;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * Traffic record for one phone number: upstream, downstream and total payload.
 *
 * <p>Implements {@link Writable} so it can be used as a MapReduce value.
 */
public class DataBean implements Writable {

    private String telNo;

    private Long upPayLoad;

    private Long downPayLoad;

    private Long totalPayLoad;

    /** No-arg constructor; leaves every field {@code null} until set or read. */
    public DataBean() {
    }

    /**
     * Creates a record and derives the total as {@code upPayLoad + downPayLoad}.
     *
     * @param telNo phone number
     * @param upPayLoad upstream traffic
     * @param downPayLoad downstream traffic
     * @throws NullPointerException if {@code upPayLoad} or {@code downPayLoad} is {@code null}
     */
    public DataBean(String telNo, Long upPayLoad, Long downPayLoad) {
        this.telNo = telNo;
        this.upPayLoad = upPayLoad;
        this.downPayLoad = downPayLoad;
        this.totalPayLoad = this.upPayLoad + this.downPayLoad;
    }

    public String getTelNo() {
        return telNo;
    }

    public void setTelNo(String telNo) {
        this.telNo = telNo;
    }

    public Long getUpPayLoad() {
        return upPayLoad;
    }

    /** Sets the upstream traffic; the stored total is NOT recomputed. */
    public void setUpPayLoad(Long upPayLoad) {
        this.upPayLoad = upPayLoad;
    }

    public Long getDownPayLoad() {
        return downPayLoad;
    }

    /** Sets the downstream traffic; the stored total is NOT recomputed. */
    public void setDownPayLoad(Long downPayLoad) {
        this.downPayLoad = downPayLoad;
    }

    public Long getTotalPayLoad() {
        return totalPayLoad;
    }

    public void setTotalPayLoad(Long totalPayLoad) {
        this.totalPayLoad = totalPayLoad;
    }

    /**
     * Serializes the fields in the order telNo, up, down, total.
     *
     * <p>A {@code null} phone number is written as the empty string and a {@code null} counter
     * as {@code 0}, so a bean that was only partially populated (for example one created with
     * the no-arg constructor) no longer fails with a {@link NullPointerException}.
     *
     * @param out sink to write to
     * @throws IOException if writing to {@code out} fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeUTF(telNo == null ? "" : telNo);
        out.writeLong(orZero(upPayLoad));
        out.writeLong(orZero(downPayLoad));
        out.writeLong(orZero(totalPayLoad));
    }

    /**
     * Deserializes the fields; the read order must mirror {@link #write(DataOutput)}.
     *
     * @param in source to read from
     * @throws IOException if reading from {@code in} fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        this.telNo = in.readUTF();
        this.upPayLoad = in.readLong();
        this.downPayLoad = in.readLong();
        this.totalPayLoad = in.readLong();
    }

    /**
     * Returns the tab-separated {@code up}, {@code down} and {@code total} values. The phone
     * number is not part of the text.
     */
    @Override
    public String toString() {
        return this.upPayLoad + "\t" + this.downPayLoad + "\t" + this.totalPayLoad;
    }

    /** Unboxes {@code value}, mapping {@code null} to {@code 0}. */
    private static long orZero(Long value) {
        return value == null ? 0L : value.longValue();
    }
}

Map类：MRMap.java

package cn.terry.mr;

import java.io.IOException;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Mapper;

/**
 * Mapper that turns one text record {@code <telNo> <up> <down>} into a
 * {@code (telNo, DataBean)} pair.
 */
public class MRMap extends Mapper<LongWritable, Text, Text, DataBean> {

    /**
     * Parses a single input line and emits it keyed by phone number.
     *
     * <p>Blank lines are skipped. Fields may be separated by tabs or any other run of
     * whitespace, and surrounding whitespace (including a trailing {@code '\r'}) is ignored.
     *
     * @param key key supplied by the input format for this record (only used in error messages)
     * @param value the raw text line
     * @param context sink for the emitted pair
     * @throws IOException if the line has fewer than three fields, or if writing fails
     * @throws NumberFormatException if the second or third field is not a valid {@code long}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line = value.toString().trim();
        if (line.isEmpty()) {
            // Nothing to count on an empty line; previously this crashed on fields[1].
            return;
        }

        String[] fields = line.split("\\s+");
        if (fields.length < 3) {
            throw new IOException("Malformed record (key " + key + "): expected 3 fields but found "
                    + fields.length + " in '" + line + "'");
        }

        String telNo = fields[0];
        long up = Long.parseLong(fields[1]);
        long down = Long.parseLong(fields[2]);

        context.write(new Text(telNo), new DataBean(telNo, up, down));
    }
}

Reduce类：MRReduce.java

package cn.terry.mr;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Reducer;

/**
 * Reducer that adds up the upstream and downstream traffic of every record sharing one key.
 */
public class MRReduce extends Reducer<Text, DataBean, Text, DataBean> {

    /**
     * Sums all values of {@code key} and emits one aggregated bean for it.
     *
     * @param key the key shared by all records in {@code v2}
     * @param v2 the records grouped under {@code key}
     * @param context sink for the aggregated pair
     * @throws IOException propagated from {@code context.write}
     * @throws InterruptedException propagated from {@code context.write}
     */
    @Override
    protected void reduce(Text key, Iterable<DataBean> v2, Context context) throws IOException, InterruptedException {
        long totalUp = 0;
        long totalDown = 0;

        for (DataBean record : v2) {
            totalUp = totalUp + record.getUpPayLoad().longValue();
            totalDown = totalDown + record.getDownPayLoad().longValue();
        }

        // The bean's own phone number is left empty; the number is carried by the output key.
        context.write(key, new DataBean("", Long.valueOf(totalUp), Long.valueOf(totalDown)));
    }
}

运行：

[root@master bin]# hadoop jar /home/hadoop/mpCount.jar cn.terry.mr.DataCount /data.txt /mrOut

17/11/08 11:34:25 INFO client.RMProxy: Connecting to ResourceManager at master/x.x.x.x:8032

17/11/08 11:34:27 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.

17/11/08 11:34:27 INFO input.FileInputFormat: Total input paths to process : 1

17/11/08 11:34:28 INFO mapreduce.JobSubmitter: number of splits:1

17/11/08 11:34:28 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1509957441313_0002

17/11/08 11:34:29 INFO impl.YarnClientImpl: Submitted application application_1509957441313_0002

17/11/08 11:34:29 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1509957441313_0002/

17/11/08 11:34:29 INFO mapreduce.Job: Running job: job_1509957441313_0002

17/11/08 11:34:46 INFO mapreduce.Job: Job job_1509957441313_0002 running in uber mode : false

17/11/08 11:34:46 INFO mapreduce.Job: map 0% reduce 0%

17/11/08 11:34:55 INFO mapreduce.Job: Task Id : attempt_1509957441313_0002_m_000000_0, Status : FAILED
Error: java.io.IOException: Initialization of all the collectors failed. Error in last collector was :class com.sun.jersey.core.impl.provider.entity.XMLJAXBElementProvider$Text
at org.apache.hadoop.mapred.MapTask.createSortingCollector(MapTask.java:415)
at org.apache.hadoop.mapred.MapTask.access$100(MapTask.java:81)
at org.apache.hadoop.mapred.MapTask$NewOutputCollector.<init>(MapTask.java:698)
at org.apache.hadoop.mapred.MapTask.runNewMapper(MapTask.java:770)
at org.apache.hadoop.mapred.MapTask.run(MapTask.java:341)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1746)

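从以上错误可以看出，DataCount 类中导入的 Text 类有误：误导入了 com.sun.jersey.core.impl.provider.entity.XMLJAXBElementProvider.Text，而 Hadoop 需要的是 org.apache.hadoop.io.Text。需要将 DataCount 类中 Text 的导入语句改为 import org.apache.hadoop.io.Text;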

再次运行：

[root@master bin]# hadoop jar /home/hadoop/mpCount.jar cn.terry.mr.DataCount /data3.txt /MROut4

17/11/08 16:23:45 INFO client.RMProxy: Connecting to ResourceManager at master/x.x.x.x:8032

17/11/08 16:23:46 WARN mapreduce.JobResourceUploader: Hadoop command-line option parsing not performed. Implement the Tool interface and execute your application with ToolRunner to remedy this.

17/11/08 16:23:47 INFO input.FileInputFormat: Total input paths to process : 1

17/11/08 16:23:47 INFO mapreduce.JobSubmitter: number of splits:1

17/11/08 16:23:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1509957441313_0008

17/11/08 16:23:48 INFO impl.YarnClientImpl: Submitted application application_1509957441313_0008

17/11/08 16:23:48 INFO mapreduce.Job: The url to track the job: http://master:8088/proxy/application_1509957441313_0008/

17/11/08 16:23:48 INFO mapreduce.Job: Running job: job_1509957441313_0008

17/11/08 16:24:02 INFO mapreduce.Job: Job job_1509957441313_0008 running in uber mode : false

17/11/08 16:24:02 INFO mapreduce.Job: map 0% reduce 0%

17/11/08 16:24:14 INFO mapreduce.Job: map 100% reduce 0%

17/11/08 16:24:25 INFO mapreduce.Job: map 100% reduce 100%

17/11/08 16:24:26 INFO mapreduce.Job: Job job_1509957441313_0008 completed successfully

查看结果：

[root@master bin]# hdfs dfs -ls /MROut4

Found 2 items

-rw-r--r-- 2 root supergroup 0 2017-11-08 16:24 /MROut4/_SUCCESS

-rw-r--r-- 2 root supergroup 106 2017-11-08 16:24 /MROut4/part-r-00000

[root@master bin]# hdfs dfs -cat /MROut4/part-r-00000

13112345678 1800 400 2200

13512345678 9500 400 9900

13612345678 8000 4000 12000

13812345678 3500 400 3900

由于我的chrome和IE版本无法兼容cnblogs的插入code和picture功能，抱歉没能将代码及结果以友好的方式呈现。

MapReduce Demo的更多相关文章

python - hadoop,mapreduce demo
Hadoop,mapreduce 介绍 59888745@qq.com 大数据工程师是在Linux系统下搭建Hadoop生态系统(cloudera是最大的输出者类似于Linux的红帽), 把用户的交易 ...
Wordcount on YARN 一个MapReduce示例
Hadoop YARN版本:2.2.0 关于hadoop yarn的环境搭建可以参考这篇博文:Hadoop 2.0安装以及不停集群加datanode hadoop hdfs yarn伪分布式运行,有如 ...
关于Mapreduce Text类型赋值的错误
Mapreduce中Text类型数据被无缘无故替换? 今天偶然看到一个mapreduce demo,直接上手操作统计两个文件中最大值文件中数据格式为名字数值输出为名字(最大值所对应的 ...
Apache Hadoop2.x 边安装边入门
完整PDF版本:<Apache Hadoop2.x边安装边入门> 目录第一部分:Linux环境安装第一步.配置Vmware NAT网络一. Vmware网络模式介绍二. NAT模式 ...
CentOS7 分布式安装 Hadoop 2.8
1. 基本环境 1.1 操作系统操作系统:CentOS7.3 1.2 三台虚拟机 172.20.20.100 master 172.20.20.101 slave1 172.20.20.102 sl ...
在虚拟机上配置安装hadoop集群
原本以为有大神已经总结的很清楚了,就不自己在写了, 但是在自己安装的过程中还是出现了一些问题, 所以打算以自己的方式重新总结一下. 参考https://blog.csdn.net/hliq539 ...
centos6.6安装hadoop-2.5.0（三、完全分布式安装）
操作系统:centos6.6(三台服务器) 环境:selinux disabled:iptables off:java 1.8.0_131 安装包:hadoop-2.5.0.tar.gz hadoop ...
centos6.6安装hadoop-2.5.0（一、本地模式安装）
操作系统:centos6.6(一台服务器) 环境:selinux disabled:iptables off:java 1.8.0_131 安装包:hadoop-2.5.0.tar.gz hadoop ...
史上最详细的Hadoop环境搭建(转)
转载的文章,请告知侵删.本人只是做个记录,以免以后找不到. 前言 Hadoop在大数据技术体系中的地位至关重要,Hadoop是大数据技术的基础,对Hadoop基础知识的掌握的扎实程度,会决定在大数据技 ...

随机推荐

设置 IntelliJ IDEA 的彩色代码主题
首先,给出一系列 IntelliJ IDEA 代码的彩色主题,供大家选择: VibrantUnknown(Darcula) FadeComments NicePython Solarized Have ...
python 调试之assert and logging
断言 assert assert后面跟的表达式应该是True,否则,根据程序运行的逻辑,后面的代码肯定会出错. 如果断言失败,会抛出AssertionError def foo(s): n = int ...
mongodb之 oplog 日志详解
1:oplog简介 oplog是local库下的一个固定集合,Secondary就是通过查看Primary 的oplog这个集合来进行复制的.每个节点都有oplog,记录这从主节点复制过来的信息,这样 ...
nginx 学习资料
nginx 学习资料 table th:first-of-type { width: 90px; } table th:nth-of-type(2) { } table th:nth-of-type( ...
react：路由登陆后才能访问的控制
react-router 通过创建一个需要认证的路由来限制登陆后才能访问. 官方例子:https://reacttraining.com/react-router/web/example/auth ...
learnopengl“入门”的全部代码
projection 里的角度,角度越大,物体离摄像机越远:角度越小,物体离摄像机越近,这个角度表示视野(fov),视野越大,看到的物体就越小头文件 #ifndef SHADER_H_INCLUDE ...
python的requests快速上手、高级用法和身份认证
https://blog.csdn.net/qq_25134989/article/details/78800209 快速上手迫不及待了吗?本页内容为如何入门 Requests 提供了很好的指引.其 ...
jmeter分布式测试的坑
转 : jmeter分布式测试的坑有关jmeter分布式测试的环境配置,大概就是那样,但是每次想要进行jmeter分布式测试的时候,总是会有各种奇怪的问题,下面整理了一些可能遇到的坑. 只要错误中出 ...
file命令详解
Linux file命令 Linux file命令用于辨识文件类型. 通过file指令,我们得以辨识该文件的类型用法: file [-bchikLNnprsvz0] [--apple] [--mim ...
Linux shell 将字符串分割成数组
原文链接:http://1985wanggang.blog.163.com/blog/static/776383320121745626320/ a="one,two,three,four& ...

MapReduce Demo

MapReduce Demo的更多相关文章

随机推荐

热门专题