MapReduce开发实例

下面是几个MapReduce开发示例，所用版本为Hadoop 2.6.5。

1、统计字数

输入数据每行包含两个字段：单词和频数，以tab分开（例如“hello”后接一个tab，再接“3”）。程序如下：

 package com.mr.test;

 import java.io.IOException;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.IntWritable;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 /**
  * Word-frequency aggregation job.
  *
  * <p>Each input line is expected to be {@code word<TAB>count}. The job sums the
  * counts of every word and writes {@code word<TAB>total}.
  *
  * <p>Command-line arguments: {@code <in> <out> <split size in MB> <number of reducers>}.
  */
 public class MRTest {

     /** Counter group for records the mapper had to skip. */
     private static final String COUNTER_GROUP = "MRTest";

     /** Counter name for input lines that are not {@code word<TAB>integer}. */
     private static final String MALFORMED_RECORDS = "MALFORMED_RECORDS";

     /** Parses {@code word<TAB>count} lines and emits {@code (word, count)}. */
     public static class C01Mapper extends Mapper<Object, Text, Text, IntWritable> {

         // Reused output holders: context.write serializes them, so one instance per task is enough.
         private final Text word = new Text();
         private final IntWritable count = new IntWritable();

         /**
          * Emits one {@code (word, count)} pair per well-formed line.
          *
          * <p>Lines that do not have exactly two tab-separated fields, or whose second
          * field is not an integer, are skipped and counted instead of failing the task.
          *
          * @param key     input key supplied by the input format (unused)
          * @param value   one input line
          * @param context task context used to emit output and update counters
          */
         @Override
         public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
             String[] fields = value.toString().split("\t");
             if (fields.length != 2) {
                 context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
                 return;
             }
             int parsed;
             try {
                 parsed = Integer.parseInt(fields[1]);
             } catch (NumberFormatException ignored) {
                 // A non-numeric count is treated like any other malformed line: skip and count it.
                 context.getCounter(COUNTER_GROUP, MALFORMED_RECORDS).increment(1);
                 return;
             }
             word.set(fields[0]);
             count.set(parsed);
             context.write(word, count);
         }
     }

     /**
      * Sums all counts of a word. Also registered as the combiner in {@link #main},
      * which is valid because input and output types are identical and addition is associative.
      */
     public static class C01Reducer extends Reducer<Text, IntWritable, Text, IntWritable> {

         private final IntWritable total = new IntWritable();

         /**
          * Writes {@code (key, sum of values)}.
          *
          * @param key     the word
          * @param values  all counts emitted for the word
          * @param context task context used to emit output
          */
         @Override
         public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
             int sum = 0;
             for (IntWritable value : values) {
                 sum += value.get();
             }
             total.set(sum);
             context.write(key, total);
         }
     }

     /**
      * Configures and runs the job, then exits with 0 on success and 1 on failure.
      *
      * @param args {@code args[0]} input path, {@code args[1]} output path,
      *             {@code args[2]} split size in MB, {@code args[3]} number of reducers
      */
     public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
         if (args.length < 4) {
             System.err.println("Usage: MRTest <in> <out> <split size in MB> <number of reducers>");
             System.exit(2);
         }
         String in = args[0];
         String out = args[1];
         int unitmb = Integer.parseInt(args[2]);
         int nreducer = Integer.parseInt(args[3]);

         // Long arithmetic: with int, any size of 2048 MB or more would overflow.
         long splitBytes = unitmb * 1024L * 1024L;

         Configuration conf = new Configuration();
         conf.setLong("mapreduce.input.fileinputformat.split.maxsize", splitBytes);
         // Current name of the deprecated "mapred.min.split.size" key.
         conf.setLong("mapreduce.input.fileinputformat.split.minsize", splitBytes);
         conf.setLong("mapreduce.input.fileinputformat.split.minsize.per.node", splitBytes);
         conf.setLong("mapreduce.input.fileinputformat.split.minsize.per.rack", splitBytes);

         // Job.getInstance replaces the deprecated Job(Configuration) constructor.
         Job job = Job.getInstance(conf);
         job.setJarByClass(MRTest.class);

         FileInputFormat.addInputPath(job, new Path(in));
         FileOutputFormat.setOutputPath(job, new Path(out));

         job.setMapperClass(C01Mapper.class);
         job.setCombinerClass(C01Reducer.class);
         job.setReducerClass(C01Reducer.class);
         job.setNumReduceTasks(nreducer);

         job.setMapOutputKeyClass(Text.class);
         job.setMapOutputValueClass(IntWritable.class);
         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(IntWritable.class);

         // Propagate the job result so shell scripts and schedulers can detect a failure.
         System.exit(job.waitForCompletion(true) ? 0 : 1);
     }
 }

2、统计用户在网站的停留时间

数据格式（用户，毫秒数，网站，以tab分开）：

A	100	baidu.com

B	900	google.com

C	515	sohu.com

D	618	sina.com

E	791	google.com

B	121	baidu.com

C	915	google.com

D	112	sohu.com

E	628	sina.com

A	681	google.com

C	121	baidu.com

D	215	google.com

E	812	sohu.com

A	128	sina.com

B	291	google.com

 package com.mr.test;

 import java.io.IOException;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.IntWritable;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.io.WritableComparable;

 import org.apache.hadoop.io.WritableComparator;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Partitioner;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 /**
  * Secondary-sort job over website visit records.
  *
  * <p>Each input line is expected to be {@code user<TAB>millis<TAB>website}. Records are
  * partitioned and grouped by user and sorted by descending dwell time within a user, so
  * the reducer can number each user's visits in that order.
  *
  * <p>Command-line arguments: {@code <in> <out> <split size in MB> <number of reducers>}.
  */
 public class MRWeb {

     /** Separator between fields of input lines, composite keys and values. */
     private static final String TAB = "\t";

     /** Emits {@code (user<TAB>millis, millis<TAB>website)} for each well-formed line. */
     public static class C02Mapper extends Mapper<Object, Text, Text, Text> {

         /**
          * Builds the composite key used for the secondary sort.
          *
          * <p>Lines without exactly three fields are skipped. Lines whose time field is not
          * an integer are skipped and counted, because the sort comparator parses that
          * field and would otherwise fail the task.
          *
          * @param key     input key supplied by the input format (unused)
          * @param value   one input line
          * @param context task context used to emit output and update counters
          */
         @Override
         public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
             String[] fields = value.toString().split(TAB);
             // Format check.
             if (fields.length != 3) {
                 return;
             }
             String name = fields[0];
             String time = fields[1];
             String website = fields[2];
             try {
                 Integer.parseInt(time);
             } catch (NumberFormatException ignored) {
                 context.getCounter("MRWeb", "MALFORMED_TIME").increment(1);
                 return;
             }
             context.write(new Text(name + TAB + time), new Text(time + TAB + website));
         }
     }

     /** Routes every record of a user to the same reducer, using only the user part of the key. */
     public static class C02Partitioner extends Partitioner<Text, Text> {

         /**
          * @param key    composite key {@code user<TAB>millis}
          * @param value  record value (unused)
          * @param number number of partitions
          * @return a partition index in {@code [0, number)}
          */
         @Override
         public int getPartition(Text key, Text value, int number) {
             String name = key.toString().split(TAB)[0];
             // The absolute value is taken after the modulo, so the result stays in range
             // even when hashCode() is Integer.MIN_VALUE.
             return Math.abs(name.hashCode() % number);
         }
     }

     /** Sort comparator: user ascending, then dwell time descending. */
     public static class C02Sort extends WritableComparator {

         // A no-argument constructor that registers the key class is required.
         protected C02Sort() {
             super(Text.class, true);
         }

         /**
          * Compares two composite {@code user<TAB>millis} keys.
          *
          * @return negative, zero or positive per the usual comparator contract
          */
         @Override
         @SuppressWarnings("rawtypes")
         public int compare(WritableComparable w1, WritableComparable w2) {
             // Split each key once instead of once per field.
             String[] left = ((Text) w1).toString().split(TAB);
             String[] right = ((Text) w2).toString().split(TAB);

             // Compare users as Text so the order matches the grouping comparator.
             int byUser = new Text(left[0]).compareTo(new Text(right[0]));
             if (byUser != 0) {
                 return byUser;
             }
             // Same user: arguments are swapped to get larger times first.
             return Integer.compare(Integer.parseInt(right[1]), Integer.parseInt(left[1]));
         }
     }

     /** Grouping comparator: keys with the same user belong to the same reduce call. */
     public static class C02Group extends WritableComparator {

         protected C02Group() {
             super(Text.class, true);
         }

         /** Compares only the user part of two composite {@code user<TAB>millis} keys. */
         @Override
         @SuppressWarnings("rawtypes")
         public int compare(WritableComparable w1, WritableComparable w2) {
             Text user1 = new Text(((Text) w1).toString().split(TAB)[0]);
             Text user2 = new Text(((Text) w2).toString().split(TAB)[0]);
             return user1.compareTo(user2);
         }
     }

     /** Numbers the visits of each user in the order in which they arrive. */
     public static class C02Reducer extends Reducer<Text, Text, IntWritable, Text> {

         /**
          * Writes {@code (rank, user<TAB>millis<TAB>website)} for every value of the group;
          * the rank starts at 1 for each reduce call.
          *
          * @param key     composite key of the group; only its user part is used
          * @param values  {@code millis<TAB>website} values of the group
          * @param context task context used to emit output
          */
         @Override
         protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
             String name = key.toString().split(TAB)[0];
             int rank = 0;
             // Grouping and sorting are already done by the comparators; just emit in order.
             for (Text value : values) {
                 rank++;
                 context.write(new IntWritable(rank), new Text(name + TAB + value.toString()));
             }
         }
     }

     /**
      * Configures and runs the job, then exits with 0 on success and 1 on failure.
      *
      * @param args {@code args[0]} input path, {@code args[1]} output path,
      *             {@code args[2]} split size in MB, {@code args[3]} number of reducers
      */
     public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
         if (args.length != 4) {
             System.err.println("Usage: MRWeb <in> <out> <split size in MB> <number of reducers>");
             // Non-zero status: a usage error must not look like success to the caller.
             System.exit(2);
         }
         String in = args[0];
         String out = args[1];
         int unitmb = Integer.parseInt(args[2]);
         int nreducer = Integer.parseInt(args[3]);

         // Long arithmetic: with int, any size of 2048 MB or more would overflow.
         long splitBytes = unitmb * 1024L * 1024L;

         Configuration conf = new Configuration();
         conf.setLong("mapreduce.input.fileinputformat.split.maxsize", splitBytes);
         // Current name of the deprecated "mapred.min.split.size" key.
         conf.setLong("mapreduce.input.fileinputformat.split.minsize", splitBytes);
         conf.setLong("mapreduce.input.fileinputformat.split.minsize.per.node", splitBytes);
         conf.setLong("mapreduce.input.fileinputformat.split.minsize.per.rack", splitBytes);

         // Job.getInstance replaces the deprecated Job(Configuration) constructor.
         Job job = Job.getInstance(conf);
         job.setJarByClass(MRWeb.class);

         FileInputFormat.addInputPath(job, new Path(in));
         FileOutputFormat.setOutputPath(job, new Path(out));

         job.setMapperClass(C02Mapper.class);
         job.setReducerClass(C02Reducer.class);
         job.setNumReduceTasks(nreducer);

         job.setPartitionerClass(C02Partitioner.class);
         job.setGroupingComparatorClass(C02Group.class);
         job.setSortComparatorClass(C02Sort.class);

         job.setMapOutputKeyClass(Text.class);
         job.setMapOutputValueClass(Text.class);
         job.setOutputKeyClass(IntWritable.class);
         job.setOutputValueClass(Text.class);

         // Propagate the job result so shell scripts and schedulers can detect a failure.
         System.exit(job.waitForCompletion(true) ? 0 : 1);
     }
 }

运行：hadoop jar ~/c02mrtest.jar com.mr.test.MRWeb TestData/webcount.txt /DataWorld/webresult 128 1

结果的样子：

3、json数组分析

数据格式（前面以tab分开）：

1	[{"name":"A","age":16,"maths":100}]

2	[{"name":"B","age":17,"maths":97}]

3	[{"name":"C","age":18,"maths":89}]

4	[{"name":"D","age":15,"maths":98}]

5	[{"name":"E","age":19,"maths":100}]

 package com.mr.test;

 import java.io.IOException;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 import net.sf.json.JSONArray;

 import net.sf.json.JSONObject;

 /**
  * Map-only job that flattens JSON arrays into tab-separated rows.
  *
  * <p>Each input line is expected to be {@code id<TAB>jsonArray}. For every object in the
  * array the job writes {@code id<TAB>name<TAB>age<TAB>maths}; a missing field is written
  * as an empty string.
  *
  * <p>Command-line arguments: {@code <in> <out> <split size in MB>}.
  */
 public class MRString {

     /** Extracts {@code name}, {@code age} and {@code maths} from each JSON object of a line. */
     public static class C03Mapper extends Mapper<Object, Text, Text, Text> {

         /**
          * Emits one row per element of the JSON array; lines without exactly two
          * tab-separated fields are skipped.
          *
          * @param key     input key supplied by the input format (unused)
          * @param value   one input line
          * @param context task context used to emit output
          */
         @Override
         protected void map(Object key, Text value, Context context)
                 throws IOException, InterruptedException {
             String[] fields = value.toString().split("\t");
             if (fields.length != 2) {
                 return;
             }
             Text id = new Text(fields[0]);
             JSONArray items = JSONArray.fromObject(fields[1]);
             int size = items.size();
             for (int i = 0; i < size; i++) {
                 JSONObject item = items.getJSONObject(i);
                 String row = fieldOrEmpty(item, "name")
                         + "\t" + fieldOrEmpty(item, "age")
                         + "\t" + fieldOrEmpty(item, "maths");
                 context.write(id, new Text(row));
             }
         }

         /** Returns the value of {@code field} as a string, or {@code ""} when the key is absent. */
         private static String fieldOrEmpty(JSONObject item, String field) {
             return item.containsKey(field) ? item.getString(field) : "";
         }
     }

     /**
      * Configures and runs the job, then exits with 0 on success and 1 on failure.
      *
      * @param args {@code args[0]} input path, {@code args[1]} output path,
      *             {@code args[2]} split size in MB
      */
     public static void main(String[] args) throws IllegalArgumentException, IOException, ClassNotFoundException, InterruptedException {
         if (args.length != 3) {
             System.err.println("Usage: MRString <in> <out> <split size in MB>");
             // Non-zero status: a usage error must not look like success to the caller.
             System.exit(2);
         }
         String in = args[0];
         String out = args[1];
         int unitmb = Integer.parseInt(args[2]);

         // Long arithmetic: with int, any size of 2048 MB or more would overflow.
         long splitBytes = unitmb * 1024L * 1024L;

         Configuration conf = new Configuration();
         conf.setLong("mapreduce.input.fileinputformat.split.maxsize", splitBytes);
         // Current name of the deprecated "mapred.min.split.size" key.
         conf.setLong("mapreduce.input.fileinputformat.split.minsize", splitBytes);
         conf.setLong("mapreduce.input.fileinputformat.split.minsize.per.node", splitBytes);
         conf.setLong("mapreduce.input.fileinputformat.split.minsize.per.rack", splitBytes);

         // Job.getInstance replaces the deprecated Job(Configuration) constructor.
         Job job = Job.getInstance(conf);
         job.setJarByClass(MRString.class);

         // JSON parsing libraries, added to the task classpath at run time.
         job.addFileToClassPath(new Path("TestData/json-lib-2.4-jdk15.jar"));
         job.addFileToClassPath(new Path("TestData/ezmorph-1.0.6.jar"));

         FileInputFormat.addInputPath(job, new Path(in));
         FileOutputFormat.setOutputPath(job, new Path(out));

         job.setMapperClass(C03Mapper.class);
         // There is no reducer, so the number of reduce tasks must be set to 0 explicitly.
         job.setNumReduceTasks(0);

         job.setMapOutputKeyClass(Text.class);
         job.setMapOutputValueClass(Text.class);
         job.setOutputKeyClass(Text.class);
         job.setOutputValueClass(Text.class);

         // Propagate the job result so shell scripts and schedulers can detect a failure.
         System.exit(job.waitForCompletion(true) ? 0 : 1);
     }
 }

运行：hadoop jar ~/c03mrtest.jar com.mr.test.MRString TestData/jsonarray.txt /DataWorld/jsonoutput 128

结果：

这个例子还有一点值得注意（Path中的目录是HDFS中的目录）：

job.addFileToClassPath(new Path("TestData/json-lib-2.4-jdk15.jar")); //jar文件下载地址：http://json-lib.sourceforge.net/

job.addFileToClassPath(new Path("TestData/ezmorph-1.0.6.jar")); //jar文件下载地址：http://ezmorph.sourceforge.net/
这两句在程序中动态添加了用于JSON解析的jar文件；仅依靠服务器上的ClassPath是访问不到这两个文件的。编写代码时，在Windows客户端上为了方便书写（通过语法检查），只导入了json-lib-2.4-jdk15.jar，并没有导入ezmorph-1.0.6.jar。

也就是说，只要知道jar文件在HDFS中的位置，就可以在程序中动态地加入它。

MapReduce开发实例的更多相关文章

ecshop二次开发给商品添加自定义字段【包含我自己进一步的开发实例详解】
本文包含商品自定义添加教程及进一步的开发实例: 教程: 说起自定义字段,我想很多的朋友像我一样会想起一些开源的CMS(比如Dedecms.Phpcms.帝国)等,他们是可以在后台直接添加自定义字段的. ...
RDIFramework.NET -.NET快速信息化系统开发整合框架【开发实例 EasyUI】之产品管理（WebForm版）
RDIFramework.NET—.NET快速开发整合框架 [开发实例]之产品管理(WebForm版) 接上篇:RDIFramework.NET (.NET快速信息化系统开发整合框架) [开发实例]之 ...
RDIFramework.NET-.NET快速信息化系统开发整合框架【开发实例 EasyUI】之产品管理（MVC版）
RDIFramework.NET—.NET快速开发整合框架 [开发实例]之产品管理(MVC版) 接上篇:RDIFramework.NET (.NET快速信息化系统开发整合框架) [开发实例]之产品管理 ...
Cocos2d-x 3.X手游开发实例详解
Cocos2d-x 3.X手游开发实例详解(最新最简Cocos2d-x手机游戏开发学习方法,以热门游戏2048.卡牌为例,完整再现手游的开发过程,实例丰富,代码完备,Cocos2d-x作者之一林顺和泰 ...
免费的HTML5连载来了《HTML5网页开发实例详解》连载（二）
最近新浪.百度.腾讯.京东.大众点评.淘宝等流行的网站都加大了招聘HTML5的力度,HTML5开发人员成了抢手货,本次连载的是由大众点评前端工程师和一淘网前端工程师基情奉献的<HTML5网页开发 ...
RDIFramework.NET开发实例━表约束条件权限的使用-Web
RDIFramework.NET开发实例━表约束条件权限的使用-Web 在上一篇文章“RDIFramework.NET开发实例━表约束条件权限的使用-WinForm”我们讲解了在WinForm下表约束 ...
RDIFramework.NET开发实例━表约束条件权限的使用-WinForm
RDIFramework.NET开发实例━表约束条件权限的使用-WinForm 在实际的应用中,客户常有这样的需求,指定用户或角色可以看指定条件下的数据,这里的“指定条件”在RDIFramework. ...
RDIFramework.NET V2.8版本 ━ 开发实例之产品管理（WinForm）
RDIFramework.NET V2.8版本 ━ 开发实例之产品管理(WinForm) 现在,我们使用.NET快速开发整合框架(RDIFramework.NET)来开发一个应用,此应用皆在说明如何使 ...
Android音乐播放器的开发实例
本文将引导大家做一个音乐播放器,在做这个Android开发实例的过程中,能够帮助大家进一步熟悉和掌握学过的ListView和其他一些组件.为了有更好的学习效果,其中很多功能我们手动实现,例如音乐播放的 ...

随机推荐

CodeIgniter-Lottery - php ci 抽奖辅助函数
CodeIgniter-Lottery - php ci 抽奖辅助函数 Github https://github.com/xjnotxj/CodeIgniter-Lottery 用法 1. 移入文件 ...
SwitchButton 开关按钮的多种实现方式
刚开始接触开关样式的按钮是在IOS系统上面,它的切换以及滑动十分帅气,深入人心. 所谓的开关按钮,就是只有2个状态:on和off,下图就是系统IOS 7上开关按钮效果. 起初我在android上我只会 ...
using语法糖详解 2015-01-06 17:45 50人阅读评论(0) 收藏
前段事件在using外套try catch 突然想到,如果出现异常会不会执行释放,不执行的话那服务器很可能导致崩溃... 特意上了CSDN问了大神..得到了答案.. Using相等于try catc ...
CSS魔法堂："那不是bug，是你不懂我!" by inline-block
前言每当来个需要既要水平排版又要设置固定高宽时,我就会想起display:inline-block,还有为了支持IE5.5/6/7的hack*display:inline;*zoom:1;.然后发 ...
Hibernate —— Entity.hbm.xml
一.简述 1.对象关系映射文件,用于映射实体类和关系数据库数据表之间的一个 xml 文件. 2.通过 Entity.hbm.xml 映射文件,Hibernate 可以理解持久化类和数据表之间的对应关系 ...
关于异步执行(Async/await)的理解(转发)
原文地址: http://blog.jobbole.com/85787/ 同步编程与异步编程通常情况下,我们写的C#代码就是同步的,运行在同一个线程中,从程序的第一行代码到最后一句代码顺序执行.而异 ...
模仿36。杀毒~button
<Style x:Key="360btn" TargetType="{x:Type Button}"> <Setter Property=&q ...
详解SQLServer 存储过程
Sql Server的存储过程是一个被命名的存储在服务器上的Transacation-Sql语句集合,是封装重复性工作的一种方法,它支持用户声明的变量.条件执行和其他强大的编程功能. 存储过程相对于其 ...
PHP中return 和 exit 、break和contiue 区别与用法
先说一下exit函数的用法. 作用: 输出一则消息并且终止当前脚本. 如果一段文本中包括多个以结束的脚本,则exit退出当前所在脚本. 比如一篇php文本包括一下代码,则输出为world. < ...
javascript 模式(1)——代码复用
程序的开发离不开代码的复用,通过代码复用可以减少开发和维护成本,在谈及代码复用的时候,会首先想到继承性,但继承并不是解决代码复用的唯一方式,还有其他的复用模式比如对象组合.本节将会讲解多种继承模式以实 ...

MapReduce开发实例

MapReduce开发实例的更多相关文章

随机推荐

热门专题