mapreduce编程练习（二）倒排索引 Combiner的使用以及练习

问题一：请利用Combiner的方式，根据图示内容编写mapreduce程序

示例程序

package com.greate.learn;

import java.io.IOException;
import java.net.URI;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * Inverted-index MapReduce job: for every whitespace-separated word in the input
 * files it reports the files that contain the word and how often the word occurs
 * in each of them.
 *
 * <p>Data flow: the mapper emits {@code (word, "fileName:1")}; the combiner and the
 * reducer both merge values of the form {@code "fileName:count"} by summing the
 * counts per file. Because the combiner's output has exactly the same shape as its
 * input, the result is the same whether the framework runs the combiner zero, one
 * or several times, and because the shuffle key is the bare word, all postings of a
 * word reach the same reducer.
 *
 * <p>Final output line: {@code word <TAB> file1:n1;file2:n2}, files sorted by name.
 */
public class GetFile_Statistics extends Configured implements Tool {

	/** Separates the file name from the count inside a posting value. */
	private static final char COUNT_SEPARATOR = ':';

	/** Emits one {@code (word, "fileName:1")} pair per token of the input line. */
	public static class CountMapper extends Mapper<LongWritable, Text, Text, Text>{

		// Reused across calls to avoid allocating two objects per token.
		private final Text word = new Text();
		private final Text posting = new Text();

		/**
		 * @param key     position of the line in the split (unused)
		 * @param value   one line of input text
		 * @param context used to read the current split's file name and to emit pairs
		 */
		@Override
		protected void map(LongWritable key,Text value,Mapper<LongWritable, Text, Text, Text>.Context context)
				throws IOException,InterruptedException{
			String fileName = ((FileSplit) context.getInputSplit()).getPath().getName();
			posting.set(fileName + COUNT_SEPARATOR + 1);

			StringTokenizer tokenizer = new StringTokenizer(value.toString());
			while (tokenizer.hasMoreTokens()) {
				word.set(tokenizer.nextToken());
				context.write(word, posting);
			}
		}
	}

	/**
	 * Map-side pre-aggregation: sums the counts per file for one word and re-emits
	 * them as {@code (word, "fileName:count")}, i.e. in the same format it consumes.
	 */
	public static class Combiner extends Reducer<Text, Text, Text, Text>{

		private final Text posting = new Text();

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
			for (Map.Entry<String, Long> entry : sumPerFile(values).entrySet()) {
				posting.set(entry.getKey() + COUNT_SEPARATOR + entry.getValue());
				context.write(key, posting);
			}
		}
	}

	/**
	 * Produces the final index entry for one word: all {@code fileName:count}
	 * postings joined with {@code ';'}.
	 */
	public static class CountReducer extends Reducer<Text, Text, Text, Text>{

		private final Text postings = new Text();

		@Override
		protected void reduce(Text key, Iterable<Text> values,
				Reducer<Text, Text, Text, Text>.Context context) throws IOException, InterruptedException {
			StringBuilder joined = new StringBuilder();
			for (Map.Entry<String, Long> entry : sumPerFile(values).entrySet()) {
				if (joined.length() > 0) {
					joined.append(';');
				}
				joined.append(entry.getKey()).append(COUNT_SEPARATOR).append(entry.getValue());
			}
			postings.set(joined.toString());
			context.write(key, postings);
		}
	}

	/**
	 * Sums the counts of {@code "fileName:count"} values per file name.
	 *
	 * @param values postings of a single word
	 * @return file name to total count, ordered by file name
	 * @throws IOException if a value has no separator or a non-numeric count
	 */
	private static Map<String, Long> sumPerFile(Iterable<Text> values) throws IOException {
		Map<String, Long> totals = new TreeMap<String, Long>();
		for (Text value : values) {
			String posting = value.toString();
			// Split on the LAST separator so file names containing ':' still parse.
			int split = posting.lastIndexOf(COUNT_SEPARATOR);
			if (split < 0) {
				throw new IOException("Malformed posting (expected fileName:count): " + posting);
			}
			long count;
			try {
				count = Long.parseLong(posting.substring(split + 1));
			} catch (NumberFormatException e) {
				throw new IOException("Malformed count in posting: " + posting, e);
			}
			String fileName = posting.substring(0, split);
			Long previous = totals.get(fileName);
			totals.put(fileName, previous == null ? count : previous + count);
		}
		return totals;
	}

	/** File system handle created by {@link #init()}; may be null if init() was not called. */
	static FileSystem fs = null;

	/** Configuration created by {@link #init()} and handed to the job by {@link #main}. */
	static Configuration conf=null;

	/**
	 * Creates the shared configuration and connects to the HDFS instance at
	 * {@code hdfs://localhost:9000/} as user {@code hadoop}.
	 *
	 * @throws Exception if the URI is invalid or the file system cannot be obtained
	 */
	public static void init() throws Exception{
		conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://localhost:9000/");
		fs = FileSystem.get(new URI("hdfs://localhost:9000/"),conf,"hadoop");
	}

	/**
	 * Configures and runs the job on {@code /GetFile_Statistics/input}, writing to
	 * {@code /GetFile_Statistics/output}. An existing output directory is deleted first.
	 *
	 * @param args command-line arguments (unused)
	 * @return 0 on success, 1 if the input directory is missing or the job fails
	 * @throws Exception on file system or job submission errors
	 */
	@Override
	public int run(String[] args) throws Exception {
		Job job = Job.getInstance(getConf(),"WordCount");
		job.setJarByClass(GetFile_Statistics.class);
		job.setMapperClass(CountMapper.class);
		job.setCombinerClass(Combiner.class);
		job.setReducerClass(CountReducer.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);

		// Fall back to the job configuration's file system when init() was not called.
		FileSystem fileSystem = (fs != null) ? fs : FileSystem.get(getConf());

		Path in = new Path("/GetFile_Statistics/input");
		if (!fileSystem.exists(in)) {
			// Submitting without an input path would only fail later inside the framework.
			System.out.println("文件夹不存在，需要创建！");
			return 1;
		}
		FileInputFormat.addInputPath(job, in);

		Path os = new Path("/GetFile_Statistics/output");
		if (fileSystem.exists(os)) {
			System.out.println("文件夹存在！不再创建！");
			// The job refuses to start if its output directory already exists.
			fileSystem.delete(os, true);
		}
		FileOutputFormat.setOutputPath(job, os);

		return job.waitForCompletion(false) ? 0 : 1;
	}

	/**
	 * Entry point: initialises the HDFS connection, runs the tool with the
	 * configuration built by {@link #init()} and exits with the tool's status.
	 */
	public static void main(String[] args) throws Exception {
		init();
		// Pass conf explicitly so the job uses the same fs.defaultFS as the fs handle.
		int res = ToolRunner.run(conf, new GetFile_Statistics(), args);
		System.exit(res);
	}

}

问题二：现有一批电话通信清单，记录了用户A拨打某些特殊号码（如120、10086、13800138000等）的记录。需要做一个统计结果，记录拨打给用户B的所有用户A。

示例程序

package com.greate.learn;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

public class PhoneNumber_Statistic extends Configured implements Tool{

	public static void main (String[] args) throws Exception{

			ToolRunner.run(new PhoneNumber_Statistic(), args);

	}

	public int run(String[] arg0) throws Exception{

		Configuration conf = getConf();

	    Job job = new Job(conf);

		job.setJarByClass(getClass());

		FileSystem fs = FileSystem.get(conf);

		FileInputFormat.setInputPaths(job, new Path("/PhoneNumber_Statistics/input/"));

		FileOutputFormat.setOutputPath(job, new Path("/PhoneNumber_Statistics/output/"));

		job.setOutputKeyClass(Text.class);

		job.setOutputValueClass(Text.class);

		job.setMapperClass(numberMap.class);

		job.setReducerClass(numberReduce.class);

		job.waitForCompletion(true);

		return 0;

	}

}

class numberMap extends Mapper<LongWritable, Text, Text, Text>{

		protected void map(LongWritable key, Text value, Context context)

throws IOException,InterruptedException{

			String[] list = value.toString().split(" ");

			String keyy = list[1];

			String valuee = list[0];

			context.write(new Text(keyy), new Text(valuee));

		}

}

class numberReduce extends Reducer<Text, Text, Text, Text>{																					//��������

	protected void reduce(Text key, Iterable<Text> values, Context context)

		throws IOException,InterruptedException{

		String valuee;

        String out = "";

        for(Text value:values){

        	valuee  = value.toString() + " | ";

        	out +=valuee;

        }

        context.write(key,new Text(out));

	}

}

mapreduce编程练习（二）倒排索引 Combiner的使用以及练习的更多相关文章

hadoop2.2编程：mapreduce编程之二次排序
mr自带的例子中的源码SecondarySort,我重新写了一下,基本没变. 这个例子中定义的map和reduce如下,关键是它对输入输出类型的定义:(java泛型编程) public static ...
Hadoop MapReduce编程 API入门系列之倒排索引（二十四）
不多说,直接上代码. 2016-12-12 21:54:04,509 INFO [org.apache.hadoop.metrics.jvm.JvmMetrics] - Initializing JV ...
《Data-Intensive Text Processing with mapReduce》读书笔记之二：mapreduce编程、框架及运行
搜狐视频的屌丝男士第二季大结局了,惊现波多野老师,怀揣着无比鸡冻的心情啊,可惜随着剧情的推进发展,并没有出现期待中的屌丝奇遇,大鹏还是没敢冲破尺度的界线.想百度些种子吧,又不想让电脑留下污点证据,要知 ...
三、MapReduce编程实例
前文一.CentOS7 hadoop3.3.1安装(单机分布式.伪分布式.分布式二.JAVA API实现HDFS MapReduce编程实例 @ 目录前文 MapReduce编程实例前言注意 ...
Hadoop MapReduce编程学习
一直在搞spark,也没时间弄hadoop,不过Hadoop基本的编程我觉得我还是要会吧,看到一篇不错的文章,不过应该应用于hadoop2.0以前,因为代码中有 conf.set("map ...
hadoop2.2编程：使用MapReduce编程实例（转）
原文链接:http://www.cnblogs.com/xia520pi/archive/2012/06/04/2534533.html 从网上搜到的一篇hadoop的编程实例,对于初学者真是帮助太大 ...
MapReduce编程实例4
MapReduce编程实例: MapReduce编程实例(一),详细介绍在集成环境中运行第一个MapReduce程序 WordCount及代码分析 MapReduce编程实例(二),计算学生平均成绩 ...
批处理引擎MapReduce编程模型
批处理引擎MapReduce编程模型作者:尹正杰版权声明:原创作品,谢绝转载!否则将追究法律责任. MapReduce是一个经典的分布式批处理计算引擎,被广泛应用于搜索引擎索引构建,大规模数据处理 ...
大数据笔记（十）——Shuffle与MapReduce编程案例（A）
一.什么是Shuffle yarn-site.xml文件配置的时候有这个参数:yarn.nodemanage.aux-services:mapreduce_shuffle 因为mapreduce程序运 ...
Hadoop MapReduce编程 API入门系列之压缩和计数器（三十）
不多说,直接上代码. Hadoop MapReduce编程 API入门系列之小文件合并(二十九) 生成的结果,作为输入源. 代码 package zhouls.bigdata.myMapReduce. ...

随机推荐

spring boot集成mybatis-plus插件进行自定义sql方法开发时报nested exception is org.apache.ibatis.binding.BindingException: Invalid bound statement (not found):
spring boot集成mybatis-plus插件进行自定义sql方法开发时报nested exception is org.apache.ibatis.binding.BindingExcept ...
Let’s Encrypt 通配符证书,泛域名证书申请配置
首先你可以查看下官方提供的支持申请通配符证书的客户端列表:https://letsencrypt.org/docs/client-options/. 参考链接:https://github.com/N ...
入门Kubernetes -基础概念
一.Kubernetes概述 Kubernetes ,又称为 k8s(首字母为 k.首字母与尾字母之间有 8 个字符.尾字母为 s,所以简称 k8s)或者简称为 "kube" ,是 ...
【转载】一种git commit前自动格式化的方式
查看原文简介这个系列为了解决一个问题:自动化的去管理代码风格和格式前提:Linux,C语言,Clang 如何在每次commit的时候,将代码风格自动格式化后再提交commit,且格式化的内容必须 ...
sql查询速度慢分析及如何优化查询
原因分析后台数据库中数据过多,未做数据优化数据请求-解析-展示处理不当网络问题提高数据库查询的速度方案SQL 查询速度慢的原因有很多,常见的有以下几种:1.没有索引或者没有用到索引(查询慢最常见的问 ...
Xamarin.Form 5.0：新功能和控件以及调试改进
上周在.NET Conf 2020,Scott Hunter(.NET),Maddy Leger(微软移动开发工具-Xamarin项目经理)和David Ortinau(首席项目经理,移动开发人员工具 ...
2021升级版微服务教程5—通过IDEA运行多个项目实例「模拟集群」
2021升级版SpringCloud教程从入门到实战精通「H版&alibaba&链路追踪&日志&事务&锁」教程全目录「含视频」:https://gitee.c ...
【SpringBoot1.x】SpringBoot1.x 任务
SpringBoot1.x 任务文章源码异步任务在 Java 应用中,绝大多数情况下都是通过同步的方式来实现交互处理的.但是在处理与第三方系统交互的时候,容易造成响应迟缓的情况,之前大部分都是使 ...
Fail2ban工具使用
Fail2ban fail2ban扫描日志文件并且可以识别禁用某些多次尝试登录的IP,通过更新系统的防火墙规则来实现拒绝该IP连接,也可以配置禁用的时间.fail2ban提供了一些常用软件默认的日 ...
【Linux】fio测试读写速度
需要安装fio yum install fio -y 有很多依赖包 FIO用法: 随机读:(可直接用,向磁盘写一个2G文件,10线程,随机读1分钟,给出结果) fio -filename=/h ...

mapreduce编程练习（二）倒排索引 Combiner的使用以及练习

mapreduce编程练习（二）倒排索引 Combiner的使用以及练习的更多相关文章

随机推荐

热门专题