map/reduce实现排序

 import java.io.IOException;

 import org.apache.hadoop.conf.Configuration;

 import org.apache.hadoop.conf.Configured;

 import org.apache.hadoop.fs.Path;

 import org.apache.hadoop.io.IntWritable;

 import org.apache.hadoop.io.LongWritable;

 import org.apache.hadoop.io.Text;

 import org.apache.hadoop.mapreduce.Job;

 import org.apache.hadoop.mapreduce.Mapper;

 import org.apache.hadoop.mapreduce.Partitioner;

 import org.apache.hadoop.mapreduce.Reducer;

 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

 import org.apache.hadoop.util.Tool;

 import org.apache.hadoop.util.ToolRunner;

 /**
  * MapReduce job that sorts integers.
  *
  * <p>Input format: one integer per line.
  *
  * <p>Output format: {@code <rank> <value>} per line, for example
  * <pre>
  * 1 21
  * 2 32
  * 3 62
  * </pre>
  *
  * <p>Design: the job relies on the framework's shuffle sort, which orders
  * records by key. {@code IntWritable} keys are ordered numerically
  * ({@code Text} keys would be ordered lexicographically). Each reducer only
  * sorts its own share of the data, so a range {@link Partition} is installed
  * to keep the reducer outputs ordered relative to each other.
  */
 public class Sort extends Configured implements Tool {

     /**
      * Parses each input line as an integer and emits it as the key, so the
      * shuffle phase sorts the values. The emitted value is a constant 1.
      */
     public static class Map extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

         /** Constant value emitted with every key; reused to avoid per-record allocation. */
         private static final IntWritable ONE = new IntWritable(1);

         private final IntWritable line = new IntWritable();

         /**
          * @param key     position of the line in the input (unused)
          * @param value   one line of input, expected to hold a single integer
          * @param context sink for the {@code (number, 1)} pair
          * @throws NumberFormatException if a non-blank line is not a valid integer
          */
         @Override
         public void map(LongWritable key, Text value, Context context)
                 throws IOException, InterruptedException {
             // Tolerate surrounding whitespace (e.g. a trailing '\r') and skip
             // blank lines instead of failing the task on them.
             String text = value.toString().trim();
             if (text.isEmpty()) {
                 return;
             }
             line.set(Integer.parseInt(text));
             context.write(line, ONE);
         }

     }

     /**
      * Emits {@code (rank, number)} for every occurrence of every key it receives.
      *
      * <p>The rank counter lives in the reducer instance, so it starts at 1 in
      * each reduce task; with more than one reducer the rank is local to that
      * reducer's output file rather than global.
      */
     public static class Reduce extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

         /** Running rank of the next record written by this reducer. */
         private final IntWritable num = new IntWritable(1);

         /**
          * @param key     the number being ranked
          * @param values  one entry per occurrence of {@code key} in the input
          * @param context sink for the {@code (rank, number)} pairs
          */
         @Override
         public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                 throws IOException, InterruptedException {
             // One output line per occurrence so duplicates are preserved.
             for (IntWritable ignored : values) {
                 context.write(num, key);
                 num.set(num.get() + 1);
             }
         }

     }

     /**
      * Range partitioner: splits the value range into {@code numPartitions}
      * consecutive, equally wide buckets so that every key sent to partition
      * {@code i} is smaller than every key sent to partition {@code i + 1}.
      */
     public static class Partition extends Partitioner<IntWritable, IntWritable> {

         /** Upper end of the value range the bucket width is derived from. */
         private static final int MAX_VALUE = 65223;

         /**
          * @param key           the number to place
          * @param value         unused
          * @param numPartitions number of reduce tasks
          * @return a partition index in {@code [0, numPartitions - 1]} that is
          *         non-decreasing in {@code key}
          */
         @Override
         public int getPartition(IntWritable key, IntWritable value, int numPartitions) {
             int bound = MAX_VALUE / numPartitions + 1;
             int index = key.get() / bound;
             if (index < 0) {
                 // Keys below the expected range go to the first partition.
                 return 0;
             }
             // Keys above MAX_VALUE go to the last partition, which keeps the
             // partitions ordered (sending them to 0 would break the sort).
             return Math.min(index, numPartitions - 1);
         }

     }

     /**
      * Configures and runs the sort job.
      *
      * @param args {@code args[0]} is the input path, {@code args[1]} the output path
      * @return 0 on success, 1 if the job failed, 2 if the arguments are missing
      */
     @Override
     public int run(String[] args) throws Exception {
         if (args.length < 2) {
             System.err.println("Usage: Sort <input path> <output path>");
             return 2;
         }

         // Use the configuration populated by ToolRunner so generic options
         // (-D, -conf, ...) take effect; fall back to a fresh one when run()
         // is invoked without ToolRunner.
         Configuration conf = getConf();
         if (conf == null) {
             conf = new Configuration();
         }

         Job job = new Job(conf, "Sort");
         job.setJarByClass(Sort.class);

         job.setOutputKeyClass(IntWritable.class);
         job.setOutputValueClass(IntWritable.class);

         job.setMapperClass(Map.class);
         job.setReducerClass(Reduce.class);
         job.setPartitionerClass(Partition.class);

         job.setInputFormatClass(TextInputFormat.class);
         job.setOutputFormatClass(TextOutputFormat.class);

         FileInputFormat.addInputPath(job, new Path(args[0]));
         FileOutputFormat.setOutputPath(job, new Path(args[1]));

         boolean success = job.waitForCompletion(true);
         return success ? 0 : 1;
     }

     /** Command-line entry point; exits with the status returned by {@link #run}. */
     public static void main(String[] args) throws Exception {
         int ret = ToolRunner.run(new Sort(), args);
         System.exit(ret);
     }

 }

map/reduce实现排序的更多相关文章

python--函数式编程 (高阶函数(map , reduce ,filter,sorted)，匿名函数(lambda))
1.1函数式编程面向过程编程:我们通过把大段代码拆成函数,通过一层一层的函数,可以把复杂的任务分解成简单的任务,这种一步一步的分解可以称之为面向过程的程序设计.函数就是面向过程的程序设计的基本单元. ...
map reduce
作者:Coldwings链接:https://www.zhihu.com/question/29936822/answer/48586327来源:知乎著作权归作者所有,转载请联系作者获得授权. 简单的 ...
Map/Reduce个人实战--生成数据测试集
背景: 在大数据领域, 由于各方面的原因. 有时需要自己来生成测试数据集, 由于测试数据集较大, 因此采用Map/Reduce的方式去生成. 在这小编(mumuxinfei)结合自身的一些实战经历, ...
用通俗易懂的大白话讲解Map/Reduce原理
Hadoop简介 Hadoop就是一个实现了Google云计算系统的开源系统,包括并行计算模型Map/Reduce,分布式文件系统HDFS,以及分布式数据库Hbase,同时Hadoop的相关项目也很丰 ...
Map/Reduce之间的Partitioner接口
一.Partitioner介绍 Partitioner的作用是对Mapper产生的中间结果进行分片,以便将同一分组的数据交给同一个Reduce处理,它直接影响Reduce阶段的负载均衡(个人理解:就是 ...
分布式基础学习（2）分布式计算系统（Map/Reduce）
二. 分布式计算(Map/Reduce) 分布式式计算,同样是一个宽泛的概念,在这里,它狭义的指代,按Google Map/Reduce框架所设计的分布式框架.在Hadoop中,分布式文件系统,很 ...
python笔记十四（高阶函数——map/reduce、filter、sorted）
一.map/reduce 1.map() map(f,iterable),将一个iterable对象一次作用于函数f,并返回一个迭代器. >>> def f(x): #定义一个函数 ...
hadoop入门级总结二：Map/Reduce
在上一篇博客:hadoop入门级总结一:HDFS中,简单的介绍了hadoop分布式文件系统HDFS的整体框架及文件写入读出机制.接下来,简要的总结一下hadoop的另外一大关键技术之一分布式计算框架: ...
Map Reduce和流处理
欢迎大家前往腾讯云+社区,获取更多腾讯海量技术实践干货哦~ 本文由@从流域到海域翻译,发表于腾讯云+社区 map()和reduce()是在集群式设备上用来做大规模数据处理的方法,用户定义一个特定的映射 ...

随机推荐

mysql导入导出.sql文件备份还原数据库
从数据库导出数据库文件: 进入你的MySQL的安装目录的bin目录或者在C盘的根目录都行,我选的是在bin目录下,下面的例子出第一个外将以在C盘的根目录来讲解我的mysql安装在了C盘,C: ...
PHP & Javascript 如何对字符串中包含html标签进行编码整理
为什么要对字符串编码? 某些字符串中包含html标签,不编码,页面输出就乱了. PHP下怎么对字符串编码? htmlentities vs htmlspecialchars htmlentities ...
Json 数组排序
/*********************************************Json 数组排序 ******************************************** ...
PHP通过（PDO）Mysql表字段一键生成创建sqlite的SQL
首发于:http://www.zzzzy.com/201406053158.html /** * Mysql表字段一键生成创建sqlite的SQL 2 * @author: Skiychan < ...
jsp查询页面和结果页面在同一页面显示和交互
用frameset实现查询页面和结果页面在同一页面用target实现交互显示在同一页面上请参照以下方法解决: main.jsp: <html> <head> <met ...
QQ宠物吹泡泡游戏小助手 VC++6.0代码分析
最近玩QQ宠物,他总是心情低落,让我很不爽,让他玩耍吧,还得自己点鼠标,所以想偷个懒,试试能不能编个程序让电脑帮我做这个事情. 要干这件事就得先找一个游戏开刀,刚开始我找的是弹力球游戏,不就是点鼠标么 ...
windows 远程桌面连接ubuntu xrdp 只看到墙纸其他什么都没有
用 windows 的 mstsc 连接 ubuntu 的 xrdp 时,进入后只看到墙纸,其他什么都没有,鼠标指针也不见,输入按键都无反应. 原来 Ubuntu 启动了 3d 桌面,导致 xrdp ...
BZOJ 1612: [Usaco2008 Jan]Cow Contest奶牛的比赛
Description FJ的N(1 <= N <= 100)头奶牛们最近参加了场程序设计竞赛:).在赛场上,奶牛们按1..N依次编号.每头奶牛的编程能力不尽相同,并且没有哪两头奶牛的水平 ...
ECshop 在迁移到 PHP7 时遇到的兼容性问题
在 PHP7 上安装 ECShop V2.7.3时,报错! Deprecated: Methods with the same name as their class will not be cons ...
poi 操作excel
poi操作创建一个excel关联对象HSSFWorkbook: HSSFWorkbook book = new HSSFWorkbook(); 创建一个sheet: HSSFSheet st = b ...

map/reduce实现 排序

map/reduce实现 排序的更多相关文章

随机推荐

热门专题

map/reduce实现排序

map/reduce实现排序的更多相关文章