字段解释: product_no:用户手机号; lac_id:用户所在基站; start_time:用户在此基站的开始时间; staytime:用户在此基站的逗留时间。

  1. product_no lac_id moment start_time user_id county_id staytime city_id
  2. -- ::19.151754088
  3. -- ::20.152622488
  4. -- ::37.149593624
  5. -- ::51.139539816
  6. -- ::45.150276800
  7. -- ::38.140225200
  8. -- ::19.151754088
  9. -- ::32.151754088
  10. -- ::24.139539816
  11. -- ::30.152622440

需求描述:  根据 lac_id 和 start_time 知道用户当时的位置,根据 staytime 知道用户各个基站的逗留时长。根据轨迹合并连续基站的 staytime。最终得到每一个用户按时间排序在每一个基站驻留时长。
期望输出:

  1. -- ::20.152622488
  2. -- ::37.149593624
  3. -- ::38.140225200
  4. -- ::51.139539816
  5. -- ::45.150276800

问题分析:针对每个product_no按照start_time进行排序(本例降序),如果相邻两项的lac_id相同,则将staytime进行相加保存到后一项中,并将前一项移除。

完整代码v1:此版本只启用了Map阶段。map()函数:将每行内容解析成自定义的RecordWritable对象并添加到List集合中,然后对List集合进行排序。cleanup()函数:将product_no和lac_id相同的相邻两项中的staytime进行相加。

缺点:将全部数据添加到List集合,对于大数据量无法满足要求。

  1. package demo0902;
  2.  
  3. import java.io.DataInput;
  4. import java.io.DataOutput;
  5. import java.io.IOException;
  6. import java.util.ArrayList;
  7. import java.util.Collections;
  8. import java.util.Comparator;
  9.  
  10. import org.apache.hadoop.conf.Configuration;
  11. import org.apache.hadoop.fs.Path;
  12. import org.apache.hadoop.io.LongWritable;
  13. import org.apache.hadoop.io.NullWritable;
  14. import org.apache.hadoop.io.Text;
  15. import org.apache.hadoop.io.WritableComparable;
  16. import org.apache.hadoop.mapreduce.Job;
  17. import org.apache.hadoop.mapreduce.Mapper;
  18. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  19. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  20.  
  21. public class Demo090203 {
  22. final static String INPUT_PATH = "hdfs://10.16.17.182:9000/test/in/0902/";
  23. final static String OUT_PATH = "hdfs://10.16.17.182:9000/test/out/0902/06";
  24.  
  25. public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  26. Configuration configuration = new Configuration();
  27. Job job = Job.getInstance(configuration);
  28. job.setJarByClass(Demo090203.class);
  29.  
  30. //指定map
    job.setMapperClass(Demo090201Mapper.class);
  31.  
  32. job.setMapOutputKeyClass(RecordWritable.class);
  33. job.setMapOutputValueClass(NullWritable.class);
  34.  
  35. job.setOutputKeyClass(RecordWritable.class);
  36. job.setOutputValueClass(NullWritable.class);
  37.  
  38. FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
  39. FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
  40.  
  41. job.waitForCompletion(true);
  42.  
  43. }
  44. //map
  45. public static class Demo090201Mapper extends Mapper<LongWritable, Text, RecordWritable, NullWritable>{
  46.  
  47. //存储一条记录
  48. ArrayList<RecordWritable> list = new ArrayList<RecordWritable>();
  49.  
  50. @Override
  51. protected void map(LongWritable key, Text value, Context context)
  52. throws IOException, InterruptedException {
  53. String[] splited = value.toString().split("\t");
  54.  
  55. //将一行内容组装成一条记录
  56. RecordWritable record = new RecordWritable();
  57. record.product_no=splited[0];
  58. record.lac_id=splited[1];
  59. record.moment=Integer.parseInt(splited[2]);
  60. record.start_time=splited[3];
  61. record.user_id=splited[4];
  62. record.county_id=splited[5];
  63. record.staytime=Integer.parseInt(splited[6]);
  64. record.city_id=splited[7];
  65.  
  66. list.add(record);
  67.  
  68. //对List中数据进行排序(自定义比较器)
  69. Collections.sort(list, new Comparator<RecordWritable>() {
  70. @Override
  71. public int compare(RecordWritable r1, RecordWritable r2) {
  72.  
  73. //调用RecordWritable的compareTo()方法
  74. return (r1.compareTo(r2));
  75. }
  76. });
  77. }
  78.  
  79. @Override
  80. protected void cleanup(Context context)
  81. throws IOException, InterruptedException {
  82.  
  83. for(RecordWritable r : list){
  84. System.out.println(r.toString());
  85. }
  86.  
  87. for(int i=0; i<list.size() ;i++){
  88. if(i != list.size()-1){
  89.  
  90. //取出相邻的两个RecordWritable
  91. RecordWritable record_pre = list.get(i);
  92. RecordWritable record_next = list.get(i+1);
  93.  
  94. //只有手机号和基站号都相等的情况下,才将 staytime 相加
  95. if(record_pre.product_no.equals(record_next.product_no) && record_pre.lac_id.equals(record_next.lac_id)){
  96.  
  97. //将相加后的staytime赋予后一条记录
  98. record_next.staytime += record_pre.staytime;
  99.  
  100. //移除前一条记录
  101. list.remove(record_pre);
  102. }
  103. }
  104. }
  105. for(RecordWritable record : list){
  106. context.write(record, NullWritable.get());
  107. }
  108. }
  109. }
  110.  
  111. //自定义的序列化类
  112. public static class RecordWritable implements WritableComparable<RecordWritable>{
  113. String product_no;
  114. String lac_id;
  115. int moment;
  116. String start_time;
  117. String user_id;
  118. String county_id;
  119. int staytime;
  120. String city_id;
  121.  
  122. @Override
  123. public int compareTo(RecordWritable o) {
  124. // 先按手机号排序 Asc
  125. int value = this.product_no.compareTo(o.product_no);
  126. if(value==0)
  127. // 再按时间进行排序 Desc
  128. return o.start_time.compareTo(this.start_time);
  129. return value;
  130. }
  131.  
  132. @Override
  133. public void write(DataOutput out) throws IOException {
  134. out.writeUTF(product_no);
  135. out.writeUTF(lac_id);
  136. out.writeInt(moment);
  137. out.writeUTF(start_time);
  138. out.writeUTF(user_id);
  139. out.writeUTF(county_id);
  140. out.writeInt(staytime);
  141. out.writeUTF(city_id);
  142. }
  143.  
  144. @Override
  145. public void readFields(DataInput in) throws IOException {
  146. product_no=in.readUTF();
  147. lac_id=in.readUTF();
  148. moment=in.readInt();
  149. start_time=in.readUTF();
  150. user_id=in.readUTF();
  151. county_id=in.readUTF();
  152. staytime=in.readInt();
  153. city_id=in.readUTF();
  154. }
  155.  
  156. @Override
  157. public String toString() {
  158. return this.product_no+" "+this.lac_id+" "+this.moment+" "+this.start_time+" "+user_id+" "+county_id+" "+ staytime+" "+city_id;
  159. }
  160. }
  161. }

完整代码v2:此版本Map阶段以product_no为key,每行内容为value进行输出。Reduce阶段和上一个版本的Map阶段功能类似。

优点:相比于v1,此版本优化在于每次只处理一个product_no相关的数据,减缓数据量带来的压力。

  1. package demo0902;
  2.  
  3. import java.io.DataInput;
  4. import java.io.DataOutput;
  5. import java.io.IOException;
  6. import java.util.ArrayList;
  7. import java.util.Collections;
  8. import java.util.Comparator;
  9.  
  10. import org.apache.hadoop.conf.Configuration;
  11. import org.apache.hadoop.fs.Path;
  12. import org.apache.hadoop.io.LongWritable;
  13. import org.apache.hadoop.io.NullWritable;
  14. import org.apache.hadoop.io.Text;
  15. import org.apache.hadoop.io.WritableComparable;
  16. import org.apache.hadoop.mapreduce.Job;
  17. import org.apache.hadoop.mapreduce.Mapper;
  18. import org.apache.hadoop.mapreduce.Reducer;
  19. import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
  20. import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
  21.  
  22. public class Demo090204 {
  23. final static String INPUT_PATH = "hdfs://10.16.17.182:9000/test/in/0902/";
  24. final static String OUT_PATH = "hdfs://10.16.17.182:9000/test/out/0902/02";
  25.  
  26. public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
  27. Configuration configuration = new Configuration();
  28. Job job = Job.getInstance(configuration);
  29. job.setJarByClass(Demo090203.class);
  30.  
  31. job.setMapperClass(Demo090201Mapper.class);
  32. job.setReducerClass(Demo090201Reducer.class);
  33.  
  34. job.setMapOutputKeyClass(Text.class);
  35. job.setMapOutputValueClass(Text.class);
  36.  
  37. job.setOutputKeyClass(RecordWritable.class);
  38. job.setOutputValueClass(NullWritable.class);
  39.  
  40. FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
  41. FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
  42.  
  43. job.waitForCompletion(true);
  44.  
  45. }
  46. //map
  47. public static class Demo090201Mapper extends Mapper<LongWritable, Text, Text, Text>{
  48. @Override
  49. protected void map(LongWritable key, Text value, Context context)
  50. throws IOException, InterruptedException {
  51. String[] splited = value.toString().split("\t");
  52.  
  53. context.write(new Text(splited[0]), new Text(value));
  54. }
  55. }
  56.  
  57. //reduce
  58. public static class Demo090201Reducer extends Reducer<Text, Text, RecordWritable, NullWritable>{
  59. @Override
  60. protected void reduce(Text key, Iterable<Text> v2s, Context context)
  61. throws IOException, InterruptedException {
  62.  
  63. ArrayList<RecordWritable> list = new ArrayList<RecordWritable>();
  64.  
  65. for(Text text : v2s){
  66. String[] splited = text.toString().split("\t");
  67.  
  68. RecordWritable record = new RecordWritable();
  69. record.product_no=splited[0];
  70. record.lac_id=splited[1];
  71. record.moment=Integer.parseInt(splited[2]);
  72. record.start_time=splited[3];
  73. record.user_id=splited[4];
  74. record.county_id=splited[5];
  75. record.staytime=Integer.parseInt(splited[6]);
  76. record.city_id=splited[7];
  77.  
  78. list.add(record);
  79. }
  80.  
  81. //对List中数据进行排序(自定义比较器)
  82. Collections.sort(list, new Comparator<RecordWritable>() {
  83. @Override
  84. public int compare(RecordWritable r1, RecordWritable r2) {
  85.  
  86. //调用RecordWritable的compareTo()方法
  87. return (r1.compareTo(r2));
  88. }
  89. });
  90.  
  91. for(int i=0; i<list.size() ;i++){
  92.  
  93. //滤过最后一条记录
  94. if(i != list.size()-1){
  95.  
  96. //取出相邻的两个RecordWritable
  97. RecordWritable record_pre = list.get(i);
  98. RecordWritable record_next = list.get(i+1);
  99.  
  100. if(record_pre.lac_id.equals(record_next.lac_id)){
  101.  
  102. //将相加后的staytime赋予后一条记录
  103. record_next.staytime += record_pre.staytime;
  104.  
  105. //移除前一条记录
  106. list.remove(record_pre);
  107. }
  108. }
  109. }
  110. for(RecordWritable record : list){
  111. context.write(record, NullWritable.get());
  112. }
  113. }
  114. }
  115. //自定义的序列化类
  116. public static class RecordWritable implements WritableComparable<RecordWritable>{
  117. String product_no;
  118. String lac_id;
  119. int moment;
  120. String start_time;
  121. String user_id;
  122. String county_id;
  123. int staytime;
  124. String city_id;
  125.  
  126. @Override
  127. public int compareTo(RecordWritable o) {
  128. // 先按手机号排序 Asc
  129. int value = this.product_no.compareTo(o.product_no);
  130. if(value==0)
  131. // 再按时间进行排序 Desc
  132. return o.start_time.compareTo(this.start_time);
  133. return value;
  134. }
  135.  
  136. @Override
  137. public void write(DataOutput out) throws IOException {
  138. out.writeUTF(product_no);
  139. out.writeUTF(lac_id);
  140. out.writeInt(moment);
  141. out.writeUTF(start_time);
  142. out.writeUTF(user_id);
  143. out.writeUTF(county_id);
  144. out.writeInt(staytime);
  145. out.writeUTF(city_id);
  146. }
  147.  
  148. @Override
  149. public void readFields(DataInput in) throws IOException {
  150. product_no=in.readUTF();
  151. lac_id=in.readUTF();
  152. moment=in.readInt();
  153. start_time=in.readUTF();
  154. user_id=in.readUTF();
  155. county_id=in.readUTF();
  156. staytime=in.readInt();
  157. city_id=in.readUTF();
  158. }
  159.  
  160. @Override
  161. public String toString() {
  162. return this.product_no+" "+this.lac_id+" "+this.moment+" "+this.start_time+" "+user_id+" "+county_id+" "+ staytime+" "+city_id;
  163. }
  164. }
  165. }

MR案例:基站相关01的更多相关文章

  1. 069 01 Android 零基础入门 01 Java基础语法 09 综合案例-数组移位 01 综合案例-数组移位-案例需求

    069 01 Android 零基础入门 01 Java基础语法 09 综合案例-数组移位 01 综合案例-数组移位-案例需求 本文知识点:综合案例-数组移位-案例需求 说明:因为时间紧张,本人写博客 ...

  2. MR案例:Reduce-Join

    问题描述:两种类型输入文件:address(地址)和company(公司)进行一对多的关联查询,得到地址名(例如:Beijing)与公司名(例如:Beijing JD.Beijing Red Star ...

  3. MR案例:倒排索引

    1.map阶段:将单词和URI组成Key值(如“MapReduce :1.txt”),将词频作为value. 利用MR框架自带的Map端排序,将同一文档的相同单词的词频组成列表,传递给Combine过 ...

  4. MR案例:小文件处理方案

    HDFS被设计来存储大文件,而有时候会有大量的小文件生成,造成NameNode资源的浪费,同时也影响MapReduce的处理效率.有哪些方案可以合并这些小文件,或者提高处理小文件的效率呢? 1). 所 ...

  5. MR案例:倒排索引 && MultipleInputs

    本案例采用 MultipleInputs类 实现多路径输入的倒排索引.解读:MR多路径输入 package test0820; import java.io.IOException; import j ...

  6. GAN︱生成模型学习笔记(运行机制、NLP结合难点、应用案例、相关Paper)

    我对GAN"生成对抗网络"(Generative Adversarial Networks)的看法: 前几天在公开课听了新加坡国立大学[机器学习与视觉实验室]负责人冯佳时博士在[硬 ...

  7. MR案例:CombineFileInputFormat

    CombineFileInputFormat是一个抽象类.Hadoop提供了两个实现类CombineTextInputFormat和CombineSequenceFileInputFormat. 此案 ...

  8. 预测学习、深度生成式模型、DcGAN、应用案例、相关paper

    我对GAN"生成对抗网络"(Generative Adversarial Networks)的看法: 前几天在公开课听了新加坡国立大学[机器学习与视觉实验室]负责人冯佳时博士在[硬 ...

  9. MR案例:输出/输入SequenceFile

    SequenceFile文件是Hadoop用来存储二进制形式的key-value对而设计的一种平面文件(Flat File).在SequenceFile文件中,每一个key-value对被看做是一条记 ...

随机推荐

  1. 170411、java Socket通信的简单例子(UDP)

    服务端代码: package com.bobohe.socket; import java.io.*; import java.net.*; class UDPServer { public stat ...

  2. 160314、MVC设计模式

    MVC的由来 精彩内容 MVC模式最早由Trygve Reenskaug在1978年提出 ,是施乐帕罗奥多研究中心(Xerox PARC)在20世纪80年代为程序语言Smalltalk发明的一种软件设 ...

  3. Struts2的CRUD操作

    Struts之CRUD 1何为CRUD:CRUD代表的是一个框架的Create(增),Read(读取),update(更新),Delete(删除) 2怎么做呢?? 其实Struts2的CRUD与现实的 ...

  4. Incompatible integer to pointer conversion assigning to 'NSInteger *' (aka 'int *') from 'NSInteger' (aka 'int')

    遇到这样的问题: integer to pointer conversion assigning to 'NSInteger *' (aka 'int *') from 'NSInteger' (ak ...

  5. 把www.domain.com均衡到本机不同的端口 反向代理 隐藏端口 Nginx做非80端口转发 搭建nginx反向代理用做内网域名转发 location 规则

    负载均衡-Nginx中文文档 http://www.nginx.cn/doc/example/loadbanlance.html 负载均衡 一个简单的负载均衡的示例,把www.domain.com均衡 ...

  6. 剑指Offer——孩子们的游戏(圆圈中最后剩下的数)

    题目描述: 每年六一儿童节,牛客都会准备一些小礼物去看望孤儿院的小朋友,今年亦是如此.HF作为牛客的资深元老,自然也准备了一些小游戏.其中,有个游戏是这样的:首先,让小朋友们围成一个大圈.然后,他随机 ...

  7. LeetCode_Add Two Numbers

    题目: You are given two linked lists representing two non-negative numbers. The digits are stored in r ...

  8. 将vi or vim中的内容复制到terminal中

    1. 查看 vim 是否支持 clipboard 功能 $ vim --version | grep clipboard 2. 如果有 +clipboard 则跳过这一步; 如果显示的是 -clipb ...

  9. foo ?

    我们经常看到一些基础教程,面试题中经经常使用foo来命名,甚至有时候我们也会用过,可是你是否又知道foo是什么意思?(实际上,知道不知道又不会对你编码有不论什么影响~) 从编程黑马的王轶男的话来解释, ...

  10. python学习笔记(二十二)实例变量、实例方法、类变量、类方法、属性方法、静态方法

    实例变量:在类的声明中,属性是用变量来表示的.这种变量就称为实例变量,也就是成员变量. 实例方法:在类中声明的方法,例如:my(self),必须实例化之后才可以使用,否则会报错. 类变量:公共的变量, ...