Requirements

  1. Parse the raw JSON data into plain text records.
  2. For each user, find the 3 movies they rated highest.
  3. Find the 3 movies that were rated the most times.

Data

https://pan.baidu.com/s/1gPsQXVYSQEZ2OYek4HxK6A
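
Each line of rating.json is one JSON object whose fields match the bean below (movie, rate, timeStamp, uid). A representative line, inferred from the code rather than from the download, looks like:

{"movie":"1193","rate":"5","timeStamp":"978300760","uid":"1"}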

pom.xml

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">

    <modelVersion>4.0.0</modelVersion>

    <groupId>com.cyf</groupId>
    <artifactId>MapReduceCases</artifactId>
    <packaging>jar</packaging>
    <version>1.0</version>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>2.6.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.6.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.6.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>2.6.4</version>
        </dependency>

        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>1.1.40</version>
        </dependency>

        <dependency>
            <groupId>mysql</groupId>
            <artifactId>mysql-connector-java</artifactId>
            <version>5.1.36</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <artifactId>maven-assembly-plugin</artifactId>
                <configuration>
                    <appendAssemblyId>false</appendAssemblyId>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <mainClass>cn.itcast.mapreduce.json.JsonToText</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>assembly</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>

</project>
package cn.itcast.mapreduce.json;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Writable bean mirroring one JSON record: movie, rate, timeStamp, uid.
 */
public class OriginBean implements WritableComparable<OriginBean> {

    private Long movie;
    private Long rate;
    private Long timeStamp;
    private Long uid;

    public Long getMovie() {
        return movie;
    }

    public void setMovie(Long movie) {
        this.movie = movie;
    }

    public Long getRate() {
        return rate;
    }

    public void setRate(Long rate) {
        this.rate = rate;
    }

    public Long getTimeStamp() {
        return timeStamp;
    }

    public void setTimeStamp(Long timeStamp) {
        this.timeStamp = timeStamp;
    }

    public Long getUid() {
        return uid;
    }

    public void setUid(Long uid) {
        this.uid = uid;
    }

    public OriginBean(Long movie, Long rate, Long timeStamp, Long uid) {
        this.movie = movie;
        this.rate = rate;
        this.timeStamp = timeStamp;
        this.uid = uid;
    }

    // No-arg constructor required by the Writable deserialization machinery.
    public OriginBean() {
    }

    public int compareTo(OriginBean o) {
        return this.movie.compareTo(o.movie);
    }

    // write and readFields must serialize the fields in the same order.
    public void write(DataOutput out) throws IOException {
        out.writeLong(movie);
        out.writeLong(rate);
        out.writeLong(timeStamp);
        out.writeLong(uid);
    }

    public void readFields(DataInput in) throws IOException {
        this.movie = in.readLong();
        this.rate = in.readLong();
        this.timeStamp = in.readLong();
        this.uid = in.readLong();
    }

    @Override
    public String toString() {
        return this.movie + "\t" + this.rate + "\t" + this.timeStamp + "\t" + this.uid;
    }

}
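
A quick way to convince yourself the Writable contract holds is a round trip through a byte stream. The following is a standalone sanity-check sketch, not part of the job code; the class name and sample values are illustrative:

package cn.itcast.mapreduce.json;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;

public class OriginBeanRoundTrip {
    public static void main(String[] args) throws Exception {
        OriginBean before = new OriginBean(1193L, 5L, 978300760L, 1L);

        // Serialize the bean the same way Hadoop does during the shuffle.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        before.write(new DataOutputStream(bytes));

        // Deserialize into a fresh bean and print it.
        OriginBean after = new OriginBean();
        after.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(after); // 1193  5  978300760  1
    }
}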
package cn.itcast.mapreduce.json;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;

public class JsonToText {

    static class MyMapper extends Mapper<LongWritable, Text, Text, NullWritable> {

        Text k = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            // Parse one JSON line with fastjson and re-emit it as tab-separated text.
            JSONObject valueJson = JSON.parseObject(value.toString());

            Long movie = valueJson.getLong("movie");

            OriginBean bean = new OriginBean(movie, valueJson.getLong("rate"), valueJson.getLong("timeStamp"), valueJson.getLong("uid"));
            k.set(bean.toString());
            context.write(k, NullWritable.get());
        }
    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        // 16777216 bytes / 1024 / 1024 = 16 MB per split; a 62.5 MB input
        // therefore yields 4 splits, 4 map tasks, and 4 output files.
        conf.set("mapreduce.input.fileinputformat.split.maxsize", "16777216");

        Job job = Job.getInstance(conf);

        // job.setJarByClass(JsonToText.class);
        // Tell the framework where the job's jar lives.
        job.setJar("/root/JsonToText.jar");
        job.setMapperClass(MyMapper.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);

        // job.setOutputFormatClass(SequenceFileOutputFormat.class);

        // Map-only job: with zero reducers the mapper output is the final output.
        job.setNumReduceTasks(0);

        FileInputFormat.setInputPaths(job, new Path("/json/input"));
        FileOutputFormat.setOutputPath(job, new Path("/json/output"));
        // FileInputFormat.setInputPaths(job, new Path(args[0]));
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }
}
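
Before the job can run, the assembly jar configured in the pom has to be built and placed where job.setJar points. A plausible build-and-deploy sequence (with appendAssemblyId set to false the artifact is target/MapReduceCases-1.0.jar; the rename to JsonToText.jar is an assumption to match the path above):

mvn clean package
cp target/MapReduceCases-1.0.jar /root/JsonToText.jar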

Create the input directory and upload the data

hadoop fs -mkdir -p /json/input

hadoop fs -put rating.json /json/input

Run

hadoop jar JsonToText.jar cn.itcast.mapreduce.json.JsonToText
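
Each line of the part files in /json/output is OriginBean.toString(), i.e. movie, rate, timeStamp and uid joined by tabs; the sample input line above would come out as:

1193	5	978300760	1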

Output

https://pan.baidu.com/s/1ayrpl7w8Dlzpc7TRZIO94w

pom.xml

(Same as the pom.xml above, except that <mainClass> is cn.itcast.mapreduce.json.MovieRateSum.)
package cn.itcast.mapreduce.json;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

/**
 * Output bean for the aggregation job: a movie id and its total rating.
 */
public class ResultBean implements WritableComparable<ResultBean> {

    private Long movie;
    private Long sumRate;

    public void setSumRate(long sumRate) {
        this.sumRate = sumRate;
    }

    public Long getMovie() {
        return movie;
    }

    public void setMovie(Long movie) {
        this.movie = movie;
    }

    public ResultBean(Long movie, Long sumRate) {
        this.movie = movie;
        this.sumRate = sumRate;
    }

    public ResultBean(Long sumRate) {
        this.sumRate = sumRate;
    }

    // No-arg constructor required by the Writable deserialization machinery.
    public ResultBean() {
    }

    // Order by movie id ascending, then by total rating descending.
    // Long.compare avoids the int-overflow risk of casting a long difference.
    public int compareTo(ResultBean o) {
        if (!this.movie.equals(o.movie)) {
            return Long.compare(this.movie, o.movie);
        }
        return Long.compare(o.sumRate, this.sumRate);
    }

    public void write(DataOutput out) throws IOException {
        out.writeLong(movie);
        out.writeLong(sumRate);
    }

    public void readFields(DataInput in) throws IOException {
        this.movie = in.readLong();
        this.sumRate = in.readLong();
    }

    @Override
    public String toString() {
        return movie + "\t" + sumRate;
    }

}
package cn.itcast.mapreduce.json;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;

public class MovieRateSum {

    static class MyMapper extends Mapper<LongWritable, Text, LongWritable, OriginBean> {

        @Override
        protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {

            // The input is the tab-separated output of JsonToText
            // (movie \t rate \t timeStamp \t uid), not raw JSON.
            String[] fields = value.toString().split("\t");

            OriginBean bean = new OriginBean(Long.parseLong(fields[0]), Long.parseLong(fields[1]),
                    Long.parseLong(fields[2]), Long.parseLong(fields[3]));

            // Key by movie id so one reduce call sees every rating for that movie.
            context.write(new LongWritable(bean.getMovie()), bean);
        }
    }

    static class MyReduce extends Reducer<LongWritable, OriginBean, ResultBean, NullWritable> {

        @Override
        protected void reduce(LongWritable movie, Iterable<OriginBean> beans, Context context) throws IOException, InterruptedException {

            // Sum all ratings for this movie.
            long sum = 0L;
            for (OriginBean bean : beans) {
                sum += bean.getRate();
            }
            ResultBean bean = new ResultBean();
            bean.setMovie(movie.get());
            bean.setSumRate(sum);
            context.write(bean, NullWritable.get());
        }

    }

    public static void main(String[] args) throws Exception {

        Configuration conf = new Configuration();

        Job job = Job.getInstance(conf);

        // job.setJarByClass(MovieRateSum.class);
        // Tell the framework where the job's jar lives.
        job.setJar("/root/MovieRateSum.jar");
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReduce.class);

        job.setMapOutputKeyClass(LongWritable.class);
        job.setMapOutputValueClass(OriginBean.class);

        job.setOutputKeyClass(ResultBean.class);
        job.setOutputValueClass(NullWritable.class);

        job.setOutputFormatClass(SequenceFileOutputFormat.class);

        // The input is the plain-text output of the JsonToText job.
        FileInputFormat.setInputPaths(job, new Path("/json/output"));
        FileOutputFormat.setOutputPath(job, new Path("/json/output-seq"));
        // FileInputFormat.setInputPaths(job, new Path(args[0]));
        // FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.waitForCompletion(true);
    }

}
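
Because this job writes a SequenceFile, hadoop fs -cat shows binary headers; hadoop fs -text deserializes the records instead. A usage sketch, assuming the default single reducer and that the job jar (which contains ResultBean) is on the client classpath, e.g. via HADOOP_CLASSPATH:

hadoop fs -text /json/output-seq/part-r-00000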
