Hadoop MapReduce编程 API入门系列之挖掘气象数据版本1（一）

　　　　下面是版本2。

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本2（九）

这篇博客带大家体会另一种版本的编程写法。

代码

package zhouls.bigdata.myMapReduce.weather;

import java.io.DataInput;

import java.io.DataOutput;

import java.io.IOException;

import org.apache.hadoop.io.WritableComparable;

public class MyKey implements WritableComparable<MyKey>{

    //WritableComparable,实现这个方法，要多很多

    //readFields是读入，write是写出

    private int year;

    private int month;

    private double hot;

    public int getYear() {

    return year;

}

    public void setYear(int year) {

        this.year = year;

    }

    public int getMonth() {

        return month;

    }

    public void setMonth(int month) {

        this.month = month;

    }

    public double getHot() {

        return hot;

    }

    public void setHot(double hot) {

        this.hot = hot;

        }//这一大段的get和set，可以右键，source，产生get和set，自动生成。

    public void readFields(DataInput arg0) throws IOException { //反序列化

        this.year=arg0.readInt();

        this.month=arg0.readInt();

        this.hot=arg0.readDouble();

    }

    public void write(DataOutput arg0) throws IOException { //序列化

        arg0.writeInt(year);

        arg0.writeInt(month);

        arg0.writeDouble(hot);

    }

    //判断对象是否是同一个对象，当该对象作为输出的key

    public int compareTo(MyKey o) {

        int r1 =Integer.compare(this.year, o.getYear());//比较当前的年和你传过来的年

        if(r1==){

        int r2 =Integer.compare(this.month, o.getMonth());

        if(r2==){

            return Double.compare(this.hot, o.getHot());

        }else{

            return r2;

        }

        }else{

            return r1;

        }

    }

}

package zhouls.bigdata.myMapReduce.weather;

import org.apache.hadoop.io.DoubleWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;

public class MyPartitioner extends HashPartitioner<MyKey, DoubleWritable>{//这里就是洗牌

    //执行时间越短越好

    public int getPartition(MyKey key, DoubleWritable value, int numReduceTasks) {

        return (key.getYear()-)%numReduceTasks;//对于一个数据集，找到最小，1949

    }

}

//1949-10-01 14:21:02    34c

//1949-10-02 14:01:02    36c

//1950-01-01 11:21:02    32c

//1950-10-01 12:21:02    37c

//1951-12-01 12:21:02    23c

//1950-10-02 12:21:02    41c

//1950-10-03 12:21:02    27c

//1951-07-01 12:21:02    45c

//1951-07-02 12:21:02    46c

//1951-07-03 12:21:03    47c

package zhouls.bigdata.myMapReduce.weather;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

public class MySort extends WritableComparator{

    public MySort(){

        super(MyKey.class,true);//把MyKey传进了

    }

    public int compare(WritableComparable a, WritableComparable b) {//这是排序的精髓

        MyKey k1 =(MyKey) a;

        MyKey k2 =(MyKey) b;

        int r1 =Integer.compare(k1.getYear(), k2.getYear());

        if(r1==){//年相同

        int r2 =Integer.compare(k1.getMonth(), k2.getMonth());

        if(r2==){//月相同

            return -Double.compare(k1.getHot(), k2.getHot());//比较气温

        }else{

            return r2;

        }

        }else{

            return r1;

        }

    }

}

package zhouls.bigdata.myMapReduce.weather;

import org.apache.hadoop.io.WritableComparable;

import org.apache.hadoop.io.WritableComparator;

public class MyGroup extends WritableComparator{

    public MyGroup(){

        super(MyKey.class,true);//把MyKey传进了

}

    public int compare(WritableComparable a, WritableComparable b) {//这是分组的精髓

        MyKey k1 =(MyKey) a;

        MyKey k2 =(MyKey) b;

        int r1 =Integer.compare(k1.getYear(), k2.getYear());

    if(r1==){

        return Integer.compare(k1.getMonth(), k2.getMonth());

    }else{

        return r1;

    }

    }

}

package zhouls.bigdata.myMapReduce.weather;

import java.io.IOException;

import java.text.ParseException;

import java.text.SimpleDateFormat;

import java.util.Calendar;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;

import org.apache.hadoop.io.NullWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class RunJob {

//    1949-10-01 14:21:02    34c WeatherMapper

//    1949-10-02 14:01:02    36c

//    1950-01-01 11:21:02    32c    分区在MyPartitioner.java

//    1950-10-01 12:21:02    37c

//    1951-12-01 12:21:02    23c    排序在MySort.java

//    1950-10-02 12:21:02    41c

//    1950-10-03 12:21:02    27c    分组在MyGroup.java

//    1951-07-01 12:21:02    45c

//    1951-07-02 12:21:02    46c    再，WeatherReducer

//    1951-07-03 12:21:03    47c

//key：每行第一个隔开符（制表符）左边为key，右边为value    自定义类型MyKey，洗牌，

    static class WeatherMapper extends Mapper<Text, Text, MyKey, DoubleWritable>{

    SimpleDateFormat sdf =new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");

    NullWritable v =NullWritable.get();

//    1949-10-01 14:21:02是自定义类型MyKey，即key

//    34c是DoubleWritable，即value

    protected void map(Text key, Text value,Context context) throws IOException, InterruptedException {

    try {

        Date date =sdf.parse(key.toString());

        Calendar c =Calendar.getInstance();

        //Calendar 类是一个抽象类，可以通过调用 getInstance() 静态方法获取一个 Calendar 对象，

        //此对象已由当前日期时间初始化，即默认代表当前时间，如 Calendar c = Calendar.getInstance();

        c.setTime(date);

        int year =c.get(Calendar.YEAR);

        int month =c.get(Calendar.MONTH);

        double hot =Double.parseDouble(value.toString().substring(, value.toString().lastIndexOf("c")));

        MyKey k =new MyKey();

        k.setYear(year);

        k.setMonth(month);

        k.setHot(hot);

        context.write(k, new DoubleWritable(hot));

    } catch (Exception e) {

        e.printStackTrace();

    }

    }

}

    static class WeatherReducer extends Reducer<MyKey, DoubleWritable, Text, NullWritable>{

    protected void reduce(MyKey arg0, Iterable<DoubleWritable> arg1,Context arg2)throws IOException, InterruptedException {

        int i=;

        for(DoubleWritable v :arg1){

        i++;

        String msg =arg0.getYear()+"\t"+arg0.getMonth()+"\t"+v.get();//"\t"是制表符

        arg2.write(new Text(msg), NullWritable.get());

                if(i==){

                    break;

                }

        }

    }

}

public static void main(String[] args) {

    Configuration config =new Configuration();

//    config.set("fs.defaultFS", "hdfs://HadoopMaster:9000");

//    config.set("yarn.resourcemanager.hostname", "HadoopMaster");

//    config.set("mapred.jar", "C:\\Users\\Administrator\\Desktop\\wc.jar");

//    config.set("mapreduce.input.keyvaluelinerecordreader.key.value.separator", ",");//默认分隔符是制表符"\t"，这里自定义，如","

    try {

        FileSystem fs =FileSystem.get(config);

        Job job =Job.getInstance(config);

        job.setJarByClass(RunJob.class);

        job.setJobName("weather");

        job.setMapperClass(WeatherMapper.class);

        job.setReducerClass(WeatherReducer.class);

        job.setMapOutputKeyClass(MyKey.class);

        job.setMapOutputValueClass(DoubleWritable.class);

        job.setPartitionerClass(MyPartitioner.class);

        job.setSortComparatorClass(MySort.class);

        job.setGroupingComparatorClass(MyGroup.class);

        job.setNumReduceTasks();

        job.setInputFormatClass(KeyValueTextInputFormat.class);

//    FileInputFormat.addInputPath(job, new Path("hdfs://HadoopMaster:9000/weather.txt"));//输入路径，下有weather.txt

//

//    Path outpath =new Path("hdfs://HadoopMaster:9000/out/weather");

        FileInputFormat.addInputPath(job, new Path("./data/weather.txt"));//输入路径，下有weather.txt

    Path outpath =new Path("./out/weather");

    if(fs.exists(outpath)){

        fs.delete(outpath, true);

    }

    FileOutputFormat.setOutputPath(job, outpath);

        boolean f= job.waitForCompletion(true);

        if(f){

        }

    } catch (Exception e) {

        e.printStackTrace();

    }

    }

}

欢迎大家，加入我的微信公众号：大数据躺过的坑

同时，大家可以关注我的个人博客：

http://www.cnblogs.com/zlslch/ 和 http://www.cnblogs.com/lchzls/

以及对应本平台的QQ群：161156071（大数据躺过的坑）

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本3（九）的更多相关文章

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本2（十）
下面,是版本1. Hadoop MapReduce编程 API入门系列之挖掘气象数据版本1(一) 这篇博文,包括了,实际生产开发非常重要的,单元测试和调试代码.这里不多赘述,直接送上代码. MRUni ...
Hadoop MapReduce编程 API入门系列之统计学生成绩版本2（十八）
不多说,直接上代码. 统计出每个年龄段的男.女学生的最高分这里,为了空格符的差错,直接,我们有时候,像如下这样的来排数据. 代码 package zhouls.bigdata.myMapRedu ...
Hadoop MapReduce编程 API入门系列之压缩和计数器（三十）
不多说,直接上代码. Hadoop MapReduce编程 API入门系列之小文件合并(二十九) 生成的结果,作为输入源. 代码 package zhouls.bigdata.myMapReduce. ...
Hadoop MapReduce编程 API入门系列之join（二十六）（未完）
不多说,直接上代码. 天气记录数据库 Station ID Timestamp Temperature 气象站数据库 Station ID Station Name 气象站和天气记录合并之后的示意图如 ...
Hadoop MapReduce编程 API入门系列之MapReduce多种输入格式（十七）
不多说,直接上代码. 代码 package zhouls.bigdata.myMapReduce.ScoreCount; import java.io.DataInput; import java.i ...
Hadoop MapReduce编程 API入门系列之自定义多种输入格式数据类型和排序多种输出格式（十一）
推荐 MapReduce分析明星微博数据 http://git.oschina.net/ljc520313/codeexample/tree/master/bigdata/hadoop/mapredu ...
Hadoop MapReduce编程 API入门系列之wordcount版本1（五）
这个很简单哈,编程的版本很多种. 代码版本1 package zhouls.bigdata.myMapReduce.wordcount5; import java.io.IOException; im ...
Hadoop MapReduce编程 API入门系列之薪水统计（三十一）
不多说,直接上代码. 代码 package zhouls.bigdata.myMapReduce.SalaryCount; import java.io.IOException; import jav ...
Hadoop MapReduce编程 API入门系列之Crime数据分析（二十五）（未完）
不多说,直接上代码. 一共12列,我们只需提取有用的列:第二列(犯罪类型).第四列(一周的哪一天).第五列(具体时间)和第七列(犯罪场所). 思路分析基于项目的需求,我们通过以下几步完成: 1.首先 ...

随机推荐

dubbo之多版本
当一个接口实现,出现不兼容升级时,可以用版本号过渡,版本号不同的服务相互间不引用. 可以按照以下的步骤进行版本迁移: 在低压力时间段,先升级一半提供者为新版本再将所有消费者升级为新版本然后将剩下的 ...
chrome设置以及hosts备份
最近重装完chrome总是忘记改了哪些设置,所以这里做一下备份. 有卡顿问题可以关闭GPU加速使用https的方式访问Google,Chrome下强制Google使用https的方法如下: 打开Ch ...
Generics of a Higher Kind
http://adriaanm.github.io/files/higher.pdf https://www.atlassian.com/blog/archives/scala-types-of-a- ...
Cache-Control 机制是为浏览器定制的？
Cache-Control 机制是为浏览器定制的?
C# for 遍历 IPagedList
IPagedList<Doc> ss = Doclist.ToPagedList(page, pageSize); ;i<ss.Count;i++) { var yy = ss[i] ...
jdk?jre?
很多人都搞不懂什么是jdk,什么是jre,只知道电脑安装了这两个就能开发和运行java程序,这里我简单讲讲什么是jdk,什么是jre. jdk,即Java Development Kit,故名思意就是 ...
[luogu2317 HNOI2005] 星际贸易 (dp)
传送门 Solution 两个dp分开处理, 第一问什么都不考虑直接dp 第二问还有些疑惑,姑且先留坑 Code //By Menteur_Hxy #include <cstdio> #i ...
ubuntu 配置lamp
官方配置网站:http://wiki.ubuntu.org.cn/LAMP_%E6%9C%8D%E5%8A%A1%E5%99%A8%E5%AE%89%E8%A3%85%E9%85%8D%E7%BD%A ...
datawhale爬虫实训4
DataWhale-Task4(爬取丁香园2) 任务:使用lxml爬虫帖子相关的回复与部分用户信息(用户名,头像地址,回复详情) 难点:需要登录才能看到所有回复浏览器登录上去,查看cookies信息 ...
Sigma Function 数学因子求和
Sigma function is an interesting function in Number Theory. It is denoted by the Greek letter Sigma ...

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本3（九）

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本1（一）

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本2（九）

Hadoop MapReduce编程 API入门系列之挖掘气象数据版本3（九）的更多相关文章

随机推荐

热门专题