map reduce相关程序

Test_1.java

/**

 * Hadoop网络课程模板程序

 * 编写者：James

 */  

import java.io.IOException;

import java.text.DateFormat;

import java.text.SimpleDateFormat;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.*;

import org.apache.hadoop.mapreduce.*;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

/**
 * Job without a custom Reducer: projects three space-separated fields out of each input record.
 *
 * <p>For every record that splits into at least seven fields on a single space,
 * the mapper emits fields 0, 1 and 6 joined by single spaces. Records with fewer
 * fields are dropped and counted in {@link Counter#LINESKIP}.
 */
public class Test_1 extends Configured implements Tool {

    /** Counters for records the job could not process. */
    enum Counter {
        LINESKIP,    // records skipped because they have too few fields
    }

    /** Mapper that writes fields 0, 1 and 6 of each record as the output value. */
    public static class Map extends Mapper<LongWritable, Text, NullWritable, Text> {

        /** Index of the last field read from a split record (the MAC address in the sample data). */
        private static final int MAC_INDEX = 6;

        /**
         * Splits the record on single spaces and emits {@code field0 field1 field6}.
         *
         * @param key     input key supplied by the framework; not used
         * @param value   the raw input record
         * @param context used to write the output and to bump the skip counter
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] lineSplit = value.toString().split(" ");

            // Validate the field count up front rather than relying on
            // ArrayIndexOutOfBoundsException for control flow.
            if (lineSplit.length <= MAC_INDEX) {
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }

            String month = lineSplit[0];
            String day = lineSplit[1];    // second field; the day of month in the sample log lines
            String mac = lineSplit[MAC_INDEX];

            context.write(NullWritable.get(), new Text(month + ' ' + day + ' ' + mac));
        }
    }

    /**
     * Configures and runs the job, then prints a summary including the counters.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if configuring or running the job fails
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        Job job = new Job(conf, "Test_1");                       // job name
        job.setJarByClass(Test_1.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));    // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // output path

        job.setMapperClass(Map.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);

        // Report how the job went.
        System.out.println("任务名称：" + job.getJobName());
        System.out.println("任务成功：" + (job.isSuccessful() ? "是" : "否"));
        System.out.println("输入行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("输出行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("跳过的行：" + job.getCounters().findCounter(Counter.LINESKIP).getValue());

        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Command-line entry point: prints usage when the argument count is wrong,
     * otherwise runs the job and prints its start time, end time and duration in minutes.
     *
     * @param args input path and output path
     * @throws Exception if the job fails with an exception
     */
    public static void main(String[] args) throws Exception {
        // Exactly two arguments are required; anything else prints the usage text.
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: Test_1 < input path > < output path > ");
            System.err.println("Example: hadoop jar ~/Test_1.jar hdfs://localhost:9000/home/james/Test_1 hdfs://localhost:9000/home/james/output");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }

        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();

        int res = ToolRunner.run(new Configuration(), new Test_1(), args);

        Date end = new Date();
        // Elapsed wall-clock time converted from milliseconds to minutes.
        float minutes = (float) ((end.getTime() - start.getTime()) / 60000.0);

        System.out.println("任务开始：" + formatter.format(start));
        System.out.println("任务结束：" + formatter.format(end));
        System.out.println("任务耗时：" + String.valueOf(minutes) + " 分钟");

        System.exit(res);
    }
}

Test_1数据

Apr 23 11:49:54 hostapd: wlan0: STA 14:7d:c5:9e:fb:84

Apr 23 11:49:52 hostapd: wlan0: STA 74:e5:0b:04:28:f2

Apr 23 11:49:50 hostapd: wlan0: STA cc:af:78:cc:d5:5d

Apr 23 11:49:44 hostapd: wlan0: STA cc:af:78:cc:d5:5d

Apr 23 11:49:43 hostapd: wlan0: STA 74:e5:0b:04:28:f2

Apr 23 11:49:42 hostapd: wlan0: STA 14:7d:c5:9e:fb:84

Test_2.java

/**

 * Hadoop网络课程模板程序

 * 编写者：James

 */  

import java.io.IOException;

import java.text.DateFormat;

import java.text.SimpleDateFormat;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.*;

import org.apache.hadoop.mapreduce.*;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

/**
 * Job with a Reducer: inverts pairs of space-separated values and groups them.
 *
 * <p>The mapper reads records of the form {@code a b} and emits {@code (b, a)}.
 * The reducer concatenates all values seen for a key, each followed by {@code |}.
 * Records with fewer than two fields are dropped and counted in {@link Counter#LINESKIP}.
 */
public class Test_2 extends Configured implements Tool {

    /** Counters for records the job could not process. */
    enum Counter {
        LINESKIP,    // records skipped because they have too few fields
    }

    /** Mapper that swaps the first two fields of each record into (key, value). */
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * Splits the record on single spaces and emits {@code (field1, field0)}.
         *
         * @param key     input key supplied by the framework; not used
         * @param value   the raw input record
         * @param context used to write the output and to bump the skip counter
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] lineSplit = value.toString().split(" ");

            // Validate the field count up front rather than relying on
            // ArrayIndexOutOfBoundsException for control flow.
            if (lineSplit.length < 2) {
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }

            String anum = lineSplit[0];
            String bnum = lineSplit[1];

            context.write(new Text(bnum), new Text(anum));
        }
    }

    /** Reducer that joins every value of a key into one {@code |}-terminated list. */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        /**
         * Emits the key together with all of its values, each followed by {@code |}.
         *
         * @param key     the grouping key
         * @param values  all values emitted by the mappers for this key
         * @param context used to write the output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // StringBuilder avoids re-copying the accumulated string on every iteration.
            StringBuilder out = new StringBuilder();
            for (Text value : values) {
                out.append(value.toString()).append('|');
            }
            context.write(key, new Text(out.toString()));
        }
    }

    /**
     * Configures and runs the job, then prints a summary including the counters.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if configuring or running the job fails
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        Job job = new Job(conf, "Test_2");                       // job name
        job.setJarByClass(Test_2.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));    // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // output path

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);

        // Report how the job went.
        System.out.println("任务名称：" + job.getJobName());
        System.out.println("任务成功：" + (job.isSuccessful() ? "是" : "否"));
        System.out.println("输入行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("输出行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("跳过的行：" + job.getCounters().findCounter(Counter.LINESKIP).getValue());

        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Command-line entry point: prints usage when the argument count is wrong,
     * otherwise runs the job and prints its start time, end time and duration in minutes.
     *
     * @param args input path and output path
     * @throws Exception if the job fails with an exception
     */
    public static void main(String[] args) throws Exception {
        // Exactly two arguments are required; anything else prints the usage text.
        if (args.length != 2) {
            System.err.println("");
            System.err.println("Usage: Test_2 < input path > < output path > ");
            System.err.println("Example: hadoop jar ~/Test_2.jar hdfs://localhost:9000/home/james/Test_2 hdfs://localhost:9000/home/james/output");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }

        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();

        int res = ToolRunner.run(new Configuration(), new Test_2(), args);

        Date end = new Date();
        // Elapsed wall-clock time converted from milliseconds to minutes.
        float minutes = (float) ((end.getTime() - start.getTime()) / 60000.0);

        System.out.println("任务开始：" + formatter.format(start));
        System.out.println("任务结束：" + formatter.format(end));
        System.out.println("任务耗时：" + String.valueOf(minutes) + " 分钟");

        System.exit(res);
    }
}

Test_2数据

13599999999 10086

13899999999    120

13944444444 13800138000

13722222222 13800138000

18800000000 120

13722222222 10086

18944444444 10086

Exercise_1.java

/**

 * Hadoop网络课程作业程序

 * 编写者：James

 */  

import java.io.IOException;

import java.text.DateFormat;

import java.text.SimpleDateFormat;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.*;

import org.apache.hadoop.mapreduce.*;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

public class Exercise_1 extends Configured implements Tool {    

    /**

     * 计数器

     * 用于计数各种异常数据

     */

    enum Counter

    {

        LINESKIP,    //出错的行

    }

    /**

     * MAP任务

     */

    public static class Map extends Mapper<LongWritable, Text, NullWritable, Text>

    {

        public void map ( LongWritable key, Text value, Context context ) throws IOException, InterruptedException

        {

            String line = value.toString();                //读取源数据

            try

            {

                //数据处理

                String [] lineSplit = line.split(" ");

                String month = lineSplit[0];

                String time = lineSplit[1];

                String mac = lineSplit[6];

                /**  需要注意的部分       **/ 

                String name = context.getConfiguration().get("name");

                Text out = new Text(name + ' ' + month + ' ' + time + ' ' + mac);

                /**  需要注意的部分       **/ 

                context.write( NullWritable.get(), out);    //输出

            }

            catch ( java.lang.ArrayIndexOutOfBoundsException e )

            {

                context.getCounter(Counter.LINESKIP).increment(1);    //出错令计数器+1

                return;

            }

        }

    }

    @Override

    public int run(String[] args) throws Exception

    {

        Configuration conf = getConf();

        /**  需要注意的部分       **/ 

        conf.set("name", args[2]);

        /**  需要注意的部分       **/ 

        Job job = new Job(conf, "Exercise_1");                            //任务名

        job.setJarByClass(Exercise_1.class);                            //指定Class

        FileInputFormat.addInputPath( job, new Path(args[0]) );            //输入路径

        FileOutputFormat.setOutputPath( job, new Path(args[1]) );        //输出路径

        job.setMapperClass( Map.class );                                //调用上面Map类作为Map任务代码

        job.setOutputFormatClass( TextOutputFormat.class );

        job.setOutputKeyClass( NullWritable.class );                    //指定输出的KEY的格式

        job.setOutputValueClass( Text.class );                            //指定输出的VALUE的格式

        job.waitForCompletion(true);

        //输出任务完成情况

        System.out.println( "任务名称：" + job.getJobName() );

        System.out.println( "任务成功：" + ( job.isSuccessful()?"是":"否" ) );

        System.out.println( "输入行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue() );

        System.out.println( "输出行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue() );

        System.out.println( "跳过的行：" + job.getCounters().findCounter(Counter.LINESKIP).getValue() );

        return job.isSuccessful() ? 0 : 1;

    }

    /**

     * 设置系统说明

     * 设置MapReduce任务

     */

    public static void main(String[] args) throws Exception

    {

        //判断参数个数是否正确

        //如果无参数运行则显示以作程序说明

        if ( args.length != 3 )

        {

            System.err.println("");

            System.err.println("Usage: Test_1 < input path > < output path > < name >");

            System.err.println("Example: hadoop jar ~/Test_1.jar hdfs://localhost:9000/home/james/Test_1 
hdfs://localhost:9000/home/james/output hadoop");

            System.err.println("Counter:");

            System.err.println("\t"+"LINESKIP"+"\t"+"Lines which are too short");

            System.exit(-1);

        }

        //记录开始时间

        DateFormat formatter = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss" );

        Date start = new Date();

        //运行任务

        int res = ToolRunner.run(new Configuration(), new Exercise_1(), args);

        //输出任务耗时

        Date end = new Date();

        float time =  (float) (( end.getTime() - start.getTime() ) / 60000.0) ;

        System.out.println( "任务开始：" + formatter.format(start) );

        System.out.println( "任务结束：" + formatter.format(end) );

        System.out.println( "任务耗时：" + String.valueOf( time ) + " 分钟" ); 

        System.exit(res);

    }

}

result_1

hadoop Apr 23 14:7d:c5:9e:fb:84

hadoop Apr 23 74:e5:0b:04:28:f2

hadoop Apr 23 cc:af:78:cc:d5:5d

hadoop Apr 23 cc:af:78:cc:d5:5d

hadoop Apr 23 74:e5:0b:04:28:f2

hadoop Apr 23 14:7d:c5:9e:fb:84

Exercise_2.java

/**

 * Hadoop网络课程作业程序

 * 编写者：James

 */  

import java.io.IOException;

import java.text.DateFormat;

import java.text.SimpleDateFormat;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.*;

import org.apache.hadoop.mapreduce.*;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

public class Exercise_2 extends Configured implements Tool {    

    /**

     * 计数器

     * 用于计数各种异常数据

     */

    enum Counter

    {

        LINESKIP,    //出错的行

    }

    /**

     * MAP任务

     */

    public static class Map extends Mapper<LongWritable, Text, NullWritable, Text>

    {

        /**  需要注意的部分       **/

        private String name;

        public void setup ( Context context )

        {

            this.name = context.getConfiguration().get("name");                    //读取名字

        }

        /**  需要注意的部分       **/

        public void map ( LongWritable key, Text value, Context context ) throws IOException, InterruptedException

        {

            String line = value.toString();                //读取源数据

            try

            {

                //数据处理

                String [] lineSplit = line.split(" ");

                String month = lineSplit[0];

                String time = lineSplit[1];

                String mac = lineSplit[6];

                /**  需要注意的部分       **/ 

                Text out = new Text(this.name + ' ' + month + ' ' + time + ' ' + mac);

                /**  需要注意的部分       **/ 

                context.write( NullWritable.get(), out);    //输出

            }

            catch ( java.lang.ArrayIndexOutOfBoundsException e )

            {

                context.getCounter(Counter.LINESKIP).increment(1);    //出错令计数器+1

                return;

            }

        }

    }

    @Override

    public int run(String[] args) throws Exception

    {

        Configuration conf = getConf();

        /**  需要注意的部分       **/ 

        conf.set("name", args[2]);

        /**  需要注意的部分       **/ 

        Job job = new Job(conf, "Exercise_2");                            //任务名

        job.setJarByClass(Exercise_2.class);                            //指定Class

        FileInputFormat.addInputPath( job, new Path(args[0]) );            //输入路径

        FileOutputFormat.setOutputPath( job, new Path(args[1]) );        //输出路径

        job.setMapperClass( Map.class );                                //调用上面Map类作为Map任务代码

        job.setOutputFormatClass( TextOutputFormat.class );

        job.setOutputKeyClass( NullWritable.class );                    //指定输出的KEY的格式

        job.setOutputValueClass( Text.class );                            //指定输出的VALUE的格式

        job.waitForCompletion(true);

        //输出任务完成情况

        System.out.println( "任务名称：" + job.getJobName() );

        System.out.println( "任务成功：" + ( job.isSuccessful()?"是":"否" ) );

        System.out.println( "输入行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue() );

        System.out.println( "输出行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue() );

        System.out.println( "跳过的行：" + job.getCounters().findCounter(Counter.LINESKIP).getValue() );

        return job.isSuccessful() ? 0 : 1;

    }

    /**

     * 设置系统说明

     * 设置MapReduce任务

     */

    public static void main(String[] args) throws Exception

    {

        //判断参数个数是否正确

        //如果无参数运行则显示以作程序说明

        if ( args.length != 3 )

        {

            System.err.println("");

            System.err.println("Usage: Test_1 < input path > < output path > < name >");

            System.err.println("Example: hadoop jar ~/Test_1.jar hdfs://localhost:9000/home/james/Test_1 
hdfs://localhost:9000/home/james/output hadoop");

            System.err.println("Counter:");

            System.err.println("\t"+"LINESKIP"+"\t"+"Lines which are too short");

            System.exit(-1);

        }

        //记录开始时间

        DateFormat formatter = new SimpleDateFormat( "yyyy-MM-dd HH:mm:ss" );

        Date start = new Date();

        //运行任务

        int res = ToolRunner.run(new Configuration(), new Exercise_2(), args);

        //输出任务耗时

        Date end = new Date();

        float time =  (float) (( end.getTime() - start.getTime() ) / 60000.0) ;

        System.out.println( "任务开始：" + formatter.format(start) );

        System.out.println( "任务结束：" + formatter.format(end) );

        System.out.println( "任务耗时：" + String.valueOf( time ) + " 分钟" ); 

        System.exit(res);

    }

}

改写Test_2

/**

 * Hadoop网络课程模板程序

 * 编写者：James

 */  

import java.io.IOException;

import java.text.DateFormat;

import java.text.SimpleDateFormat;

import java.util.Date;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.conf.Configured;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.*;

import org.apache.hadoop.mapreduce.*;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

import org.apache.hadoop.util.Tool;

import org.apache.hadoop.util.ToolRunner;

/**
 * Job with a Reducer: inverts pairs of space-separated values, groups them and
 * appends a name passed on the command line to every output record.
 *
 * <p>The mapper reads records of the form {@code a b} and emits {@code (b, a)}.
 * The reducer concatenates all values seen for a key, each followed by {@code |},
 * and then appends the name stored in the configuration under {@code "name"}.
 * Records with fewer than two fields are dropped and counted in {@link Counter#LINESKIP}.
 */
public class Test_2 extends Configured implements Tool {

    /** Counters for records the job could not process. */
    enum Counter {
        LINESKIP,    // records skipped because they have too few fields
    }

    /** Mapper that swaps the first two fields of each record into (key, value). */
    public static class Map extends Mapper<LongWritable, Text, Text, Text> {

        /**
         * Splits the record on single spaces and emits {@code (field1, field0)}.
         *
         * @param key     input key supplied by the framework; not used
         * @param value   the raw input record
         * @param context used to write the output and to bump the skip counter
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] lineSplit = value.toString().split(" ");

            // Validate the field count up front rather than relying on
            // ArrayIndexOutOfBoundsException for control flow.
            if (lineSplit.length < 2) {
                context.getCounter(Counter.LINESKIP).increment(1);
                return;
            }

            String anum = lineSplit[0];
            String bnum = lineSplit[1];

            context.write(new Text(bnum), new Text(anum));
        }
    }

    /** Reducer that joins every value of a key and appends the configured name. */
    public static class Reduce extends Reducer<Text, Text, Text, Text> {

        /**
         * Emits the key with a value of the form {@code v1|v2|...|name}.
         *
         * @param key     the grouping key
         * @param values  all values emitted by the mappers for this key
         * @param context used to read the configuration and write the output
         * @throws IOException          if writing the output fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        public void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            String name = context.getConfiguration().get("name");

            // StringBuilder avoids re-copying the accumulated string on every iteration.
            StringBuilder out = new StringBuilder();
            for (Text value : values) {
                out.append(value.toString()).append('|');
            }

            // Every value already ends with '|', so the name is appended directly.
            // The whole string is wrapped in a single Text because context.write
            // requires a Text value, not a String.
            out.append(name);

            context.write(key, new Text(out.toString()));
        }
    }

    /**
     * Stores the name in the configuration, runs the job and prints a summary including the counters.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path,
     *             {@code args[2]} the name appended to every output record
     * @return 0 if the job succeeded, 1 otherwise
     * @throws Exception if configuring or running the job fails
     */
    @Override
    public int run(String[] args) throws Exception {
        Configuration conf = getConf();

        // Must be set before the Job is created from this configuration.
        conf.set("name", args[2]);

        Job job = new Job(conf, "Test_2");                       // job name
        job.setJarByClass(Test_2.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));    // input path
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // output path

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.waitForCompletion(true);

        // Report how the job went.
        System.out.println("任务名称：" + job.getJobName());
        System.out.println("任务成功：" + (job.isSuccessful() ? "是" : "否"));
        System.out.println("输入行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_INPUT_RECORDS").getValue());
        System.out.println("输出行数：" + job.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "MAP_OUTPUT_RECORDS").getValue());
        System.out.println("跳过的行：" + job.getCounters().findCounter(Counter.LINESKIP).getValue());

        return job.isSuccessful() ? 0 : 1;
    }

    /**
     * Command-line entry point: prints usage when the argument count is wrong,
     * otherwise runs the job and prints its start time, end time and duration in minutes.
     *
     * @param args input path, output path and name
     * @throws Exception if the job fails with an exception
     */
    public static void main(String[] args) throws Exception {
        // Exactly three arguments are required; anything else prints the usage text.
        if (args.length != 3) {
            System.err.println("");
            System.err.println("Usage: Test_2 < input path > < output path > < name >");
            System.err.println("Example: hadoop jar ~/Test_2.jar hdfs://localhost:9000/home/james/Test_2 hdfs://localhost:9000/home/james/output hadoop");
            System.err.println("Counter:");
            System.err.println("\t" + "LINESKIP" + "\t" + "Lines which are too short");
            System.exit(-1);
        }

        DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        Date start = new Date();

        int res = ToolRunner.run(new Configuration(), new Test_2(), args);

        Date end = new Date();
        // Elapsed wall-clock time converted from milliseconds to minutes.
        float minutes = (float) ((end.getTime() - start.getTime()) / 60000.0);

        System.out.println("任务开始：" + formatter.format(start));
        System.out.println("任务结束：" + formatter.format(end));
        System.out.println("任务耗时：" + String.valueOf(minutes) + " 分钟");

        System.exit(res);
    }
}

result_2

10086    13599999999|13722222222|18944444444|hadoop

120    18800000000|hadoop

13800138000    13944444444|13722222222|hadoop

map reduce相关程序的更多相关文章

Hadoop Map/Reduce 示例程序WordCount
#进入hadoop安装目录 cd /usr/local/hadoop #创建示例文件:input #在里面输入以下内容: #Hello world, Bye world! vim input #在hd ...
Hadoop Map/Reduce教程
原文地址:http://hadoop.apache.org/docs/r1.0.4/cn/mapred_tutorial.html 目的先决条件概述输入与输出例子:WordCount v1.0 ...
Map/Reduce应用开发基础知识-摘录
Map/Reduce 这部分文档为用户将会面临的Map/Reduce框架中的各个环节提供了适当的细节.这应该会帮助用户更细粒度地去实现.配置和调优作业.然而,请注意每个类/接口的javadoc文档提供 ...
一步一步跟我学习hadoop(5)----hadoop Map/Reduce教程（2）
Map/Reduce用户界面本节为用户採用框架要面对的各个环节提供了具体的描写叙述,旨在与帮助用户对实现.配置和调优进行具体的设置.然而,开发时候还是要相应着API进行相关操作. 首先我们须要了解M ...
hadoop入门级总结二：Map/Reduce
在上一篇博客:hadoop入门级总结一:HDFS中,简单的介绍了hadoop分布式文件系统HDFS的整体框架及文件写入读出机制.接下来,简要的总结一下hadoop的另外一大关键技术之一分布式计算框架: ...
马士兵hadoop第四课：Yarn和Map/Reduce配置启动和原理讲解
马士兵hadoop第一课:虚拟机搭建和安装hadoop及启动马士兵hadoop第二课:hdfs集群集中管理和hadoop文件操作马士兵hadoop第三课:java开发hdfs 马士兵hadoop第 ...
马士兵hadoop第四课：Yarn和Map/Reduce配置启动和原理讲解(转)
马士兵hadoop第一课:虚拟机搭建和安装hadoop及启动马士兵hadoop第二课:hdfs集群集中管理和hadoop文件操作马士兵hadoop第三课:java开发hdfs 马士兵hadoop第 ...
Hadoop学习笔记2 - 第一和第二个Map Reduce程序
转载请标注原链接http://www.cnblogs.com/xczyd/p/8608906.html 在Hdfs学习笔记1 - 使用Java API访问远程hdfs集群中,我们已经可以完成了访问hd ...
map reduce程序示例
map reduce程序示例 package test2; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop. ...

随机推荐

C++ Explicit Constructors(显式构造函数)
C++ 为类(Class)提供了许多默认函数.如果自己没有申明,编译器会为我们提供一个copy构造函数.一个copy assignment操作符和一个析构函数.此外,如果没有申明任何构造函数,编译器会 ...
搭建个人博客-hexo+github
自己也算是摸爬滚打搭建成功,然后自己再重新安装部署一遍,把完整步骤分享给大家,同时最后有一些连接,如果我的步骤不行,大家可以参考其他人的(这个有点花费时间,大家提前有个心理准备 - _-) 一.第一步 ...
Dropwizard框架入门
最近项目用到了Dropwizard框架,个人感觉还不错,那么这里就从他们官网入手,然后加上自己的实现步骤让大家初步了解这个框架. 官网对DW(Dropwizard)的定义是跨越了一个库和框架之间的界限 ...
GO语言基础条件、跳转、Array和Slice
1. 判断语句if 1. 条件表达式没有括号(这点其他语言转过来的需要注意) 2. 支持一个初始化表达式(可以是并行方式,即:a, b, c := 1, 2, 3) 3. 左大括号必须和条件语句或 e ...
Cognos与Firefox的那些事
最近怀着一颗好奇的心装了Win10系统,作为一个Coder,或多或少的这么久以来对于它的兼容性还是秉着一颗质疑的态度.但是一切事情都要敢于尝试,毕竟Win10的用户体验还是很好的.和预料的一样,问题马 ...
Escape字符总结
有如下的 escape字符. 对于十进制来说,\后面只涵盖3个字符,比如\1234,是\123和字符4. 但是对于十六进制,后面会涵盖四个字符,比如\x1234,后面的四个字符都在\的涵盖范围内.
【Nodejs】理想论坛帖子下载爬虫1.04
一直想做一个能把理想论坛指定页范围的帖子都能完整下载下来的爬虫,但未能如愿. 主要的障碍在并发数的控制和长时间任务的突然退出,比如想下载前五页的帖子,分析后可得到大约15000个主贴或子贴,如果用回调 ...
大型应用的javascript架构
来源:http://blog.leezhong.com/tech/2010/11/29/javascript-arch.html 目前很多网站基本没有明确的前端架构,大多是服务端渲染视图页,输出到浏览 ...
HBase数据迁移至Hive
背景:需要将HBase中表xyz(列簇cf1,列val)迁移至Hive 1. 建立Hive和HBase的映射关系 1.1 运行hive shell进入hive命令行模式,运行如下脚本 CREA ...
linux 用户管理，用户权限管理，用户组管理
linux 用户管理,用户权限管理,用户组管理一:ls -l 命令解释第个d表示是目录,如果是文件是-,如果是连接是l 第2到4个 rwx 表示创建者的操作权限 r 读,w 写,x 执行第5到 ...

map reduce相关程序

map reduce相关程序的更多相关文章

随机推荐

热门专题