multiple output reduce cannot write

package org.lukey.hadoop.classifyBayes;

import java.io.BufferedReader;

import java.io.IOException;

import java.io.InputStreamReader;

import java.net.URI;

import org.apache.hadoop.conf.Configuration;

import org.apache.hadoop.fs.FSDataInputStream;

import org.apache.hadoop.fs.FileSystem;

import org.apache.hadoop.fs.Path;

import org.apache.hadoop.io.DoubleWritable;

import org.apache.hadoop.io.IntWritable;

import org.apache.hadoop.io.LongWritable;

import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;

import org.apache.hadoop.mapreduce.Mapper;

import org.apache.hadoop.mapreduce.Reducer;

import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

/**
 * MapReduce job that converts per-class word counts into per-class output files.
 *
 * <p>Input lines have the form {@code className<TAB>word<TAB>count}. The mapper forwards each
 * record to the reducer under a composite {@code className<TAB>word} key; the reducer sums the
 * counts for that key, divides by the corpus-wide word total published by the driver, and writes
 * the result through {@link MultipleOutputs} to a file whose base name is the class name.
 *
 * <p>Why the structure looks like this: the previous version wrote from the mapper through
 * {@code MultipleOutputs} only and never called {@code context.write}, so no record ever reached
 * the reducer and the reducer produced no output.
 */
public class Probability {

    /** Configuration key under which the driver publishes the total word count to the tasks. */
    private static final String TOTAL_WORDS_KEY = "TOTALWORDS";

    /** Separator between class name and word inside the intermediate (map output) key. */
    private static final String KEY_SEPARATOR = "\t";

    // Client

    /**
     * Reads the total word count from HDFS, stores it in the job configuration and runs the job.
     * Input and output paths are hard-coded; {@code args} is not consulted.
     *
     * @param args ignored
     * @throws Exception if the totals file cannot be read or the job submission fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();

        // Read the total word count and publish it through the configuration.
        String totalWordsPath = "/user/hadoop/output/totalwords.txt";
        FileSystem fs = FileSystem.get(URI.create(totalWordsPath), conf);
        FSDataInputStream inputStream = fs.open(new Path(totalWordsPath));
        String strLine;
        try {
            BufferedReader buffer = new BufferedReader(new InputStreamReader(inputStream, "UTF-8"));
            strLine = buffer.readLine();
        } finally {
            // Closing the underlying stream releases the HDFS handle even if readLine fails.
            inputStream.close();
        }
        if (strLine == null) {
            throw new IOException("Total-words file is empty: " + totalWordsPath);
        }

        String[] temp = strLine.split(":");
        if (temp.length == 2) {
            // temp[0] is expected to be TOTALWORDS, the key the reducer looks up.
            conf.setInt(temp[0].trim(), Integer.parseInt(temp[1].trim()));
        } else {
            System.err.println("Unexpected format in " + totalWordsPath + ": " + strLine);
        }

        Job job = new Job(conf, "file count");
        job.setJarByClass(Probability.class);
        job.setMapperClass(WordsOfClassCountMapper.class);
        job.setReducerClass(WordsOfClassCountReducer.class);

        String input = "/user/hadoop/mid/wordsFrequence";
        String output = "/user/hadoop/output/probability/";
        FileInputFormat.addInputPath(job, new Path(input));
        FileOutputFormat.setOutputPath(job, new Path(output));

        // Map and reduce emit different value types, so both pairs must be declared explicitly.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(DoubleWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }

    // Mapper

    /**
     * Parses {@code className<TAB>word<TAB>count} lines and emits
     * ({@code className<TAB>word}, count) to the shuffle. Lines that do not have exactly three
     * tab-separated fields are ignored.
     */
    static class WordsOfClassCountMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        // Per-task reusable writables; instance fields so tasks sharing a JVM do not interfere.
        private final Text outKey = new Text();
        private final IntWritable number = new IntWritable();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] temp = value.toString().split("\t");
            if (temp.length == 3) {
                // temp[0] is the directory name, which doubles as the class name.
                String dirName = temp[0];
                outKey.set(dirName + KEY_SEPARATOR + temp[1]);
                number.set(Integer.parseInt(temp[2]));
                // Must go through context.write, otherwise the reducer never sees the record.
                context.write(outKey, number);
            }
        }
    }

    // Reducer

    /**
     * Sums the counts for each ({@code className}, word) pair, divides by the configured total
     * word count and writes (word, value) to an output file named after the class.
     */
    static class WordsOfClassCountReducer extends Reducer<Text, IntWritable, Text, DoubleWritable> {

        private final Text word = new Text();
        private final DoubleWritable result = new DoubleWritable();

        /** Created per task in {@link #setup} and closed in {@link #cleanup}. */
        private MultipleOutputs<Text, DoubleWritable> mos;

        /** Total word count taken from the job configuration; 1 when absent or non-positive. */
        private int total = 1;

        @Override
        protected void setup(Context context) throws IOException, InterruptedException {
            mos = new MultipleOutputs<Text, DoubleWritable>(context);
            // Use the job's configuration: a fresh Configuration() does not contain the value
            // the driver stored, so the lookup would always fall back to the default.
            int configured = context.getConfiguration().getInt(TOTAL_WORDS_KEY, 1);
            total = configured > 0 ? configured : 1;
        }

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            long sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            // NOTE(review): emits sum / TOTALWORDS with no smoothing; confirm this is the
            // intended probability formula for the classifier.
            result.set((double) sum / total);

            String composite = key.toString();
            int separatorIndex = composite.indexOf(KEY_SEPARATOR);
            if (separatorIndex < 0) {
                // Key without a class prefix: fall back to the job's default output.
                context.write(key, result);
                return;
            }
            word.set(composite.substring(separatorIndex + 1));
            mos.write(word, result, composite.substring(0, separatorIndex));
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Flushes and closes every per-class writer opened by this task.
            mos.close();
        }
    }
}

multiple output reduce cannot write的更多相关文章

2019.12.05【ABAP随笔】分组循环(LOOP AT Group) / REDUCE
ABAP 7.40新语法 LOOP AT Group 和 REDUCE *LOOP AT itab result [cond] GROUP BY key ( key1 = dobj1 key2 = d ...
Hadoop基础概念介绍
基于YARN的配置信息, 参见: http://www.ibm.com/developerworks/cn/opensource/os-cn-hadoop-yarn/ hadoop入门 - 基础概念 ...
MapReduce执行流程及程序编写
MapReduce 一种分布式计算模型,解决海量数据的计算问题,MapReduce将计算过程抽象成两个函数 Map(映射):对一些独立元素(拆分后的小块)组成的列表的每一个元素进行指定的操作,可以高度 ...
（3）Deep Learning之神经网络和反向传播算法
往期回顾在上一篇文章中,我们已经掌握了机器学习的基本套路,对模型.目标函数.优化算法这些概念有了一定程度的理解,而且已经会训练单个的感知器或者线性单元了.在这篇文章中,我们将把这些单独的单元按照一定 ...
javaScript系列 [09]-javaScript和JSON (拓展)
本文输出JSON搜索和JSON转换相关的内容,是对前两篇文章的补充. JSON搜索在特定的开发场景中,如果服务器端返回的JSON数据异常复杂(可能超过上万行),那么必然就有对JSON文档进行搜索的需 ...
Hadoop源码分析（mapreduce.lib.partition/reduce/output）
Map的结果,会通过partition分发到Reducer上.Reducer做完Reduce操作后,通过OutputFormat,进行输出.以下我们就来分析參与这个过程的类. Mapper的结果, ...
MapReduce剖析笔记之七：Child子进程处理Map和Reduce任务的主要流程
在上一节我们分析了TaskTracker如何对JobTracker分配过来的任务进行初始化,并创建各类JVM启动所需的信息,最终创建JVM的整个过程,本节我们继续来看,JVM启动后,执行的是Child ...
MapReduce剖析笔记之三：Job的Map/Reduce Task初始化
上一节分析了Job由JobClient提交到JobTracker的流程,利用RPC机制,JobTracker接收到Job ID和Job所在HDFS的目录,构造了JobInProgress对象,丢入队列 ...
【hadoop】如何向map和reduce脚本传递参数,加载文件和目录
本文主要讲解三个问题: 1 使用Java编写MapReduce程序时,如何向map.reduce函数传递参数. 2 使用Streaming编写MapReduce程序(C/C++ ...

随机推荐

c语言-三字符组
C 源程序源字符集在 7 位 ASCII 字符集中包含,但设置为 ISO 646-1983 固定的代码的超集. 三字符序列允许 C 程序编写使用 " 仅 ISO (国际标准组织的固定的代码. ...
Linux Shell : Test命令参数解析
格式: test conditions test -n string : string 不为空 test -z string : string 为空 test int1 -eq int2 : int ...
allegro 导Gerber文件
今天抽空好好整理了一下有关Allegro出Gerber文件文档,此文档在网上搜到的基础上进一步完善,把每个需要注意的地方都用红色字体框出 http://files.cnblogs.com/files/ ...
利用朴素贝叶斯算法进行分类-Java代码实现
http://www.crocro.cn/post/286.html 利用朴素贝叶斯算法进行分类-Java代码实现鳄鱼 3个月前 (12-14) 分类:机器学习阅读(44) 评论(0) ...
Why does TCP guarantee delivery?
Simple idea: just use a TIMEOUT, if no answer after a certain seconds, just re-deliver!
maven 国内镜像地址
由于连接国外网站时网速特慢,为解决这个问题,os china 建立了一个maven 的私服.为了记忆,特将此记录. settings.xml 设置镜像方法步骤如下: 1. mirrors 设置 < ...
MongoDB用户
MongoDB 增加用户删除用户修改用户读写权限只读权限, MongoDB用户权限分配的操作是针对某个库来说的.--这句话很重要. 1. 进入ljc 数据库: use ...
Java-List泛型的用处(能够使用传入泛型对象的方法)
List<PageData> varList = setMealService.list(page); for(int i = 0;i < varList.size(); i++){ ...
linux 文件系统操作（）
1. 用Xshell 客户端连上远程主机. 2.ll 或 ls 查看当前目录下的文件或目录, cd / 切换到根目录, cd **切换到某个目录(或者叫进入某个文件夹) 3.文件的压缩命令:zip - ...
mybatis 的一点问题
写法1: public User queryUserByUsername(String username); 写法2: public User queryUserByUsername(@Par ...

multiple output reduce cannot write

multiple output reduce cannot write的更多相关文章

随机推荐

热门专题