Hadoop下大矩阵乘法Version2
1)使用本方法计算F*B,其中F是1000*1000的矩阵,B是1000*20000的矩阵,使用三个节点的集群,每个节点一个CPU核(集群装在虚拟机里,
宿主机只有4个CPU核),每个节点配置一个map槽,一个reduce槽,完成矩阵运算时间为5mins。
2)源码如下:
1 /**
2 * Created with IntelliJ IDEA.
3 * User: hadoop
4 * Date: 16-3-14
5 * Time: 下午3:13
6 * To change this template use File | Settings | File Templates.
7 */
8 import org.apache.hadoop.conf.Configuration;
9 import org.apache.hadoop.fs.FileSystem;
10 import java.io.IOException;
11 import java.lang.reflect.Array;
12 import java.net.URI;
13 import org.apache.hadoop.fs.Path;
14 import org.apache.hadoop.io.*;
15 import org.apache.hadoop.io.DoubleWritable;
16 import org.apache.hadoop.io.Writable;
17 import org.apache.hadoop.mapreduce.InputSplit;
18 import org.apache.hadoop.mapreduce.Job;
19 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
20 import org.apache.hadoop.mapreduce.lib.input.FileSplit;
21 import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
22 import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
23 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
24 import org.apache.hadoop.mapreduce.Reducer;
25 import org.apache.hadoop.mapreduce.Mapper;
26 import org.apache.hadoop.filecache.DistributedCache;
27 import org.apache.hadoop.util.ReflectionUtils;
28
29 public class MutiDoubleInputMatrixProduct {
30
31 public static void initDoubleArrayWritable(int length,DoubleWritable[] doubleArrayWritable){
32 for (int i=0;i<length;++i){
33 doubleArrayWritable[i]=new DoubleWritable(0.0);
34 }
35 }
36
37 public static class MyMapper extends Mapper<IntWritable,DoubleArrayWritable,IntWritable,DoubleArrayWritable>{
38 public DoubleArrayWritable map_value=new DoubleArrayWritable();
39 public double[][] leftMatrix=null;/******************************************/
40 //public Object obValue=null;
41 public DoubleWritable[] arraySum=null;
42 public DoubleWritable[] tempColumnArrayDoubleWritable=null;
43 public DoubleWritable[] tempRowArrayDoubleWritable=null;
44 public double sum=0;
45 public double uValue;
46 public int leftMatrixRowNum;
47 public int leftMatrixColumnNum;
48 public void setup(Context context) throws IOException {
49 Configuration conf=context.getConfiguration();
50 leftMatrixRowNum=conf.getInt("leftMatrixRowNum",10);
51 leftMatrixColumnNum=conf.getInt("leftMatrixColumnNum",10);
52 leftMatrix=new double[leftMatrixRowNum][leftMatrixColumnNum];
53 uValue=(double)(context.getConfiguration().getFloat("u",1.0f));
54 tempRowArrayDoubleWritable=new DoubleWritable[leftMatrixColumnNum];
55 initDoubleArrayWritable(leftMatrixColumnNum,tempRowArrayDoubleWritable);
56 tempColumnArrayDoubleWritable=new DoubleWritable[leftMatrixRowNum];
57 initDoubleArrayWritable(leftMatrixRowNum,tempColumnArrayDoubleWritable);
58 System.out.println("map setup() start!");
59 //URI[] cacheFiles=DistributedCache.getCacheFiles(context.getConfiguration());
60 Path[] cacheFiles=DistributedCache.getLocalCacheFiles(conf);
61 String localCacheFile="file://"+cacheFiles[0].toString();
62 //URI[] cacheFiles=DistributedCache.getCacheFiles(conf);
63 //DistributedCache.
64 System.out.println("local path is:"+cacheFiles[0].toString());
65 // URI[] cacheFiles=DistributedCache.getCacheFiles(context.getConfiguration());
66 FileSystem fs =FileSystem.get(URI.create(localCacheFile), conf);
67 SequenceFile.Reader reader=null;
68 reader=new SequenceFile.Reader(fs,new Path(localCacheFile),conf);
69 IntWritable key= (IntWritable)ReflectionUtils.newInstance(reader.getKeyClass(),conf);
70 DoubleArrayWritable value= (DoubleArrayWritable)ReflectionUtils.newInstance(reader.getValueClass(),conf);
71 //int valueLength=0;
72 int rowIndex=0;
73 int index;
74 while (reader.next(key,value)){
75 index=-1;
76 for (Writable val:value.get()){
77 tempRowArrayDoubleWritable[++index].set(((DoubleWritable)val).get());
78 }
79 //obValue=value.toArray();
80 rowIndex=key.get();
81 leftMatrix[rowIndex]=new double[leftMatrixColumnNum];
82 //this.leftMatrix=new double[valueLength][Integer.parseInt(context.getConfiguration().get("leftMatrixColumnNum"))];
83 for (int i=0;i<leftMatrixColumnNum;++i){
84 //leftMatrix[rowIndex][i]=Double.parseDouble(Array.get(obValue, i).toString());
85 //leftMatrix[rowIndex][i]=Array.getDouble(obValue, i);
86 leftMatrix[rowIndex][i]= tempRowArrayDoubleWritable[i].get();
87 }
88
89 }
90 arraySum=new DoubleWritable[leftMatrix.length];
91 initDoubleArrayWritable(leftMatrix.length,arraySum);
92 }
93 public void map(IntWritable key,DoubleArrayWritable value,Context context) throws IOException, InterruptedException {
94 //obValue=value.toArray();
95 InputSplit inputSplit=context.getInputSplit();
96 String fileName=((FileSplit)inputSplit).getPath().getName();
97 if (fileName.startsWith("FB")) {
98 context.write(key,value);
99 }
100 else{
101 int ii=-1;
102 for(Writable val:value.get()){
103 tempColumnArrayDoubleWritable[++ii].set(((DoubleWritable)val).get());
104 }
105 //arraySum=new DoubleWritable[this.leftMatrix.length];
106 for (int i=0;i<this.leftMatrix.length;++i){
107 sum=0;
108 for (int j=0;j<this.leftMatrix[0].length;++j){
109 //sum+= this.leftMatrix[i][j]*Double.parseDouble(Array.get(obValue,j).toString())*(double)(context.getConfiguration().getFloat("u",1f));
110 //sum+= this.leftMatrix[i][j]*Array.getDouble(obValue,j)*uValue;
111 sum+= this.leftMatrix[i][j]*tempColumnArrayDoubleWritable[j].get()*uValue;
112 }
113 arraySum[i].set(sum);
114 //arraySum[i].set(sum);
115 }
116 map_value.set(arraySum);
117 context.write(key,map_value);
118 }
119 }
120 }
121 public static class MyReducer extends Reducer<IntWritable,DoubleArrayWritable,IntWritable,DoubleArrayWritable>{
122 public DoubleWritable[] sum=null;
123 // public Object obValue=null;
124 public DoubleArrayWritable valueArrayWritable=new DoubleArrayWritable();
125 public DoubleWritable[] tempColumnArrayDoubleWritable=null;
126 // public DoubleWritable[] tempRowArrayDoubleWritable=null;
127 //private int leftMatrixColumnNum;
128 private int leftMatrixRowNum;
129
130 public void setup(Context context){
131 //leftMatrixColumnNum=context.getConfiguration().getInt("leftMatrixColumnNum",100);
132 leftMatrixRowNum=context.getConfiguration().getInt("leftMatrixRowNum",100);
133 sum=new DoubleWritable[leftMatrixRowNum];
134 initDoubleArrayWritable(leftMatrixRowNum,sum);
135 //tempRowArrayDoubleWritable=new DoubleWritable[leftMatrixColumnNum];
136 tempColumnArrayDoubleWritable=new DoubleWritable[leftMatrixRowNum];
137 initDoubleArrayWritable(leftMatrixRowNum,tempColumnArrayDoubleWritable);
138 }
139
140 public void reduce(IntWritable key,Iterable<DoubleArrayWritable>value,Context context) throws IOException, InterruptedException {
141 //int valueLength=0;
142 for(DoubleArrayWritable doubleValue:value){
143 int index=-1;
144 for (Writable val:doubleValue.get()){
145 tempColumnArrayDoubleWritable[++index].set(((DoubleWritable)val).get());
146 }
147 //valueLength=Array.getLength(obValue);
148 for (int i=0;i<leftMatrixRowNum;++i){
149 //sum[i]=new DoubleWritable(Double.parseDouble(Array.get(obValue,i).toString())+sum[i].get());
150 //sum[i]=new DoubleWritable(Array.getDouble(obValue,i)+sum[i].get());
151 sum[i].set(tempColumnArrayDoubleWritable[i].get()+sum[i].get());
152 }
153 }
154 //valueArrayWritable.set(sum);
155 valueArrayWritable.set(tempColumnArrayDoubleWritable);
156 context.write(key,valueArrayWritable);
157 for (int i=0;i<sum.length;++i){
158 sum[i].set(0.0);
159 }
160
161 }
162 }
163
164 public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
165 String uri=args[3];
166 String outUri=args[4];
167 String cachePath=args[2];
168 HDFSOperator.deleteDir(outUri);
169 Configuration conf=new Configuration();
170 DistributedCache.addCacheFile(URI.create(cachePath),conf);//添加分布式缓存
171 /**************************************************/
172 //FileSystem fs=FileSystem.get(URI.create(uri),conf);
173 //fs.delete(new Path(outUri),true);
174 /*********************************************************/
175 conf.setInt("leftMatrixColumnNum",Integer.parseInt(args[0]));
176 conf.setInt("leftMatrixRowNum",Integer.parseInt(args[1]));
177 conf.setFloat("u",0.35f);
178 conf.set("mapred.jar","MutiDoubleInputMatrixProduct.jar");
179 Job job=new Job(conf,"MatrixProdcut");
180 job.setJarByClass(MutiDoubleInputMatrixProduct.class);
181 job.setInputFormatClass(SequenceFileInputFormat.class);
182 job.setOutputFormatClass(SequenceFileOutputFormat.class);
183 job.setMapperClass(MyMapper.class);
184 job.setReducerClass(MyReducer.class);
185 job.setMapOutputKeyClass(IntWritable.class);
186 job.setMapOutputValueClass(DoubleArrayWritable.class);
187 job.setOutputKeyClass(IntWritable.class);
188 job.setOutputValueClass(DoubleArrayWritable.class);
189 FileInputFormat.setInputPaths(job, new Path(uri));
190 FileOutputFormat.setOutputPath(job,new Path(outUri));
191 System.exit(job.waitForCompletion(true)?0:1);
192 }
193
194
195 }
196 class DoubleArrayWritable extends ArrayWritable {
197 public DoubleArrayWritable(){
198 super(DoubleWritable.class);
199 }
200 /*
201 public String toString(){
202 StringBuilder sb=new StringBuilder();
203 for (Writable val:get()){
204 DoubleWritable doubleWritable=(DoubleWritable)val;
205 sb.append(doubleWritable.get());
206 sb.append(",");
207 }
208 sb.deleteCharAt(sb.length()-1);
209 return sb.toString();
210 }
211 */
212 }
213
214 class HDFSOperator{
215 public static boolean deleteDir(String dir)throws IOException{
216 Configuration conf=new Configuration();
217 FileSystem fs =FileSystem.get(conf);
218 boolean result=fs.delete(new Path(dir),true);
219 System.out.println("sOutput delete");
220 fs.close();
221 return result;
222 }
223 }
Hadoop下大矩阵乘法Version2的更多相关文章
- MapReduce实现大矩阵乘法
来自:http://blog.csdn.net/xyilu/article/details/9066973 引言 何 为大矩阵?Excel.SPSS,甚至SAS处理不了或者处理起来非常困难,需要设计巧 ...
- [模板][题解][Luogu1939]矩阵乘法加速递推(详解)
题目传送门 题目大意:计算数列a的第n项,其中: \[a[1] = a[2] = a[3] = 1\] \[a[i] = a[i-3] + a[i - 1]\] \[(n ≤ 2 \times 10^ ...
- CNN卷积神经网络_深度残差网络 ResNet——解决神经网络过深反而引起误差增加的根本问题,Highway NetWork 则允许保留一定比例的原始输入 x。(这种思想在inception模型也有,例如卷积是concat并行,而不是串行)这样前面一层的信息,有一定比例可以不经过矩阵乘法和非线性变换,直接传输到下一层,仿佛一条信息高速公路,因此得名Highway Network
from:https://blog.csdn.net/diamonjoy_zone/article/details/70904212 环境:Win8.1 TensorFlow1.0.1 软件:Anac ...
- 矩阵乘法在hadoop的实现
先随机生成一个矩阵,矩阵的行数与列数由用户输入: #!/bin/bashfor i in `seq 1 $1`do for j in `seq 1 $2` do s=$((RANDOM%100)) e ...
- THUSCH 2017 大魔法师(矩阵乘法+线段树)
题意 https://loj.ac/problem/2980 思路 区间修改考虑用线段树维护.由于一段区间的 \(A,B,C\) 可以表示成由原来的 \(A,B,C\) 乘上带上系数再加上某一个某个常 ...
- MapReduce实现矩阵乘法
简单回想一下矩阵乘法: 矩阵乘法要求左矩阵的列数与右矩阵的行数相等.m×n的矩阵A,与n×p的矩阵B相乘,结果为m×p的矩阵C.具体内容能够查看:矩阵乘法. 为了方便描写叙述,先进行如果: 矩阵A的行 ...
- 矩阵乘法的MapReduce实现
对于任意矩阵M和N,若矩阵M的列数等于矩阵N的行数,则记M和N的乘积为P=M*N,其中mik 记做矩阵M的第i行和第k列,nkj记做矩阵N的第k行和第j列,则矩阵P中,第i行第j列的元素可表示为公式( ...
- OpenGL学习进程(12)第九课:矩阵乘法实现3D变换
本节是OpenGL学习的第九个课时,下面将详细介绍OpenGL的多种3D变换和如何操作矩阵堆栈. (1)3D变换: OpenGL中绘制3D世界的空间变换包括:模型变换.视图变换.投影变换和视口 ...
- 【模拟题(电子科大MaxKU)】解题报告【树形问题】【矩阵乘法】【快速幂】【数论】
目录: 1:一道简单题[树形问题](Bzoj 1827 奶牛大集会) 2:一道更简单题[矩阵乘法][快速幂] 3:最简单题[技巧] 话说这些题目的名字也是够了.... 题目: 1.一道简单题 时间1s ...
随机推荐
- IntValue()方法 和 ValueOf()方法
intValue() 1.intValue()是java.lang.Number类的方法,Number是一个抽象类.Java中所有的数值类都继承它.也就是说,不单是Integer有intValue方法 ...
- 数学:乘法逆元-拓展GCD
乘法逆元应用在组合数学取模问题中,这里给出的实现不见得好用 给出拓展GCD算法: 扩展欧几里得算法是指对于两个数a,b 一定能找到x,y(均为整数,但不满足一定是正数) 满足x*a+y*b=gcd(a ...
- codeforces B. Okabe and Banana Trees 结论题
题目传送门 这道题 枚举一波y就好了 要求x,y整数 所以y最多1000个 然后算一波答案更新就好了 233 #include<cstdio> #include<cstring> ...
- ubuntu安装mysqlclient
安装mysql: sudo apt-get install mysql-server mysql-client 然后mysql -V查看mysql是否安装成功 sudo apt-get install ...
- wx.ScrolledWindow wx.PseudoDC
# encoding: utf-8 import logging import random import wx import wx.lib.inspection def GetMyBitmap(): ...
- TortoiseSVN与VisualSVN Server搭建SVN版本控制系统【转】
转自:http://www.cnblogs.com/xing901022/p/4399382.html 本片主要介绍如何搭建SVN版本控制系统,主要使用工具: 1 客户端:TortoiseSVN (小 ...
- css3带你实现酷炫效果
css3 私有前缀 -webkit- chrome/safari等webkit内核浏览器 -moz- firfox -o- opera -ms- IE css3 盒子模型 box-sizing 值co ...
- C#字节数组的常用解码处理方法
在某些情况下,比如说串口通信或者读取二进制的文件,通常会得到一个byte数组形式的数据. 然而对于这个数据处理常常令人苦恼,因为通常通信情况下,并不是一个字节代表一个字符或者某个数据,而是数据夹杂在字 ...
- 【SQL】多个表的查询
1.元组变量 SELECT * FROM a AS x, a AS y; 结果是显示自己和自己的笛卡尔乘积. 如果查询中对于某一个关系使用了多次,为了区别他们的属性,需要对关系定义别名,然后用 别名. ...
- linux--redis的安装和配置和开启多个端口
在workerman开发过程中需要安装redis来存储用户ip.端口等信息 首先UBUNTU中安装redis: apt-update //更新apt包源apt-get install redis-s ...