hadoop2.2编程:矩阵相乘简单实现
/* matrix-matrix multiplication on Hadoop A x B = C constraint: A, B, C must be of the same size I use this to evaluate the efficiency of Hadoop for matrix multiplication, so I really don't care to handle non-square matrices. ===Data preparation==== Matrix data must be stored in a file on Hadoop. Line number must be appended to the beginning of each line. For example, the following represents a 4x4 matrix: 0 18 20 16 14 1 17 12 11 19 2 10 17 11 19 3 14 17 20 10 Left (A in this example) matrix should be stored in file "left"; Right (B in this example) matrix should be stored in file "right"; I use filenames to distinguish input data. Place "left" and "right" in the same folder (let's call it "input") ====Run the program==== > hadoop jar matrixmul.jar MatrixMul input output 8 2 results will be placed in "output" folder on HDFS. 8: all matrices are 8x8 2: every partitioned block is of size 2x2 ===Read the results=== Given the above sample command, we multiply two 8x8 matrices, in many 2x2 blocks. So, that the resulted C matrix has 16 blocks. In the output folder, there will be 16 separate files: part-r-00000, part-r-00001, ... part-r-00015 Every file stores one block in C. In this example, every block has 2 rows and 2 columns. These files are organized in "row"-order. ===Algorithm=== Mappers read input data. Every reducer processes one block of the resulted matrix. */ import java.io.IOException; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.util.GenericOptionsParser; public class MatrixMul { public static class MyMapper extends Mapper<LongWritable, Text, IntWritable, Text>{ private String filename=null; private boolean isLeftMatrix=false; private int totalSize, partSize, npart; private boolean isLeft(){return isLeftMatrix;} protected void setup(Context context) throws IOException, InterruptedException{ //get filename FileSplit fileSplit = (FileSplit)context.getInputSplit(); filename = fileSplit.getPath().getName(); if("left".equalsIgnoreCase(filename)) isLeftMatrix=true; else isLeftMatrix=false; //get how size and partition information Configuration conf=context.getConfiguration(); totalSize=conf.getInt("matrix-mul-totalsize", -1); partSize=conf.getInt("matrix-mul-partsize", -1); npart=conf.getInt("matrix-mul-npart", -1); if(totalSize<0 || partSize<0 || npart<0){ System.out.println("Error in setup of MyMapper."); System.exit(1); } } public void map(LongWritable key, Text value, Context context ) throws IOException, InterruptedException { String line=value.toString(); String[] strs=line.split(" "); if(strs.length!=totalSize+1){ System.out.println("Error in map of Mapper."); System.out.println(strs.length+"___"+totalSize); System.out.println("line is: "+line); System.exit(1); } int linenum=Integer.parseInt(strs[0]); int[] numbers=new int[totalSize]; for(int i=0;i<totalSize;i++) numbers[i]=Integer.parseInt(strs[i+1]); int part_hor=linenum/partSize; //horizontal partitioned id int prev_part_ver=-1; String msg=null; for(int i=0;i<totalSize;i++){ int part_ver=i/partSize; //vertical partition number if(part_ver!=prev_part_ver){ if(msg!=null){ int baselinenum = part_hor * partSize; int old=part_ver; part_ver=prev_part_ver; if(isLeft()){ String toSend="l:"+(linenum - baselinenum)+":"+part_ver+"#"+msg; System.out.println("left "+linenum+","+part_ver+" "+msg); for(int k=0;k<npart;k++){ int dest=part_hor * npart + k; context.write(new IntWritable(dest), new Text(toSend)); } }else{ String toSend="r:"+(linenum - baselinenum)+":"+part_hor+"#"+msg; System.out.println("right "+part_ver+":"+linenum+" "+msg); for(int k=0;k<npart;k++){ int dest=k * npart + part_ver; context.write(new IntWritable(dest), new Text(toSend)); } } part_ver=old; } msg=null; prev_part_ver=part_ver; } if(msg==null) msg=""+strs[i+1]; else msg+=" "+strs[i+1]; } if(msg!=null){ //almost the same code int part_ver=npart-1; int baselinenum = part_hor * partSize; if(isLeft()){ String toSend="l:"+(linenum - baselinenum)+":"+part_ver+"#"+msg; System.out.println("left "+linenum+","+part_ver+" "+msg); for(int k=0;k<npart;k++){ int dest=part_hor * npart + k; context.write(new IntWritable(dest), new Text(toSend)); } }else{ String toSend="r:"+(linenum - baselinenum)+":"+part_hor+"#"+msg; System.out.println("right "+part_ver+":"+linenum+" "+msg); for(int k=0;k<npart;k++){ int dest=k * npart + part_ver; //has to be the last part context.write(new IntWritable(dest), new Text(toSend)); } } } } } public static class MyReducer extends Reducer<IntWritable, Text, Text, Text> { private int totalSize, partSize, npart; int[][] left=null; int[][] right=null; protected void setup(Context context) throws IOException, InterruptedException{ //get how # of partitions Configuration conf=context.getConfiguration(); totalSize=conf.getInt("matrix-mul-totalsize", -1); partSize=conf.getInt("matrix-mul-partsize", -1); npart=conf.getInt("matrix-mul-npart", -1); if(totalSize<0 || partSize<0 || npart<0){ System.out.println("Error in setup of MyReducer."); System.exit(1); } left=new int[partSize][totalSize]; right=new int[totalSize][partSize]; } public void reduce(IntWritable key, Iterable<Text> values, Context context ) throws IOException, InterruptedException { int sum = 0; for (Text val : values) { String line=val.toString(); String[] meta_val=line.split("#"); String[] metas=meta_val[0].split(":"); String[] numbers=meta_val[1].split(" "); int baselinenum=Integer.parseInt(metas[1]); int blkindex=Integer.parseInt(metas[2]); if("l".equalsIgnoreCase(metas[0])){ //from left matrix int start=blkindex * partSize; for(int i=0;i<partSize; i++) left[baselinenum][start+i]=Integer.parseInt(numbers[i]); }else{ int rowindex=blkindex*partSize + baselinenum; for(int i=0;i<partSize; i++) right[rowindex][i]=Integer.parseInt(numbers[i]); } } } protected void cleanup(Context context) throws IOException, InterruptedException { //now let's do the calculation int[][] res=new int[partSize][partSize]; for(int i=0;i<partSize;i++) for(int j=0;j<partSize;j++) res[i][j]=0; for(int i=0;i<partSize;i++){ for(int k=0;k<totalSize;k++){ for(int j=0;j<partSize;j++){ res[i][j]+=left[i][k]*right[k][j]; } } } for(int i=0;i<partSize;i++){ String output=null; for(int j=0;j<partSize;j++){ if(output==null) output=""+res[i][j]; else output+=" "+res[i][j]; } context.write(new Text(output), null); } } } public static void main(String[] args) throws Exception { Configuration conf = new Configuration(); if (args.length != 4) { System.err.println("Usage: MatrixMul input-dir output-dir total-size part-size"); System.exit(2); } int totalsize=Integer.parseInt(args[2]); int partsize=Integer.parseInt(args[3]); if(totalsize==0 || partsize==0 || partsize>totalsize){ System.out.println("Invalid total-size or part-size"); System.exit(1); } conf.setInt("matrix-mul-totalsize", totalsize); //the matrix is 'totalsize' by 'totalsize' conf.setInt("matrix-mul-partsize", partsize); //every block is 'partsize' by 'partsize' int npart=totalsize/partsize; if(npart*partsize<totalsize) npart++; conf.setInt("matrix-mul-npart", npart); //number of parts on one dimension Job job = new Job(conf, "matrix-mul"); job.setJarByClass(MatrixMul.class); job.setMapperClass(MyMapper.class); job.setReducerClass(MyReducer.class); job.setNumReduceTasks(npart*npart); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); //FileInputFormat.addInputPath(job, new Path(args[0])); TextInputFormat.addInputPath(job, new Path(args[0])); //need to read a complete line FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true) ; } }
hadoop2.2编程:矩阵相乘简单实现的更多相关文章
- CUDA编程-(2)其实写个矩阵相乘并不是那么难
程序代码及图解析: #include <iostream> #include "book.h" __global__ void add( int a, int b, i ...
- 编程计算2×3阶矩阵A和3×2阶矩阵B之积C。 矩阵相乘的基本方法是: 矩阵A的第i行的所有元素同矩阵B第j列的元素对应相乘, 并把相乘的结果相加,最终得到的值就是矩阵C的第i行第j列的值。 要求: (1)从键盘分别输入矩阵A和B, 输出乘积矩阵C (2) **输入提示信息为: 输入矩阵A之前提示:"Input 2*3 matrix a:\n" 输入矩阵B之前提示
编程计算2×3阶矩阵A和3×2阶矩阵B之积C. 矩阵相乘的基本方法是: 矩阵A的第i行的所有元素同矩阵B第j列的元素对应相乘, 并把相乘的结果相加,最终得到的值就是矩阵C的第i行第j列的值. 要求: ...
- 利用Hadoop实现超大矩阵相乘之我见(二)
前文 在<利用Hadoop实现超大矩阵相乘之我见(一)>中我们所介绍的方法有着“计算过程中文件占用存储空间大”这个缺陷,本文中我们着重解决这个问题. 矩阵相乘计算思想 传统的矩阵相乘方法为 ...
- 利用Hadoop实现超大矩阵相乘之我见(一)
前记 最近,公司一位挺优秀的总务离职,欢送宴上,她对我说“你是一位挺优秀的程序员”,刚说完,立马道歉说“对不起,我说你是程序员是不是侮辱你了?”我挺诧异,程序员现在是很低端,很被人瞧不起的工作吗?或许 ...
- Strassen 矩阵相乘算法(转)
偶尔在算法课本上面看到矩阵相乘的算法,联想到自己曾经在蓝桥杯系统上曾经做过一道矩阵相乘的题目,当时用的是普通的矩阵相乘的方法,效率极低,勉强通过编译.所以决定研究一下Strassen矩阵相乘算法,由于 ...
- dp方法论——由矩阵相乘问题学习dp解题思路
前篇戳:dp入门——由分杆问题认识动态规划 导语 刷过一些算法题,就会十分珍惜“方法论”这种东西.Leetcode上只有题目.讨论和答案,没有方法论.往往答案看起来十分切中要害,但是从看题目到得到思路 ...
- Opencv中Mat矩阵相乘——点乘、dot、mul运算详解
Opencv中Mat矩阵相乘——点乘.dot.mul运算详解 2016年09月02日 00:00:36 -牧野- 阅读数:59593 标签: Opencv矩阵相乘点乘dotmul 更多 个人分类: O ...
- C++两个矩阵相乘
/*编程求两个矩阵相乘的结果.输入第一行是整数m,n,表示第一个矩阵式m行n列的:然后是一个m * n的矩阵.再下一行的输入时整数p,q,表示下一个矩阵p行,q列的(n=p);然后就是一个p行q列的矩 ...
- 使用cublas 矩阵库函数实现矩阵相乘
2014-08-10 cublas中执行矩阵乘法运算的函数 首先要注意的是cublas使用的是以列为主的存储方式,和c/c++中的以行为主的方式是不一样的.处理方法可参考下面的注释代码 // SOME ...
随机推荐
- NOPI读取EXCEL
public void ReadEXCEL(string filePath) { IWorkbook wk = null; string extension = System.IO.Path.GetE ...
- .NET小项目之MyKtv(歌曲播放功能实现)
在KTV点歌系统中我们根据需求获取到歌手的歌曲信息,点击歌手的歌曲将其添加到一点歌曲列表中看似简单的一个操作其实涉及很多内容,这也是写这篇Blog的目的—分析歌曲播放的原理. 原理分析 我们应该清楚, ...
- Windows下Wamp装不上Memcache扩展
windows下wamp装不上memcache扩展2015.03.20 No Comments 1,243 views用的是WAMP集成包,PHP版本5.5.12http://windows.php. ...
- ASP.NET MVC Web API使用示例
上篇博客讲解rest服务开发时,曾经提到过asp.net mvc中的rest api,由于篇幅原因,没有在上篇博客中进行讲解,这里专门拿出来进行讨论.还是一样引用上次的案例,用asp.net mvc提 ...
- JSON对象的stringify()和parse()方法
1.stringify() ---- JavaScript对象序列化为JSON字符串 eg1. var book = {title: 'JS', authors: ['Van'], edition:3 ...
- Sql语句批量更新数据(多表关联)
最近在项目中遇到一个问题,原来设计的功能是不需要一个特定的字段值depid的,但是新的功能需要根据depid来展现,于是出现了这样一个问题,新增加的数据都有正确的depid,而原来的大量的数据就没有d ...
- oracle日期格式数据修改
select * from INVOICE_NEW where ref_no='32308' update INVOICE_NEW set check_d=to_date('2015/11/16', ...
- MySQL大数据量快速分页实现
一般刚开始学SQL语句的时候,会这样写 代码如下: SELECT * FROM table ORDER BY id LIMIT 1000, 10; 但在数据达到百万级的时候,这样写会慢死 代码如下: ...
- 使用Python编程语言连接MySQL数据库代码
使用Python编程语言连接MySQL数据库代码,跟大家分享一下: 前几天我用python操作了mysql的数据库,发现非常的有趣,而且python操作mysql的方法非常的简单和快速,所以我把代码分 ...
- Oracle控制文件丢失,日志文件丢失
控制文件丢失: alter database backup controlfile to traces; shutdown immediate; @j:\db\script\orcl_ora_ctl_ ...