#define min(x,y) (((x) < (y)) ? (x) : (y))   // unused in this example

#include <stdio.h>
#include <stdlib.h>
#include <cublas_v2.h>   // only needed for the cuBLAS section later
#include <iostream>
#include <vector>
//extern "C"
//{
#include <cblas.h>
//}

using namespace std;

int main()
{
    const enum CBLAS_ORDER Order = CblasRowMajor;
    const enum CBLAS_TRANSPOSE TransA = CblasNoTrans;
    const enum CBLAS_TRANSPOSE TransB = CblasNoTrans;
    const int M = 4;        // rows of A and of C
    const int N = 2;        // columns of B and of C
    const int K = 3;        // columns of A, rows of B
    const float alpha = 1;
    const float beta = 0;
    const int lda = K;      // row length of A (row-major leading dimension)
    const int ldb = N;      // row length of B
    const int ldc = N;      // row length of C
    const float A[M*K] = {1,2,3, 4,5,6, 7,8,9, 8,7,6};
    const float B[K*N] = {5,4, 3,2, 1,0};
    float C[M*N];

    cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);

    for (int i = 0; i < M; i++)
    {
        for (int j = 0; j < N; j++)
        {
            cout << C[i*N + j] << " ";
        }
        cout << endl;
    }

    return EXIT_SUCCESS;
}

g++ testblas.c++ -lopenblas -o testout

g++ testblas.c++ -lopenblas_piledriverp-r0.2.9 -o testout   # linking against a locally built OpenBLAS

Note: the library must come after the source file that references it, because the linker resolves symbols from left to right.
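With the matrices above, running ./testout should print the 4x2 product (each row of C on its own line):

    14 8
    41 26
    68 44
    67 46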

cblas_sgemm

Multiplies two matrices (single-precision).

void cblas_sgemm (
const enum CBLAS_ORDER Order, // Specifies row-major (C) or column-major (Fortran) data ordering.
//typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;

const enum CBLAS_TRANSPOSE TransA, // Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB,
const int M, // Number of rows in matrices A and C.
const int N, // Number of columns in matrices B and C.
const int K, // Number of columns in matrix A; number of rows in matrix B.
const float alpha, // Scaling factor for the product of matrices A and B.
const float *A,
const int lda, // Leading dimension (stride) of A: the row length for row-major data, the column length for column-major data.
lda, ldb and ldc (the strides) are not relevant to my problem after all, but here's an explanation of them:

The elements of a matrix (i.e. a 2D array) are stored contiguously in memory. However, they may be stored in either column-major or row-major fashion. The stride represents the distance in memory between elements in adjacent rows (if row-major) or in adjacent columns (if column-major). This means that the stride is usually equal to the number of rows/columns in the matrix.

Matrix A =
[1 2 3]
[4 5 6]
Row-major stores values as {1,2,3,4,5,6}
Stride here is 3

Col-major stores values as {1, 4, 2, 5, 3, 6}
Stride here is 2

Matrix B =
[1 2 3]
[4 5 6]
[7 8 9]

Col-major storage is {1, 4, 7, 2, 5, 8, 3, 6, 9}
Stride here is 3

Read more: http://www.physicsforums.com
const float *B,
const int ldb, // Leading dimension (stride) of B, defined like lda.
const float beta, // Scaling factor for matrix C.
float *C,
const int ldc // Leading dimension (stride) of C, defined like lda.
);

Thus, it calculates
C ← α·op(A)·op(B) + βC
where op(X) is X or Xᵀ according to the corresponding transpose flag.
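As a small sketch of what lda means in practice (reusing the constants from the example above): if A is stored transposed as a K x M row-major array, TransA undoes that, and lda becomes the row length of the array actually passed:

    // Same C = A*B, but with A stored transposed (K x M, row-major).
    // sgemm transposes it back; lda is now M, the row length of At.
    const float At[K*M] = {1,4,7,8,
                           2,5,8,7,
                           3,6,9,6};
    cblas_sgemm(CblasRowMajor, CblasTrans, CblasNoTrans,
                M, N, K, alpha, At, M, B, ldb, beta, C, ldc);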
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;

C = A∗B

Cᵀ = (A∗B)ᵀ = Bᵀ∗Aᵀ  Swapping the order of A and B therefore directly yields the transposed matrix product, with no other transformation needed (the result C is then also transposed).
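This identity is what lets you feed row-major data to a column-major GEMM: a row-major array read as column-major is the transpose, so swapping the operands (and the M/N dimensions) produces the row-major C. A minimal sketch, reusing the variables from the first example:

    // Column-major call computing the same row-major C = A*B:
    // a row-major array read column-major is its transpose, and
    // C^T = B^T * A^T, so pass B first and swap M and N.
    cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                N, M, K, alpha, B, ldb, A, lda, beta, C, ldc);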

Y←αAX + βY

cblas_sgemv
Multiplies a matrix by a vector (single precision).

void cblas_sgemv (
const enum CBLAS_ORDER Order,
const enum CBLAS_TRANSPOSE TransA,
const int M, // rows of A
const int N, // columns of A
const float alpha,
const float *A,
const int lda,
const float *X,
const int incX, // stride between consecutive elements of X
const float beta,
float *Y,
const int incY // stride between consecutive elements of Y
);
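A minimal usage sketch (the 2x3 matrix and the vectors here are made up for illustration):

    // y = alpha*A*x + beta*y for a 2x3 row-major A
    const int m = 2, n = 3;
    const float A2[] = {1, 2, 3,
                        4, 5, 6};
    const float x[] = {1, 1, 1};
    float y[]       = {0, 0};
    cblas_sgemv(CblasRowMajor, CblasNoTrans, m, n,
                1.0f, A2, n,   // lda = n for row-major
                x, 1,          // incX = 1: x is contiguous
                0.0f, y, 1);   // incY = 1: y is contiguous
    // y is now {6, 15}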

STL version (using std::vector)

cblas_daxpy

Computes a constant times a vector plus a vector (double-precision).

On return, the contents of vector Y are replaced with the result. The value computed is (alpha * X[i]) + Y[i].

#include <OpenBlas/cblas.h>
#include <OpenBlas/common.h>
#include <algorithm>   // std::fill
#include <iostream>
#include <vector>

int main()
{
    blasint n = 10;
    blasint in_x = 1;
    blasint in_y = 1;

    std::vector<double> x(n);
    std::vector<double> y(n);

    double alpha = 10;

    std::fill(x.begin(), x.end(), 1.0);
    std::fill(y.begin(), y.end(), 2.0);

    cblas_daxpy(n, alpha, &x[0], in_x, &y[0], in_y);

    // Print y
    for (int j = 0; j < n; j++)
        std::cout << y[j] << "\t";

    std::cout << std::endl;
}
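Each element of y becomes 10 * 1.0 + 2.0 = 12, so the program prints ten 12s.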

cublas

cublasStatus_t cublasCreate(cublasHandle_t *handle)

Return Value: Meaning
CUBLAS_STATUS_SUCCESS: the initialization succeeded
CUBLAS_STATUS_NOT_INITIALIZED: the CUDA Runtime initialization failed
CUBLAS_STATUS_ALLOC_FAILED: the resources could not be allocated

cublasStatus_t cublasDestroy(cublasHandle_t handle)

Return Value: Meaning
CUBLAS_STATUS_SUCCESS: the shut down succeeded
CUBLAS_STATUS_NOT_INITIALIZED: the library was not initialized
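A small sketch of checking these return codes (nothing assumed beyond the two calls documented above):

    cublasHandle_t handle;
    cublasStatus_t status = cublasCreate(&handle);
    if (status != CUBLAS_STATUS_SUCCESS) {
        fprintf(stderr, "cublasCreate failed: %d\n", (int)status);
        return EXIT_FAILURE;
    }
    // ... use the handle ...
    cublasDestroy(handle);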

cublasStatus_t cublasSgemm(cublasHandle_t handle, // the only difference from CBLAS: a handle to the cuBLAS library context
cublasOperation_t transa,
cublasOperation_t transb,
int m,
int n,
int k,
const float *alpha, // note: alpha and beta are passed by pointer
const float *A,
int lda,
const float *B,
int ldb,
const float *beta,
float *C,
int ldc
)
For comparison, the CBLAS signature again:

void cblas_sgemm (
const enum CBLAS_ORDER Order, // Specifies row-major (C) or column-major (Fortran) data ordering.
//typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;

const enum CBLAS_TRANSPOSE TransA, // Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB,
const int M, // Number of rows in matrices A and C.
const int N, // Number of columns in matrices B and C.
const int K, // Number of columns in matrix A; number of rows in matrix B.
const float alpha, // Scaling factor for the product of matrices A and B.
const float *A,
const int lda, // Leading dimension (stride) of A.
const float *B,
const int ldb, // Leading dimension (stride) of B.
const float beta, // Scaling factor for matrix C.
float *C,
const int ldc // Leading dimension (stride) of C.
);
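A minimal end-to-end sketch (a hypothetical example, not from the original post; assumes the CUDA toolkit, compiled with something like nvcc testcublas.cu -lcublas). It recomputes the 4x3 * 3x2 product from the CBLAS example; cuBLAS is column-major, so the host arrays below are laid out column by column:

    #include <cstdio>
    #include <cuda_runtime.h>
    #include <cublas_v2.h>

    int main()
    {
        const int m = 4, n = 2, k = 3;
        // Column-major copies of A and B from the CBLAS example.
        const float hA[m * k] = {1, 4, 7, 8,   // column 0
                                 2, 5, 8, 7,   // column 1
                                 3, 6, 9, 6};  // column 2
        const float hB[k * n] = {5, 3, 1,      // column 0
                                 4, 2, 0};     // column 1
        float hC[m * n];

        float *dA, *dB, *dC;
        cudaMalloc((void **)&dA, sizeof(hA));
        cudaMalloc((void **)&dB, sizeof(hB));
        cudaMalloc((void **)&dC, sizeof(hC));

        cublasHandle_t handle;
        cublasCreate(&handle);

        // Copy the host matrices to the device.
        cublasSetMatrix(m, k, sizeof(float), hA, m, dA, m);
        cublasSetMatrix(k, n, sizeof(float), hB, k, dB, k);

        const float alpha = 1.0f, beta = 0.0f;
        cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                    m, n, k, &alpha, dA, m, dB, k, &beta, dC, m);

        // Copy the result back and print it row by row.
        cublasGetMatrix(m, n, sizeof(float), dC, m, hC, m);
        for (int i = 0; i < m; i++) {
            for (int j = 0; j < n; j++)
                printf("%g ", hC[i + j * m]); // column-major indexing
            printf("\n");
        }

        cublasDestroy(handle);
        cudaFree(dA); cudaFree(dB); cudaFree(dC);
        return 0;
    }

It should print the same result as the CPU version: 14 8 / 41 26 / 68 44 / 67 46.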
