// Function-like macro that evaluates to the smaller of x and y.
// Whichever argument is selected is evaluated twice, so avoid passing
// expressions with side effects.
// NOTE(review): this macro is defined before the standard headers are included
// and is not used by the code shown here; a macro named `min` can clash with
// std::min inside those headers -- consider removing it.
#define min(x,y) (((x) < (y)) ? (x) : (y))

#include <stdio.h>
#include <stdlib.h>
#include <cublas_v2.h>
#include <iostream>
#include <vector>
//extern "C"
//{
#include <cblas.h>
//} using namespace std;
// Demo: computes C = alpha * A * B + beta * C with cblas_sgemm for a 4x3
// matrix A and a 3x2 matrix B (both row-major), then prints the 4x2 result:
// one element per line, with an empty line after each row.
// Returns EXIT_SUCCESS.
int main()
{
    const enum CBLAS_ORDER Order = CblasRowMajor;
    const enum CBLAS_TRANSPOSE TransA = CblasNoTrans;
    const enum CBLAS_TRANSPOSE TransB = CblasNoTrans;
    const int M = 4;  // rows of A, rows of C
    const int N = 2;  // columns of B, columns of C
    const int K = 3;  // columns of A, rows of B
    const float alpha = 1;
    const float beta = 0;
    const int lda = K;  // columns of A
    const int ldb = N;  // columns of B
    const int ldc = N;  // columns of C
    const float A[M * K] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6};
    const float B[K * N] = {5, 4, 3, 2, 1, 0};
    // Zero-initialised so the buffer never holds indeterminate values,
    // even though beta == 0 means the old contents do not contribute.
    float C[M * N] = {};

    cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);

    // `using namespace std;` is commented out in this file, so the stream
    // names must be qualified explicitly.
    for (int i = 0; i < M; i++)
    {
        for (int j = 0; j < N; j++)
        {
            std::cout << C[i * N + j] << "\n";
        }
        std::cout << '\n';
    }
    return EXIT_SUCCESS;
}

g++ testblas.c++ -lopenblas  -o testout

g++ testblas.c++ -lopenblas_piledriverp-r0.2.9 -o testout   本地编译openblas版本

注意：链接库选项（-l…）要放在引用该库的源文件/目标文件之后。

cblas_sgemm

Multiplies two matrices (single-precision).

void cblas_sgemm (
const enum CBLAS_ORDER Order, // Specifies row-major (C) or column-major (Fortran) data ordering.
//typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
const enum CBLAS_TRANSPOSE TransA,//Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB,
const int M, //Number of rows in matrices A and C.
const int N,//Number of columns in matrices B and C.
const int K, //Number of columns in matrix A; number of rows in matrix B
const float alpha, //Scaling factor for the product of matrices A and B
const float *A,
const int lda, //The leading dimension (stride) of matrix A; for a row-major matrix A[m][n] this is normally n (the number of columns), for a column-major matrix it is normally m (the number of rows).

lda, ldb and ldc (the strides) are not relevant to my problem after all, but here's an explanation of them: The elements of a matrix (i.e. a 2D array) are stored contiguously in memory. However, they may be stored in either column-major or row-major fashion. The stride represents the distance in memory between elements in adjacent rows (if row-major) or in adjacent columns (if column-major). This means that the stride is usually equal to the number of columns (row-major) or the number of rows (column-major) of the matrix.

Matrix A =
[1 2 3]
[4 5 6]
Row-major stores values as {1,2,3,4,5,6}
Stride here is 3
Col-major stores values as {1, 4, 2, 5, 3, 6}
Stride here is 2

Matrix B =
[1 2 3]
[4 5 6]
[7 8 9]
Col-major storage is {1, 4, 7, 2, 5, 8, 3, 6, 9}
Stride here is 3

Read more: http://www.physicsforums.com

const float *B,
const int ldb, //The leading dimension (stride) of matrix B; for a row-major matrix B[m][n] this is normally n (the number of columns), for a column-major matrix it is normally m (the number of rows).
const float beta, //Scaling factor for matrix C.
float *C,
const int ldc //The leading dimension (stride) of matrix C; for a row-major matrix C[m][n] this is normally n (the number of columns), for a column-major matrix it is normally m (the number of rows).
);

Thus, it calculates either
C←αAB + βC
or
C←αBA + βC
with optional use of transposed forms of A, B, or both.


// Storage-order selector passed as the `Order` argument: row-major (C) or
// column-major (Fortran) data ordering.
typedef enum CBLAS_ORDER     {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
// Transpose selector passed as the `TransA` / `TransB` arguments, specifying
// whether the corresponding matrix is transposed.
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;

$C = A \cdot B$

$C^{T} = (A \cdot B)^{T} = B^{T} \cdot A^{T}$  把A和B的顺序颠倒,可以直接得到转置矩阵乘法的结果,不用作其他变换(结果C也是转置)。

Y←αAX + βY

cblas_sgemv
Multiplies a matrix by a vector (single precision).
// Multiplies a matrix by a vector (single precision): Y <- alpha*A*X + beta*Y.
void cblas_sgemv (
const enum CBLAS_ORDER Order, // row-major or column-major data ordering
const enum CBLAS_TRANSPOSE TransA, // whether to transpose matrix A
const int M, // presumably the number of rows of A -- confirm against the CBLAS reference
const int N, // presumably the number of columns of A -- confirm against the CBLAS reference
const float alpha, // scaling factor applied to the product A*X
const float *A, // matrix A
const int lda, // leading dimension (stride) of A
const float *X, // vector X
const int incX, // presumably the stride between consecutive elements of X -- confirm
const float beta, // scaling factor applied to Y
float *Y, // vector Y; non-const, updated with the result of the formula in the header comment
const int incY // presumably the stride between consecutive elements of Y -- confirm
);

STL版本

cblas_daxpy

Computes a constant times a vector plus a vector (double-precision).  

On return, the contents of vector Y are replaced with the result. The value computed is (alpha * X[i]) + Y[i].

#include <OpenBlas/cblas.h>
#include <OpenBlas/common.h>
#include <iostream>
#include <vector> int main()
{
blasint n = 10;
blasint in_x =1;
blasint in_y =1; std::vector<double> x(n);
std::vector<double> y(n); double alpha = 10; std::fill(x.begin(),x.end(),1.0);
std::fill(y.begin(),y.end(),2.0); cblas_daxpy( n, alpha, &x[0], in_x, &y[0], in_y); //Print y
for(int j=0;j<n;j++)
std::cout << y[j] << "\t"; std::cout << std::endl;
}

cublas

cublasStatus_t

cublasCreate(cublasHandle_t *handle)


Return Value | Meaning
CUBLAS_STATUS_SUCCESS | the initialization succeeded
CUBLAS_STATUS_NOT_INITIALIZED | the CUDA™ Runtime initialization failed
CUBLAS_STATUS_ALLOC_FAILED | the resources could not be allocated

cublasStatus_t cublasDestroy(cublasHandle_t handle)

Return Value | Meaning
CUBLAS_STATUS_SUCCESS | the shut down succeeded
CUBLAS_STATUS_NOT_INITIALIZED | the library was not initialized


// cuBLAS single-precision matrix-multiply prototype, listed next to
// cblas_sgemm for comparison. Differences visible in this listing: a leading
// library handle, no storage-order argument, and alpha/beta passed by pointer.
cublasStatus_t cublasSgemm(cublasHandle_t handle,  // the difference highlighted by the original note: handle to the cuBLAS library context.
cublasOperation_t transa,
cublasOperation_t transb,
int m,
int n,
int k,
const float *alpha,  // passed by pointer here
const float*A,
int lda,
const float*B,
int ldb,
const float*beta,  // passed by pointer here
float*C,
int ldc
);
// Multiplies two matrices (single precision); per the notes in this file it
// computes C <- alpha*A*B + beta*C, with optional use of transposed forms of
// A, B, or both.
void cblas_sgemm (
const enum CBLAS_ORDER Order, // Specifies row-major (C) or column-major (Fortran) data ordering.
//typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
const enum CBLAS_TRANSPOSE TransA, // Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB, // Specifies whether to transpose matrix B.
const int M, // Number of rows in matrices A and C.
const int N, // Number of columns in matrices B and C.
const int K, // Number of columns in matrix A; number of rows in matrix B.
const float alpha, // Scaling factor for the product of matrices A and B.
const float *A,
const int lda, // Leading dimension (stride) of matrix A; for a row-major matrix A[m][n] this is normally n, for a column-major matrix it is normally m.
const float *B,
const int ldb, // Leading dimension (stride) of matrix B; for a row-major matrix B[m][n] this is normally n, for a column-major matrix it is normally m.
const float beta, // Scaling factor for matrix C.
float *C,
const int ldc // Leading dimension (stride) of matrix C; for a row-major matrix C[m][n] this is normally n, for a column-major matrix it is normally m.
);

【神经网络与深度学习】【C/C++】使用blas做矩阵乘法的更多相关文章

  1. 使用blas做矩阵乘法

      #define min(x,y) (((x) < (y)) ? (x) : (y)) #include <stdio.h> #include <stdlib.h> # ...

  2. (转)神经网络和深度学习简史(第一部分):从感知机到BP算法

    深度|神经网络和深度学习简史(第一部分):从感知机到BP算法 2016-01-23 机器之心 来自Andrey Kurenkov 作者:Andrey Kurenkov 机器之心编译出品 参与:chen ...

  3. [DeeplearningAI笔记]神经网络与深度学习人工智能行业大师访谈

    觉得有用的话,欢迎一起讨论相互学习~Follow Me 吴恩达采访Geoffrey Hinton NG:前几十年,你就已经发明了这么多神经网络和深度学习相关的概念,我其实很好奇,在这么多你发明的东西中 ...

  4. 【吴恩达课后测验】Course 1 - 神经网络和深度学习 - 第二周测验【中英】

    [中英][吴恩达课后测验]Course 1 - 神经网络和深度学习 - 第二周测验 第2周测验 - 神经网络基础 神经元节点计算什么? [ ]神经元节点先计算激活函数,再计算线性函数(z = Wx + ...

  5. 【吴恩达课后测验】Course 1 - 神经网络和深度学习 - 第一周测验【中英】

    [吴恩达课后测验]Course 1 - 神经网络和深度学习 - 第一周测验[中英] 第一周测验 - 深度学习简介 和“AI是新电力”相类似的说法是什么? [  ]AI为我们的家庭和办公室的个人设备供电 ...

  6. 对比《动手学深度学习》 PDF代码+《神经网络与深度学习 》PDF

    随着AlphaGo与李世石大战的落幕,人工智能成为话题焦点.AlphaGo背后的工作原理"深度学习"也跳入大众的视野.什么是深度学习,什么是神经网络,为何一段程序在精密的围棋大赛中 ...

  7. 如何理解归一化(Normalization)对于神经网络(深度学习)的帮助?

    如何理解归一化(Normalization)对于神经网络(深度学习)的帮助? 作者:知乎用户链接:https://www.zhihu.com/question/326034346/answer/730 ...

  8. 【神经网络与深度学习】卷积神经网络(CNN)

    [神经网络与深度学习]卷积神经网络(CNN) 标签:[神经网络与深度学习] 实际上前面已经发布过一次,但是这次重新复习了一下,决定再发博一次. 说明:以后的总结,还应该以我的认识进行总结,这样比较符合 ...

  9. 【神经网络与深度学习】【CUDA开发】caffe-windows win32下的编译尝试

    [神经网络与深度学习][CUDA开发]caffe-windows win32下的编译尝试 标签:[神经网络与深度学习] [CUDA开发] 主要是在开发Qt的应用程序时,需要的是有一个使用的库文件也只是 ...

随机推荐

  1. bat批处理文件

    将某个文件夹中的所有txt文件合并到a.txt中,如果文件比较多的话,手动会很费时,编写程序也很麻烦,这个时候就可以用批处理文件,如下: type *.txt > a.txt 把上面这行粘贴到新 ...

  2. Postman之 SSL证书配置随手记

    参考文档: https://learning.getpostman.com/docs/postman/sending_api_requests/certificates/ 随着 https 的推动,更 ...

  3. C语言构建小型Web服务器

    #include <stdio.h> #include <sys/socket.h> #include <stdlib.h> #include <string ...

  4. js实现网页上图片循环播放

    <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN""http://www.w3.org/T ...

  5. noi.ac #534 猫

    题目链接:戳我 [问题描述] 有n座山,m只猫和p个工作人员.山从左往右编号为1∼n,山i和i−1之间的距离是di米. 有一天,猫都到山上去玩了:第i只猫会到山hi去,并一直玩到时间ti,之后就在那座 ...

  6. java浮点数精度问题解决方法

    基础知识回顾: BigDecimal.setScale()方法用于格式化小数点setScale(1)表示保留一位小数,默认用四舍五入方式 setScale(1,BigDecimal.ROUND_DOW ...

  7. jeecg中自定义dialog,实现窗体的弹出

    自定一个dialog,在子窗体中写一个方法,然后通过iframe进行调取function createwindowoktext(title, addurl,width,height,oktext,ca ...

  8. 用bootstrap写一个登陆页

    <div class="container"> <p class="text-center marginTop35">后台管理系统< ...

  9. 通过nginx转发,用外网连接阿里云的redis,报Unexpected end of stream的解决办法

    一.在与redis同一个内网的服务器上A的nginx做了下面的设置 stream { upstream redis { server  redis.rds.aliyuncs.com:6379 max_ ...

  10. JavaWeb_(Struts2框架)Ognl小案例查询帖子

    此系列博文基于同一个项目已上传至github 传送门 JavaWeb_(Struts2框架)Struts创建Action的三种方式 传送门 JavaWeb_(Struts2框架)struts.xml核 ...