矩阵相乘的例子

参考博客:http://blog.csdn.net/kkk584520/article/details/18812321

MatrixMultiply.c

    /* Element type used by the matrix, the input vector and the result. */
    typedef int data_type;

    /* Dimension of the square matrix and length of both vectors. */
    #define N 5

    /*
     * Matrix-vector product: cc = AA * bb.
     *
     * AA  N x N matrix stored row by row in a flat array of N*N elements;
     *     only read.
     * bb  input vector of N elements; only read.
     * cc  output vector of N elements; every element is overwritten.
     */
    void MatrixMultiply(data_type AA[N*N],data_type bb[N],data_type cc[N])
    {
        int i, j;

        for (i = 0; i < N; i++) {
            /* Dot product of row i of AA with bb. */
            data_type sum = 0;

            for (j = 0; j < N; j++) {
                sum += AA[i*N + j] * bb[j];
            }
            cc[i] = sum;
        }
    }

修改后:

#include <ap_cint.h>

/* Element type: uint15 as declared by ap_cint.h. */
typedef uint15 data_type;

/* Dimension of the square matrix and length of both vectors. */
#define N 5

/*
 * Matrix-vector product: cc = AA * bb.
 *
 * AA  N x N matrix stored row by row in a flat array of N*N elements;
 *     only read.
 * bb  input vector of N elements; only read.
 * cc  output vector of N elements; every element is overwritten.
 *
 * The accumulator has the same type as the elements, so each row's dot
 * product must fit in data_type.
 * The loop labels give the two loops stable names.
 */
void MatrixMultiply(data_type AA[N*N],data_type bb[N],data_type cc[N])
{
    int i, j;

    MatrixMultiply_label2:
    for (i = 0; i < N; i++) {
        /* Dot product of row i of AA with bb. */
        data_type sum = 0;

        MatrixMultiply_label1:
        for (j = 0; j < N; j++) {
            sum += AA[i*N + j] * bb[j];
        }
        cc[i] = sum;
    }
}

测试文件:TestMatrixMultiply.c:

#include <stdio.h>
#include <ap_cint.h>
/* Element type of the test vectors; uint15 comes from ap_cint.h. */
typedef uint15 data_type;
/* Number of result elements compared by CheckResult. */
#define N 5
/* Matrix operand passed as the first argument of MatrixMultiply; values come from a.h. */
const data_type MatrixA[] = {
#include "a.h"
};
/* Vector operand passed as the second argument of MatrixMultiply; values come from b.h. */
const data_type Vector_b[] = {
#include "b.h"
};
/* Reference result from c.h (presumably produced with MATLAB, going by the name). */
const data_type MatlabResult_c[] = {
#include "c.h"
};
/* Output buffer filled by MatrixMultiply and compared against MatlabResult_c. */
data_type HLS_Result_c[N] = {0};
void CheckResult(data_type * matlab_result,data_type * your_result); int main(void)
{
int i;
printf("Checking Results:\r\n");
MatrixMultiply(MatrixA,Vector_b,HLS_Result_c);
CheckResult(MatlabResult_c,HLS_Result_c);
return 0;
}
void CheckResult(data_type * matlab_result,data_type * your_result)
{
int i;
for(i = 0;i<N;i++)
{
printf("Idx %d: Error = %d \r\n",i,matlab_result[i]-your_result[i]);
}
}

a.h

{82},  {10},  {16},  {15},  {66},
{91}, {28}, {98}, {43}, {4},
{13}, {55}, {96}, {92}, {85},
{92}, {96}, {49}, {80}, {94},
{64}, {97}, {81}, {96}, {68}

b.h

{76},
{75},
{40},
{66},
{18}

c.h

{9800},
{15846},
{16555},
{23124},
{22939}

ip核顶层:

	// Top level of the IP: exposes one AXI4-Stream slave port (S00_AXIS) and
	// one AXI4-Stream master port (M00_AXIS) and forwards both to a single
	// my_stream_ip instance, which is clocked and reset from the slave side.
	module test_multiply_v1_0 #
	(
	    // Users to add parameters here
	    // User parameters ends
	    // Do not modify the parameters beyond this line

	    // Parameters of Axi Slave Bus Interface S00_AXIS
	    parameter integer C_S00_AXIS_TDATA_WIDTH = 32,

	    // Parameters of Axi Master Bus Interface M00_AXIS
	    parameter integer C_M00_AXIS_TDATA_WIDTH = 32,
	    parameter integer C_M00_AXIS_START_COUNT = 32
	)
	(
	    // Users to add ports here
	    // User ports ends
	    // Do not modify the ports beyond this line

	    // Ports of Axi Slave Bus Interface S00_AXIS
	    input wire s00_axis_aclk,
	    input wire s00_axis_aresetn,
	    output wire s00_axis_tready,
	    input wire [C_S00_AXIS_TDATA_WIDTH-1 : 0] s00_axis_tdata,
	    input wire [(C_S00_AXIS_TDATA_WIDTH/8)-1 : 0] s00_axis_tstrb,
	    input wire s00_axis_tlast,
	    input wire s00_axis_tvalid,

	    // Ports of Axi Master Bus Interface M00_AXIS
	    input wire m00_axis_aclk,
	    input wire m00_axis_aresetn,
	    output wire m00_axis_tvalid,
	    output wire [C_M00_AXIS_TDATA_WIDTH-1 : 0] m00_axis_tdata,
	    output wire [(C_M00_AXIS_TDATA_WIDTH/8)-1 : 0] m00_axis_tstrb,
	    output wire m00_axis_tlast,
	    input wire m00_axis_tready
	);

	    // my_stream_ip has no TSTRB port, so the master-side strobe is tied
	    // high here (every byte of each output word is data) instead of
	    // leaving the output undriven.
	    assign m00_axis_tstrb = {(C_M00_AXIS_TDATA_WIDTH/8){1'b1}};

	    // Instantiation of Axi Bus Interface S00_AXIS
	    // Add user logic here
	    // s00_axis_tstrb, m00_axis_aclk and m00_axis_aresetn are not used:
	    // the core runs entirely on the slave-side clock and reset.
	    my_stream_ip my_stream_ip_v1_0_S01_AXIS_inst (
	        .ACLK(s00_axis_aclk),
	        .ARESETN(s00_axis_aresetn),
	        .S_AXIS_TREADY(s00_axis_tready),
	        .S_AXIS_TDATA(s00_axis_tdata),
	        .S_AXIS_TLAST(s00_axis_tlast),
	        .S_AXIS_TVALID(s00_axis_tvalid),
	        .M_AXIS_TVALID(m00_axis_tvalid),
	        .M_AXIS_TDATA(m00_axis_tdata),
	        .M_AXIS_TLAST(m00_axis_tlast),
	        .M_AXIS_TREADY(m00_axis_tready)
	    );
	    // User logic ends

	endmodule

 

ip核:(未完成)

`timescale 1ns / 1ps
// AXI4-Stream front end for the MatrixMultiply core (work in progress).
//
// Per packet the state machine accepts NUMBER_OF_INPUT_WORDS words on the
// slave port: the first 25 are stored in AA[0..24], the last 5 in bb[0..4].
// It then asserts ap_start on the MatrixMultiply instance and emits
// NUMBER_OF_OUTPUT_WORDS words on the master port.
//
// NOTE(review): the output stage is unfinished. Wait_Calculate is never
// entered, Write_Outputs sends `sum` (loaded from done2) rather than the
// cc[] results, and cc_index / cc_val are not used by this logic yet.
module my_stream_ip
(
    ACLK,
    ARESETN,
    S_AXIS_TREADY,
    S_AXIS_TDATA,
    S_AXIS_TLAST,
    S_AXIS_TVALID,
    M_AXIS_TVALID,
    M_AXIS_TDATA,
    M_AXIS_TLAST,
    M_AXIS_TREADY
);

    input          ACLK;
    input          ARESETN;
    output         S_AXIS_TREADY;
    input  [31 :0] S_AXIS_TDATA;
    input          S_AXIS_TLAST;
    input          S_AXIS_TVALID;
    output         M_AXIS_TVALID;
    output [31 :0] M_AXIS_TDATA;
    output         M_AXIS_TLAST;
    input          M_AXIS_TREADY;

    // Words per input packet (25 matrix words followed by 5 vector words)
    // and words per output packet.
    localparam NUMBER_OF_INPUT_WORDS  = 30;
    localparam NUMBER_OF_OUTPUT_WORDS = 30;

    // State encoding.
    localparam Idle           = 3'b100;
    localparam Read_Inputs    = 3'b010;
    localparam Write_Outputs  = 3'b001;
    localparam Wait_Calculate = 3'b000; //my add

    // Control signals driven into the MatrixMultiply core.
    reg start2;
    reg reset2;
    // Status signals returned by the MatrixMultiply core.
    wire done2;
    wire idle2;
    wire ready2;

    // Operand and result storage.
    reg  [31:0] AA [0:29];      // only entries 0..24 are connected to the core
    reg  [31:0] bb [0:4];
    wire [31:0] cc [0:4];
    wire        cc_val [0:4];
    reg  [31:0] AA_index;       // next write position in AA
    reg  [31:0] bb_index;       // next write position in bb
    reg  [31:0] cc_index;       // reserved for the output stage, unused so far

    reg [2:0]  state;
    reg [31:0] sum;             // word currently presented on M_AXIS_TDATA
    reg [NUMBER_OF_INPUT_WORDS - 1:0]  nr_of_reads;   // input words still expected after the current one
    reg [NUMBER_OF_OUTPUT_WORDS - 1:0] nr_of_writes;  // output words still to send after the current one

    assign S_AXIS_TREADY = (state == Read_Inputs);
    assign M_AXIS_TVALID = (state == Write_Outputs);
    assign M_AXIS_TDATA  = sum;
    // NOTE(review): asserted while one more word remains after the current
    // one; confirm this is the intended position of TLAST.
    assign M_AXIS_TLAST  = (nr_of_writes == 1);

    always @(posedge ACLK)
    begin // process The_SW_accelerator
        if (!ARESETN) // Synchronous reset (active low)
        begin
            state        <= Idle;
            nr_of_reads  <= 0;
            nr_of_writes <= 0;
            sum          <= 0;
            AA_index     <= 0;
            bb_index     <= 0;
            reset2       <= 1;
            start2       <= 0;
        end
        else
            case (state)
                Idle:
                    if (S_AXIS_TVALID == 1)
                    begin
                        state       <= Read_Inputs;
                        nr_of_reads <= NUMBER_OF_INPUT_WORDS - 1;
                        sum         <= 0;
                        // Rewind the write pointers so every packet fills
                        // AA and bb from index 0 instead of running past
                        // the end of the arrays on the second packet.
                        AA_index    <= 0;
                        bb_index    <= 0;
                    end

                Read_Inputs:
                    if (S_AXIS_TVALID == 1)
                    begin
                        // nr_of_reads counts down from 29: values 29..5 are
                        // matrix words, values 4..0 are vector words.
                        if (nr_of_reads >= 5)
                        begin
                            AA[AA_index] <= S_AXIS_TDATA;
                            AA_index     <= AA_index + 1;
                        end
                        else
                        begin
                            bb[bb_index] <= S_AXIS_TDATA;
                            bb_index     <= bb_index + 1;
                        end

                        if (nr_of_reads == 0)
                        begin
                            // Last input word: release the core's reset and
                            // start it.
                            // NOTE(review): start2 is never cleared again
                            // and the state skips Wait_Calculate.
                            state        <= Write_Outputs;
                            reset2       <= 0;
                            start2       <= 1;
                            nr_of_writes <= NUMBER_OF_OUTPUT_WORDS - 1;
                        end
                        else
                            nr_of_reads <= nr_of_reads - 1;
                    end

                Wait_Calculate:
                    // NOTE(review): no transition leads here, and the test
                    // is on done2 == 0; confirm whether done2 == 1 was meant.
                    if (done2 == 0)
                    begin
                        sum   <= cc[0];
                        state <= Write_Outputs;
                    end

                Write_Outputs:
                    if (M_AXIS_TREADY == 1)
                    begin
                        if (nr_of_writes == 0)
                            state <= Idle;
                        else
                            sum <= done2; // NOTE(review): placeholder, not a result word
                        nr_of_writes <= nr_of_writes - 1;
                    end

                // Return to Idle from any unused state encoding.
                default:
                    state <= Idle;
            endcase
    end

    // HLS-generated core. It is clocked from ACLK, the only clock this
    // module has.
    MatrixMultiply U1 (
        .ap_clk(ACLK),
        .ap_rst(reset2),
        .ap_start(start2),
        .ap_done(done2),
        .ap_idle(idle2),
        .ap_ready(ready2),
        .AA_0(AA[0]),
        .AA_1(AA[1]),
        .AA_2(AA[2]),
        .AA_3(AA[3]),
        .AA_4(AA[4]),
        .AA_5(AA[5]),
        .AA_6(AA[6]),
        .AA_7(AA[7]),
        .AA_8(AA[8]),
        .AA_9(AA[9]),
        .AA_10(AA[10]),
        .AA_11(AA[11]),
        .AA_12(AA[12]),
        .AA_13(AA[13]),
        .AA_14(AA[14]),
        .AA_15(AA[15]),
        .AA_16(AA[16]),
        .AA_17(AA[17]),
        .AA_18(AA[18]),
        .AA_19(AA[19]),
        .AA_20(AA[20]),
        .AA_21(AA[21]),
        .AA_22(AA[22]),
        .AA_23(AA[23]),
        .AA_24(AA[24]),
        .bb_0(bb[0]),
        .bb_1(bb[1]),
        .bb_2(bb[2]),
        .bb_3(bb[3]),
        .bb_4(bb[4]),
        .cc_0(cc[0]),
        .cc_0_ap_vld(cc_val[0]),
        .cc_1(cc[1]),
        .cc_1_ap_vld(cc_val[1]),
        .cc_2(cc[2]),
        .cc_2_ap_vld(cc_val[2]),
        .cc_3(cc[3]),
        .cc_3_ap_vld(cc_val[3]),
        .cc_4(cc[4]),
        .cc_4_ap_vld(cc_val[4])
    );
endmodule

ip核引用的是HLS从C语言生成的Verilog代码。

Learn ZYNQ (7)的更多相关文章

  1. Learn ZYNQ(10) – zybo cluster word count

    1.配置环境说明 spark:5台zybo板,192.168.1.1master,其它4台为slave hadoop:192.168.1.1(外接SanDisk ) 2.单节点hadoop测试: 如果 ...

  2. Learn ZYNQ (9)

    创建zybo cluster的spark集群(计算层面): 1.每个节点都是同样的filesystem,mac地址冲突,故: vi ./etc/profile export PATH=/usr/loc ...

  3. Learn ZYNQ (8)

    在zed的PS端运行spark(已成功): (1)设置uboot为sd卡启动rootfs: "sdboot=if mmcinfo; then " \                 ...

  4. Learn ZYNQ (3)

    移植android3.3到ZedBoard follow doc:Android移植Guide1.3.pdf follow website: http://elinux.org/Zedboard_An ...

  5. Learn ZYNQ Programming(1)

    GPIO LED AND KEY: part1:gpio leds and gpio btns combination. (include 1~4) part2:use gpio btns inter ...

  6. 大于16MB的QSPI存放程序引起的ZYNQ重启风险

    ZYNQ芯片是近两年比较流行的片子,双ARM+FPGA,在使用分立FPGA和CPU的场合很容易替代原来的分立器件. ZYNQ可以外接QSPI FLASH作为程序的存储介质. QSPI和SPI flas ...

  7. Atitit learn by need 需要的时候学与预先学习知识图谱路线图

    Atitit learn by need 需要的时候学与预先学习知识图谱路线图 1. 体系化是什么 架构 知识图谱路线图思维导图的重要性11.1. 体系就是架构21.2. 只见树木不见森林21.3. ...

  8. Python 爬取所有51VOA网站的Learn a words文本及mp3音频

    Python 爬取所有51VOA网站的Learn a words文本及mp3音频 #!/usr/bin/env python # -*- coding: utf-8 -*- #Python 爬取所有5 ...

  9. zynq学习01 新建一个Helloworld工程

    1,好早买了块FPGA板,zynq 7010 .终极目标是完成相机图像采集及处理.一个Window C++程序猿才开始学FPGA,一个小菜鸟,准备转行. 2,关于这块板,卖家的官方资料学起来没劲.推荐 ...

随机推荐

  1. 微信支付 发布后显示 System:access_denied

    微信支付发布后显示 System:access_denied (android)或 System:not_allow(IOS) 我们项目用的是.NET MVC3 授权目录是:http://mynetd ...

  2. 数组去重及数组的prototype原型

    Array.prototype.check= function(){ for(var i=0;i<this.length;i++){ for(var j=i+1;j<this.length ...

  3. python运算符

    aaarticlea/png;base64,iVBORw0KGgoAAAANSUhEUgAAAcIAAAHCCAIAAADzel4SAAAgAElEQVR4Aey9+bMcSXLnV1dmna/ejR

  4. C语言 链表排序

    #include <stdio.h> #include <stdlib.h> #include <assert.h> typedef struct node{ in ...

  5. 35.两链表的第一个公共结点[Find the first common node of two linked list]

    [题目] 两个单向链表,找出它们的第一个公共结点. 链表的结点定义为:  C++ Code  123456   struct ListNode {     int         m_nKey;    ...

  6. [Linux]Linux系统调用列表

    本文列出了大部分常见的Linux系统调用,并附有简要中文说明. 以下是Linux系统调用的一个列表,包含了大部分常用系统调用和由系统调用派生出的函数.这可能是你在互联网上所能看到的唯一一篇中文注释的 ...

  7. SQL优化

    1.对查询进行优化,应尽量避免全表扫描,首先应考虑在 where 及 order by 涉及的列上建立索引. 2.应尽量避免在 where 子句中对字段进行 null 值判断,否则将导致引擎放弃使用索 ...

  8. UIPickerView选择器的使用方法

    UIPickerView是选择列表内容的控件 使用方法与UITableView类似 都需要用array传入数据 用Delegate DataSource中的代理方法实现各种显示功能 @interfac ...

  9. 开发ios的语言

    iOS发展这么多年了,很多第三方语言都向开发一种自己的iOS,于是多种跨平台诞生了! Object-c.swift: 当然是开发iOS的首先,毕竟是苹果自己的东西,也是最流行.最适合开发ios的,无论 ...

  10. CKEditor 配置

    在asp.net 中注意需要特殊处理控件的ID 先引入js <script type="text/javascript" src="ckeditor/ckedito ...