矩阵与向量相乘的例子(5x5 矩阵乘以 5 维向量)

参考博客:http://blog.csdn.net/kkk584520/article/details/18812321

MatrixMultiply.c

    typedef int data_type;
/* Dimension of the square matrix and of both vectors. Must sit on its own
 * line: if the function signature follows on the same line it becomes part
 * of the macro body and no function is defined. */
#define N 5

/*
 * Matrix-vector product: cc = AA * bb.
 *
 * AA  N*N matrix stored row by row (element (i, j) is AA[i*N + j]).
 * bb  input vector of N elements.
 * cc  output vector of N elements; every element is overwritten.
 *
 * The accumulator has type data_type, so each row sum must fit in it.
 */
void MatrixMultiply(data_type AA[N*N], data_type bb[N], data_type cc[N])
{
    int i, j;

    for (i = 0; i < N; i++) {
        data_type sum = 0;

        for (j = 0; j < N; j++) {
            sum += AA[i*N + j] * bb[j];
        }
        cc[i] = sum;
    }
}

修改后:

#include <ap_cint.h>
typedef uint15 data_type;
/* Dimension of the square matrix and of both vectors. Must sit on its own
 * line: if the function signature follows on the same line it becomes part
 * of the macro body and no function is defined. */
#define N 5

/*
 * Matrix-vector product: cc = AA * bb, using the 15-bit unsigned data_type.
 *
 * AA  N*N matrix stored row by row (element (i, j) is AA[i*N + j]).
 * bb  input vector of N elements.
 * cc  output vector of N elements; every element is overwritten.
 *
 * The accumulator is also a data_type, so each row sum must fit in 15 bits
 * (at most 32767); larger sums do not fit.
 *
 * The loop labels are kept unchanged.
 * NOTE(review): presumably HLS directives refer to these labels by name;
 * check the project's directives file before renaming them.
 */
void MatrixMultiply(data_type AA[N*N], data_type bb[N], data_type cc[N])
{
    int i, j;

    MatrixMultiply_label2:
    for (i = 0; i < N; i++) {
        data_type sum = 0;

        MatrixMultiply_label1:
        for (j = 0; j < N; j++) {
            sum += AA[i*N + j] * bb[j];
        }
        cc[i] = sum;
    }
}

测试文件:TestMatrixMultiply.c:

#include <stdio.h>
#include <ap_cint.h>
typedef uint15 data_type;
#define N 5

/* Design under test, defined in MatrixMultiply.c: computes cc = AA * bb.
 * Declared here because calling a function without a prototype is not valid
 * C since C99. */
void MatrixMultiply(data_type AA[N*N], data_type bb[N], data_type cc[N]);

/* Prints the per-element difference between the reference and the result. */
void CheckResult(const data_type *matlab_result, const data_type *your_result);

/* Stimulus data. Not const-qualified: MatrixMultiply takes pointers to
 * non-const data, and passing a const array there discards the qualifier. */
data_type MatrixA[] = {
#include "a.h"
};
data_type Vector_b[] = {
#include "b.h"
};

/* Reference result the design output is compared against. */
const data_type MatlabResult_c[] = {
#include "c.h"
};

/* Output buffer filled by MatrixMultiply. */
data_type HLS_Result_c[N] = {0};

/* Returns how many of the N elements differ between expected and actual. */
static int CountMismatches(const data_type *expected, const data_type *actual)
{
    int i;
    int mismatches = 0;

    for (i = 0; i < N; i++) {
        if (expected[i] != actual[i]) {
            mismatches++;
        }
    }
    return mismatches;
}

/*
 * Runs the design once on the stimulus, prints the per-element error and
 * returns 0 when every element matches the reference, 1 otherwise, so a
 * wrong result is visible in the exit status and not only in the log.
 */
int main(void)
{
    printf("Checking Results:\r\n");
    MatrixMultiply(MatrixA, Vector_b, HLS_Result_c);
    CheckResult(MatlabResult_c, HLS_Result_c);
    return CountMismatches(MatlabResult_c, HLS_Result_c) != 0;
}

/*
 * Prints "reference - result" for each of the N elements.
 *
 * matlab_result  reference vector, N elements.
 * your_result    vector produced by the design, N elements.
 */
void CheckResult(const data_type *matlab_result, const data_type *your_result)
{
    int i;

    for (i = 0; i < N; i++) {
        /* Convert to int explicitly so the argument matches %d regardless
         * of how data_type is promoted. */
        int error = (int)matlab_result[i] - (int)your_result[i];

        printf("Idx %d: Error = %d \r\n", i, error);
    }
}

a.h

{82},  {10},  {16},  {15},  {66},
{91}, {28}, {98}, {43}, {4},
{13}, {55}, {96}, {92}, {85},
{92}, {96}, {49}, {80}, {94},
{64}, {97}, {81}, {96}, {68}

b.h

{76},
{75},
{40},
{66},
{18}

c.h

{9800},
{15846},
{16555},
{23124},
{22939}

ip核顶层:

	module test_multiply_v1_0 #
(
    // Users to add parameters here
    // User parameters ends
    // Do not modify the parameters beyond this line

    // Parameters of Axi Slave Bus Interface S00_AXIS
    parameter integer C_S00_AXIS_TDATA_WIDTH = 32,

    // Parameters of Axi Master Bus Interface M00_AXIS
    parameter integer C_M00_AXIS_TDATA_WIDTH = 32,
    // Not referenced anywhere in this module.
    parameter integer C_M00_AXIS_START_COUNT = 32
)
(
    // Users to add ports here
    // User ports ends
    // Do not modify the ports beyond this line

    // Ports of Axi Slave Bus Interface S00_AXIS
    input  wire                                    s00_axis_aclk,
    input  wire                                    s00_axis_aresetn,
    output wire                                    s00_axis_tready,
    input  wire [C_S00_AXIS_TDATA_WIDTH-1 : 0]     s00_axis_tdata,
    input  wire [(C_S00_AXIS_TDATA_WIDTH/8)-1 : 0] s00_axis_tstrb,
    input  wire                                    s00_axis_tlast,
    input  wire                                    s00_axis_tvalid,

    // Ports of Axi Master Bus Interface M00_AXIS
    input  wire                                    m00_axis_aclk,
    input  wire                                    m00_axis_aresetn,
    output wire                                    m00_axis_tvalid,
    output wire [C_M00_AXIS_TDATA_WIDTH-1 : 0]     m00_axis_tdata,
    output wire [(C_M00_AXIS_TDATA_WIDTH/8)-1 : 0] m00_axis_tstrb,
    output wire                                    m00_axis_tlast,
    input  wire                                    m00_axis_tready
);
    // Top-level wrapper: exposes one AXI4-Stream slave (S00) and one
    // AXI4-Stream master (M00) and forwards both to a single my_stream_ip
    // instance.
    //
    // The instance is clocked and reset from the S00 pins only;
    // m00_axis_aclk and m00_axis_aresetn are not used, so both interfaces
    // must share one clock domain. s00_axis_tstrb is not used either.
    //
    // my_stream_ip declares fixed 32-bit TDATA ports, so both TDATA_WIDTH
    // parameters have to stay at 32.

    // Add user logic here
    my_stream_ip my_stream_ip_v1_0_S01_AXIS_inst (
        .ACLK          (s00_axis_aclk),
        .ARESETN       (s00_axis_aresetn),
        .S_AXIS_TREADY (s00_axis_tready),
        .S_AXIS_TDATA  (s00_axis_tdata),
        .S_AXIS_TLAST  (s00_axis_tlast),
        .S_AXIS_TVALID (s00_axis_tvalid),
        .M_AXIS_TVALID (m00_axis_tvalid),
        .M_AXIS_TDATA  (m00_axis_tdata),
        .M_AXIS_TLAST  (m00_axis_tlast),
        .M_AXIS_TREADY (m00_axis_tready)
    );

    // my_stream_ip has no TSTRB port, which left this output undriven.
    // Mark every byte lane of every output beat as a data byte.
    assign m00_axis_tstrb = {(C_M00_AXIS_TDATA_WIDTH/8){1'b1}};
    // User logic ends

endmodule

 

ip核:(未完成)

`timescale 1ns / 1ps
// AXI4-Stream wrapper around the HLS-generated MatrixMultiply core
// (work in progress).
//
// What the logic below does:
//   * Idle          - waits for S_AXIS_TVALID, then moves to Read_Inputs.
//   * Read_Inputs   - accepts NUMBER_OF_INPUT_WORDS words: the first 25 are
//                     stored in AA[0..24], the last 5 in bb[0..4]. On the
//                     last word it releases reset2, raises start2 and moves
//                     to Write_Outputs.
//   * Write_Outputs - presents `sum` on M_AXIS_TDATA for
//                     NUMBER_OF_OUTPUT_WORDS beats, then returns to Idle.
//
// Changes relative to the previous revision:
//   * the port list no longer ends with a stray comma (syntax error);
//   * U1.ap_clk is driven by ACLK; it used to name S_AXI_ACLK, which is not
//     declared in this module, so the core was never clocked;
//   * AA_index / bb_index are cleared when a new frame starts; they used to
//     be cleared only by reset, so a second frame wrote past AA[24]/bb[4].
//
// NOTE(review): the following are left exactly as they were and still need
// design work before the core's results can reach the output stream:
//   * no branch ever assigns state <= Wait_Calculate, so that state is
//     never entered after reset and cc[0] is never loaded into sum;
//   * Wait_Calculate tests done2 == 0 - presumably it should wait for
//     done2 == 1; confirm against the core's handshake;
//   * Write_Outputs loads the 1-bit done2 flag into sum, not a cc[] value;
//   * start2 is never cleared and reset2 is never re-asserted after the
//     first frame;
//   * 30 output beats are sent although the core has only 5 results;
//   * M_AXIS_TLAST is asserted when nr_of_writes == 1, i.e. on the beat
//     before the final one (the final beat has nr_of_writes == 0);
//   * S_AXIS_TLAST, cc_val[], idle2, ready2 and cc_index are never read;
//   * the core's port widths are not visible here - data_type is 15 bits
//     in the C source, these nets are 32 bits; TODO confirm.
module my_stream_ip
(
    input  wire        ACLK,
    input  wire        ARESETN,
    output wire        S_AXIS_TREADY,
    input  wire [31:0] S_AXIS_TDATA,
    input  wire        S_AXIS_TLAST,
    input  wire        S_AXIS_TVALID,
    output wire        M_AXIS_TVALID,
    output wire [31:0] M_AXIS_TDATA,
    output wire        M_AXIS_TLAST,
    input  wire        M_AXIS_TREADY
);

    // Frame sizes in 32-bit words.
    localparam NUMBER_OF_INPUT_WORDS  = 30;   // 25 matrix words + 5 vector words
    localparam NUMBER_OF_OUTPUT_WORDS = 30;

    // Number of trailing input words that belong to the vector bb.
    localparam VECTOR_WORDS = 5;

    // FSM state encoding.
    localparam Idle           = 3'b100;
    localparam Read_Inputs    = 3'b010;
    localparam Write_Outputs  = 3'b001;
    localparam Wait_Calculate = 3'b000;

    // Control signals driven into the core.
    reg start2;
    reg reset2;

    // Status signals coming back from the core.
    wire done2;
    wire idle2;
    wire ready2;

    // Operand and result storage.
    reg  [31:0] AA [0:29];       // only AA[0..24] are written and used
    reg  [31:0] bb [0:4];
    wire [31:0] cc [0:4];
    wire        cc_val [0:4];
    reg  [31:0] AA_index;
    reg  [31:0] bb_index;
    reg  [31:0] cc_index;        // declared but not used yet

    reg [2:0]  state;
    reg [31:0] sum;

    // Remaining-word counters (count down to 0).
    reg [NUMBER_OF_INPUT_WORDS  - 1:0] nr_of_reads;
    reg [NUMBER_OF_OUTPUT_WORDS - 1:0] nr_of_writes;

    assign S_AXIS_TREADY = (state == Read_Inputs);
    assign M_AXIS_TVALID = (state == Write_Outputs);

    assign M_AXIS_TDATA  = sum;
    assign M_AXIS_TLAST  = (nr_of_writes == 1);

    always @(posedge ACLK)
    begin
        if (!ARESETN)   // synchronous reset, active low
        begin
            state        <= Idle;
            nr_of_reads  <= 0;
            nr_of_writes <= 0;
            sum          <= 0;
            AA_index     <= 0;
            bb_index     <= 0;
            reset2       <= 1;
            start2       <= 0;
        end
        else
            case (state)
                Idle:
                    if (S_AXIS_TVALID == 1)
                    begin
                        state       <= Read_Inputs;
                        nr_of_reads <= NUMBER_OF_INPUT_WORDS - 1;
                        sum         <= 0;
                        // Start every frame at element 0 of both buffers.
                        AA_index    <= 0;
                        bb_index    <= 0;
                    end

                Read_Inputs:
                    if (S_AXIS_TVALID == 1)
                    begin
                        // nr_of_reads counts down from 29: values 29..5 are
                        // matrix words, values 4..0 are vector words.
                        if (nr_of_reads >= VECTOR_WORDS)
                        begin
                            AA[AA_index] <= S_AXIS_TDATA;
                            AA_index     <= AA_index + 1;
                        end
                        else
                        begin
                            bb[bb_index] <= S_AXIS_TDATA;
                            bb_index     <= bb_index + 1;
                        end

                        if (nr_of_reads == 0)
                        begin
                            state        <= Write_Outputs;
                            reset2       <= 0;
                            start2       <= 1;
                            nr_of_writes <= NUMBER_OF_OUTPUT_WORDS - 1;
                        end
                        else
                            nr_of_reads <= nr_of_reads - 1;
                    end

                Wait_Calculate:
                    if (done2 == 0)
                    begin
                        sum   <= cc[0];
                        state <= Write_Outputs;
                    end

                Write_Outputs:
                    if (M_AXIS_TREADY == 1)
                    begin
                        if (nr_of_writes == 0)
                            state <= Idle;
                        else
                            sum <= done2;
                        // Decrements on every accepted beat, including the
                        // last one (the counter wraps and is reloaded at the
                        // end of the next Read_Inputs).
                        nr_of_writes <= nr_of_writes - 1;
                    end
            endcase
    end

    // HLS-generated matrix-vector multiplier.
    MatrixMultiply U1 (
        .ap_clk      (ACLK),
        .ap_rst      (reset2),
        .ap_start    (start2),
        .ap_done     (done2),
        .ap_idle     (idle2),
        .ap_ready    (ready2),
        .AA_0        (AA[0]),
        .AA_1        (AA[1]),
        .AA_2        (AA[2]),
        .AA_3        (AA[3]),
        .AA_4        (AA[4]),
        .AA_5        (AA[5]),
        .AA_6        (AA[6]),
        .AA_7        (AA[7]),
        .AA_8        (AA[8]),
        .AA_9        (AA[9]),
        .AA_10       (AA[10]),
        .AA_11       (AA[11]),
        .AA_12       (AA[12]),
        .AA_13       (AA[13]),
        .AA_14       (AA[14]),
        .AA_15       (AA[15]),
        .AA_16       (AA[16]),
        .AA_17       (AA[17]),
        .AA_18       (AA[18]),
        .AA_19       (AA[19]),
        .AA_20       (AA[20]),
        .AA_21       (AA[21]),
        .AA_22       (AA[22]),
        .AA_23       (AA[23]),
        .AA_24       (AA[24]),
        .bb_0        (bb[0]),
        .bb_1        (bb[1]),
        .bb_2        (bb[2]),
        .bb_3        (bb[3]),
        .bb_4        (bb[4]),
        .cc_0        (cc[0]),
        .cc_0_ap_vld (cc_val[0]),
        .cc_1        (cc[1]),
        .cc_1_ap_vld (cc_val[1]),
        .cc_2        (cc[2]),
        .cc_2_ap_vld (cc_val[2]),
        .cc_3        (cc[3]),
        .cc_3_ap_vld (cc_val[3]),
        .cc_4        (cc[4]),
        .cc_4_ap_vld (cc_val[4])
    );

endmodule

IP核中例化的 MatrixMultiply 模块是 HLS 由 C 语言代码生成的 Verilog 代码。

Learn ZYNQ (7)的更多相关文章

  1. Learn ZYNQ(10) – zybo cluster word count

    1.配置环境说明 spark:5台zybo板,192.168.1.1master,其它4台为slave hadoop:192.168.1.1(外接SanDisk ) 2.单节点hadoop测试: 如果 ...

  2. Learn ZYNQ (9)

    创建zybo cluster的spark集群(计算层面): 1.每个节点都是同样的filesystem,mac地址冲突,故: vi ./etc/profile export PATH=/usr/loc ...

  3. Learn ZYNQ (8)

    在zed的PS端运行spark(已成功): (1)设置uboot为sd卡启动rootfs: "sdboot=if mmcinfo; then " \                 ...

  4. Learn ZYNQ (3)

    移植android3.3到ZedBoard follow doc:Android移植Guide1.3.pdf follow website: http://elinux.org/Zedboard_An ...

  5. Learn ZYNQ Programming(1)

    GPIO LED AND KEY: part1:gpio leds and gpio btns combination. (include 1~4) part2:use gpio btns inter ...

  6. 大于16MB的QSPI存放程序引起的ZYNQ重启风险

    ZYNQ芯片是近两年比较流行的片子,双ARM+FPGA,在使用分立FPGA和CPU的场合很容易替代原来的分立器件. ZYNQ可以外接QSPI FLASH作为程序的存储介质. QSPI和SPI flas ...

  7. Atitit learn by need 需要的时候学与预先学习知识图谱路线图

    Atitit learn by need 需要的时候学与预先学习知识图谱路线图 1. 体系化是什么 架构 知识图谱路线图思维导图的重要性11.1. 体系就是架构21.2. 只见树木不见森林21.3. ...

  8. Python 爬取所有51VOA网站的Learn a words文本及mp3音频

    Python 爬取所有51VOA网站的Learn a words文本及mp3音频 #!/usr/bin/env python # -*- coding: utf-8 -*- #Python 爬取所有5 ...

  9. zynq学习01 新建一个Helloworld工程

    1,好早买了块FPGA板,zynq 7010 .终极目标是完成相机图像采集及处理.一个Window C++程序猿才开始学FPGA,一个小菜鸟,准备转行. 2,关于这块板,卖家的官方资料学起来没劲.推荐 ...

随机推荐

  1. poj 1733

    这题离散化+并查集,没看出关dp什么事.(那他为什么放到dp里面) 用Si记录前i项的和.拆成两个点,i*2表示与第i个相同,i*2+1表示与第i个不同.用并查集判断.区间[a,b]就可以看成Sb-S ...

  2. Android刷机教程

    我的机器是Nexus 5 一. 安装驱动 如何进入fastboot模式 1. 拔掉数据线,将手机关机 2. 关机后同时按住[音量减小键]和[开关机键]即可进入Fastboot模式 开启usb调试  - ...

  3. JavaScript——之对象参数的引用传递

    今天碰到一个问题,怎样把参数变更影响到函数外部,如: <script> var myname = "wood"; A(myname); document.write(m ...

  4. ACM/ICPC 之 网络流-拆点构图(POJ2391)

    需要直接到达,因此源点经过三条边后必须要达到汇点,但为了保证网络流的正确性(路径可反悔),因此不可限制层次网络的最高层次为3,最好的方法既是让所有点拆分成两个点,一个点从汇点进入,一个点通向汇点,任意 ...

  5. python logging模块笔记

    1 ) 给logger定制了两个日志级别INFO和DEBUG,分别通过filehandler添加不同输出到不同文件,但如何让DEBUG里只有DEBUG的信息? 答案:可重写DEBUG对应的Fileha ...

  6. SQL表新增触发(触发器)

    ALTER TRIGGER [InsertStoreJITOnloadQuantity] ON [dbo].[Sourceing] After INSERT AS --登記計劃數量(新增時YN=0) ...

  7. python习题 (1):login

    #!/uer/bin/env python # _*_ coding: utf-8 _*_ import sys retry_limit = 3 retry_count = 0 account_fil ...

  8. Android平台下OpenCV移植与使用---基于C/C++

    在<Android Studio增加NDK代码编译支持--Mac环境>和<Mac平台下Opencv开发环境搭建>两篇文章中,介绍了如何使用NDK环境和Opencv环境搭建与测试 ...

  9. 分页显示中关于"序号"的问题

    项目开发中要求列表显示要明显看到总条目数,所以就要求序号从1开始. 如下为从1开始的序号展示: <s:iterator value="#request.pageView.records ...

  10. python:列表与元组

    1.python包含六种内建的序列,列表和元组是其中的两种,列表可以修改,元组则不能 2.通用序列操作 2.1 索引:和C#的区别是索引可以为负数,最后一个元素索引为-1,索引超出范围会报错 例:&g ...