// sums_ex6.c 2019/09/14 by marsee
#include <stdio.h>
#include <stdint.h>
#include "xaxis2dma.h"
#include "xdma2axis8.h"
#include "xs_squares_axis.h"
// Input buffers: their addresses are handed to the two XDma2axis8 instances in main().
volatile uint8_t data0[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
volatile uint8_t data1[10] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
// Output buffer: its address is handed to XAxis2dma in main() and its contents
// are printed once XAxis2dma reports done.
volatile uint32_t result[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
// Sets up the two XDma2axis8 readers, the XS_squares_axis core and the
// XAxis2dma writer, starts them (writer first, readers last), waits for the
// writer to finish and prints the three buffers side by side.
int main(){
    XDma2axis8 reader0, reader1;
    XAxis2dma writer;
    XS_squares_axis squares;
    int idx;

    // Bind each driver instance to its device ID.
    XDma2axis8_Initialize(&reader0, 0);
    XDma2axis8_Initialize(&reader1, 1);
    XAxis2dma_Initialize(&writer, 0);
    XS_squares_axis_Initialize(&squares, 0);

    // Program the buffer addresses into the cores.
    XDma2axis8_Set_in_V(&reader0, (u32)data0);
    XDma2axis8_Set_in_V(&reader1, (u32)data1);
    XAxis2dma_Set_out_V(&writer, (u32)result);

    // Start order is kept exactly as in the original sequence:
    // writer, squares core, then the two readers.
    XAxis2dma_Start(&writer);
    XS_squares_axis_Start(&squares);
    XDma2axis8_Start(&reader0);
    XDma2axis8_Start(&reader1);

    // Busy-wait until the writer reports completion.
    while(!XAxis2dma_IsDone(&writer)){
    }

    for(idx = 0; idx != 10; ++idx){
        printf("data0[%d] = %d, data1[%d] = %d, result[%d] = %d\n", idx, (int)data0[idx], idx, (int)data1[idx], idx, (int)result[idx]);
    }
    return(0);
}
// axis2xf8uc3.cpp
// 2021/04/23 by marsee
//
#include <stdint.h>
#include "ap_int.h"
#include "hls_stream.h"
#include "ap_axi_sdata.h"
// Packs pixels that arrive one per 32-bit AXI4-Stream beat (low 24 bits used)
// into a dense 3-bytes-per-pixel layout written as 32-bit words through _dst.
//
//   ins   : input stream; beats are discarded until one has user != 0
//   rows  : image height; rows * cols pixels are consumed after frame start
//   cols  : image width
//   _dst  : destination of the packed 32-bit words
//   return: always 0
//
// Every group of four pixels P0..P3 produces three words:
//   word0 = P1[7:0]  : P0[23:0]
//   word1 = P2[15:0] : P1[23:8]
//   word2 = P3[23:0] : P2[23:16]
// NOTE(review): a word is written only when the pixel completing it arrives,
// so if rows * cols is not a multiple of 4 the bytes of the final partial
// group that have not yet been paired are never written to _dst.
int axis2xf8uc3(hls::stream<ap_axis<32,1,1,1> >& ins, int rows, int cols, volatile ap_uint<32>* _dst){
#pragma HLS INTERFACE m_axi depth=360000 port=_dst offset=slave
#pragma HLS INTERFACE s_axilite port=cols
#pragma HLS INTERFACE s_axilite port=rows
#pragma HLS INTERFACE axis register_mode=both register port=ins
#pragma HLS INTERFACE s_axilite port=return
ap_axis<32,1,1,1> pix;
// Holds the pixels of the current group of four.
uint32_t pixay[4];
LOOP_DY : do { // the frame starts when user becomes 1
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
ins >> pix;
} while(pix.user == 0);
const int xy_limit = rows * cols;
// Index of the next 32-bit word to write to _dst.
int xfcnt = 0;
LOOP_XY: for(int xy=0; xy<xy_limit; xy++){
#pragma HLS LOOP_TRIPCOUNT avg=480000 max=480000 min=480000
#pragma HLS PIPELINE II=1
if(!(xy == 0)) // the first pixel was already read by LOOP_DY
ins >> pix; // input from the AXI4-Stream
switch(xy%4){
case 0 :
// First pixel of a group: nothing can be written yet.
pixay[0] = pix.data;
break;
case 1 :
pixay[1] = pix.data;
// word0: low byte of P1 above the 24 bits of P0
_dst[xfcnt++] = ((pixay[1]&0xff)<<24)+(pixay[0]&0xffffff);
break;
case 2 :
pixay[2] = pix.data;
// word1: low 16 bits of P2 above the upper 16 bits of P1
_dst[xfcnt++] = ((pixay[2]&0xffff)<<16)+((pixay[1]&0xffff00)>>8);
break;
default : // 3
pixay[3] = pix.data;
// word2: 24 bits of P3 above the top byte of P2
_dst[xfcnt++] = ((pixay[3]&0xffffff)<<8)+((pixay[2]&0xff0000)>>16);
break;
}
}
return(0);
}
// axis2xf8uc8_tb.cpp
// 2021/04/23 by marsee
//
#include "ap_int.h"
#include "hls_stream.h"
#include "ap_axi_sdata.h"
#include "opencv2/opencv.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgcodecs/imgcodecs.hpp"
// Declaration of the function under test; main() calls it with the prepared stream.
int axis2xf8uc3(hls::stream<ap_axis<32,1,1,1> >& ins, int rows, int cols, volatile ap_uint<32>* _dst);
int main(int argc, char **argv){
hls::stream<ap_axis<32,1,1,1> > ins;
hls::stream<ap_axis<32,1,1,1> > ins_soft;
ap_axis<32,1,1,1> pix;
if (argc != 2) {
fprintf(stderr, "Usage: %s <INPUT IMAGE>", argv[0]);
exit(1);
}
cv::Mat img = cv::imread(argv[1], 1); // reading in the color image
if (img.data == NULL) {
fprintf(stderr, "ERROR: Cannot open image %s\n ", argv[1]);
exit(1);
}
// ピクセルを入れる領域の確保
std::vector<int32_t> rd_bmp(sizeof(int32_t)*img.cols*img.rows);
// rd_bmp にBMPのピクセルを代入
cv::Mat_<cv::Vec3b> dst_vec3b = cv::Mat_<cv::Vec3b>(img);
for (int y=0; y<img.rows; y++){
for (int x=0; x<img.cols; x++){
cv::Vec3b pixel;
pixel = dst_vec3b(y,x);
rd_bmp[y*img.cols+x] = (pixel[0] & 0xff) | ((pixel[1] & 0xff)<<8) | ((pixel[2] & 0xff)<<16);
// blue - pixel[0]; green - pixel[1]; red - pixel[2];
}
}
// ins に入力データを用意する
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pix.data = i;
ins << pix;
}
for(int j=0; j < img.rows; j++){
for(int i=0; i < img.cols; i++){
pix.data = (ap_int<32>)rd_bmp[(j*img.cols)+i];
if (j==0 && i==0) // 最初のデータの時に TUSER を 1 にする
pix.user = 1;
else
pix.user = 0;
if (i == img.cols-1) // 行の最後でTLASTをアサートする
pix.last = 1;
else
pix.last = 0;
ins << pix;
}
}
cv::Mat out_img;
out_img.create(img.rows, img.cols, CV_8UC3);
axis2xf8uc3(ins, img.rows, img.cols, (volatile ap_uint<32> *)out_img.data);
cv::imwrite("output.jpg", out_img);
return(0);
}
-I/usr/local/include
を設定した。-L/usr/local/lib -lopencv_core -lopencv_imgcodecs -lopencv_imgproc
を設定した。test2.jpg
// acc_sensor.c
// 2021/04/19 by marsee
// コード中のコメント部分は”OpenCores.org の I2C controller core ”のマニュアルから引用した
// https://opencores.org/projects/i2c
#include <stdio.h>
#include <stdint.h>
#include "xil_io.h"
#include "xparameters.h"
#include <unistd.h>
#include "xtime_l.h"
// Register addresses of the I2C master, one register per 32-bit word from
// XPAR_I2CM_AXI4LS_0_BASEADDR. RXR/TXR share one offset and CR/SR share
// another (per the OpenCores I2C core manual the read side is RXR/SR and the
// write side is TXR/CR).
#define I2CM_PRER_LO XPAR_I2CM_AXI4LS_0_BASEADDR
#define I2CM_PRER_HI (XPAR_I2CM_AXI4LS_0_BASEADDR+0x4)
#define I2CM_CTR (XPAR_I2CM_AXI4LS_0_BASEADDR+0x8)
#define I2CM_RXR (XPAR_I2CM_AXI4LS_0_BASEADDR+0xc)
#define I2CM_TXR (XPAR_I2CM_AXI4LS_0_BASEADDR+0xc)
#define I2CM_CR (XPAR_I2CM_AXI4LS_0_BASEADDR+0x10)
#define I2CM_SR (XPAR_I2CM_AXI4LS_0_BASEADDR+0x10)
// Command register (CR) bits.
#define CR_STA 0x80 // generate (repeated) start condition
#define CR_STO 0x40 // generate stop condition
#define CR_RD 0x20 // read from slave
#define CR_WR 0x10 // write to slave
#define CR_NACK 0x08 // when a receiver, send ACK (ACK = '0') or NACK (ACK = '1')
#define CR_IACK 0x01 // Interrupt acknowledge. When set, clears a pending interrupt.
// Status register (SR) bits.
#define SR_RxACK 0x80 // Received acknowledge from slave. This flag represents acknowledge from the addressed slave.
// '1' = No acknowledge received , '0' = Acknowledge received
#define SR_Busy 0x40 // I2C bus busy
// '1' after START signal detected , '0' after STOP signal detected
#define SR_AL 0x20 // Arbitration lost. This bit is set when the core lost arbitration. Arbitration is lost when:
// * a STOP signal is detected, but not requested , * The master drives SDA high, but SDA is low.
#define SR_TIP 0x02 // Transfer in progress.
// '1' when transferring data , '0' when transfer complete
#define SR_IF 0x01 // Interrupt Flag. This bit is set when an interrupt is pending, which
// will cause a processor interrupt request if the IEN bit is set.
// The Interrupt Flag is set when:
// * one byte transfer has been completed , * arbitration is lost
// Spins until the TIP bit of the status register reads back as 0.
void idle_check(){
    for(;;){
        if((Xil_In32(I2CM_SR) & SR_TIP) == 0)
            break;
    }
}
void acc_sensor_write(uint8_t dev_addr, uint8_t waddr, uint8_t wdata){
dev_addr &= 0xfe;
idle_check();
Xil_Out32(I2CM_TXR, (u32)dev_addr);
Xil_Out32(I2CM_CR, (u32)(CR_STA|CR_WR));
idle_check();
Xil_Out32(I2CM_TXR, (u32)waddr);
Xil_Out32(I2CM_CR, (u32)CR_WR);
idle_check();
Xil_Out32(I2CM_TXR, (u32)wdata);
Xil_Out32(I2CM_CR, (u32)(CR_STO|CR_WR));
idle_check();
}
uint8_t acc_sensor_read1(uint8_t dev_addr, uint8_t raddr){
const uint8_t devw_addr = dev_addr & 0xfe;
dev_addr |= 0x01;
idle_check();
Xil_Out32(I2CM_TXR, (u32)devw_addr);
Xil_Out32(I2CM_CR, (u32)(CR_STA|CR_WR));
idle_check();
Xil_Out32(I2CM_TXR, (u32)raddr);
Xil_Out32(I2CM_CR, (u32)CR_WR);
idle_check();
Xil_Out32(I2CM_TXR, (u32)dev_addr);
Xil_Out32(I2CM_CR, (u32)(CR_STA|CR_WR));
idle_check();
Xil_Out32(I2CM_CR, (u32)(CR_STO|CR_RD|CR_NACK));
idle_check();
uint8_t rdata8 = (uint8_t)(Xil_In32(I2CM_RXR) & 0xff);
return(rdata8);
}
void acc_sensor_read3(uint8_t dev_addr, uint8_t raddr, int32_t *rdata){
uint8_t rdata8a[3];
const uint8_t devw_addr = dev_addr & 0xfe;
dev_addr |= 0x01;
idle_check();
Xil_Out32(I2CM_TXR, (u32)devw_addr);
Xil_Out32(I2CM_CR, (u32)(CR_STA|CR_WR));
idle_check();
Xil_Out32(I2CM_TXR, (u32)raddr);
Xil_Out32(I2CM_CR, (u32)CR_WR);
idle_check();
Xil_Out32(I2CM_TXR, (u32)dev_addr);
Xil_Out32(I2CM_CR, (u32)(CR_STA|CR_WR));
idle_check();
Xil_Out32(I2CM_CR, (u32)CR_RD);
idle_check();
rdata8a[0] = (uint8_t)(Xil_In32(I2CM_RXR) & 0xff);
Xil_Out32(I2CM_CR, (u32)CR_RD);
idle_check();
rdata8a[1] = (uint8_t)(Xil_In32(I2CM_RXR) & 0xff);
Xil_Out32(I2CM_CR, (u32)(CR_STO|CR_RD|CR_NACK));
idle_check();
rdata8a[2] = (uint8_t)(Xil_In32(I2CM_RXR) & 0xff);
*rdata = (((int32_t)rdata8a[0])<<12) + (((int32_t)rdata8a[1])<<4) + (((int32_t)(rdata8a[2] & 0xf0))>>4);
if(*rdata & 0x80000) // Is the 19th bit 1?
*rdata |= 0xfff00000; // sign extension
}
// Configures the I2C master and the sensor, then loops forever: waits until
// bit 0 of sensor register 0x04 is set, reads the X/Y/Z values and prints
// them together with a time stamp.
int main(){
    int32_t dataX, dataY, dataZ;
    XTime cur_time;
    uint8_t reg;

    // I2C operating frequency setting 433KHz/100MHz, 415kHz/96MHz
    Xil_Out32(I2CM_PRER_LO, (u32)0x29);
    Xil_Out32(I2CM_PRER_HI, (u32)0x0);
    Xil_Out32(I2CM_CTR, 0x80); // enable core

    acc_sensor_write(0x3a, 0x2c, 0x83); // I2C speed is Hi speed, +-8g
    // Clear OFFSET_X_H, OFFSET_X_L, OFFSET_Y_H, OFFSET_Y_L, OFFSET_Z_H, OFFSET_Z_L.
    for(reg = 0x1e; reg <= 0x23; reg++){
        acc_sensor_write(0x3a, reg, 0x00);
    }
    acc_sensor_write(0x3a, 0x2d, 0x00); // standby clear

    for(;;){
        // Poll register 0x04 until its bit 0 reads 1.
        while((acc_sensor_read1(0x3b, 0x04) & 0x01) != 0x01){
        }

        acc_sensor_read3(0x3b, 0x08, &dataX);
        acc_sensor_read3(0x3b, 0x0b, &dataY);
        acc_sensor_read3(0x3b, 0x0e, &dataZ);

        XTime_GetTime(&cur_time);
        printf("%lf,%x,%x,%x\n", (double)((long long int)cur_time)/333333.3435, (int)dataX, (int)dataY, (int)dataZ);
        usleep(2434); // for 400 kHz
    }
}
# I/O standard for the iic_0 SCL and SDA ports.
set_property IOSTANDARD LVCMOS33 [get_ports iic_0_scl_io]
set_property IOSTANDARD LVCMOS33 [get_ports iic_0_sda_io]
# Package pin assignment for the iic_0 SCL and SDA ports.
set_property PACKAGE_PIN V8 [get_ports iic_0_scl_io]
set_property PACKAGE_PIN W8 [get_ports iic_0_sda_io]
// i2cm_axi4ls_tb.v
// 2021/04/13 by marsee
// コード中のコメント部分は”OpenCores.org の I2C controller core ”のマニュアルから引用した
// https://opencores.org/projects/i2c
`default_nettype none
`timescale 100ps / 1ps
// Test bench for i2cm_axi4ls.
//
// An AXI4-Lite master BFM drives the register interface of the DUT, whose I2C
// pads are connected through scl_sda_buf to pulled-up scl/sda lines shared
// with an I2C slave model at address SADR.
// Stimulus: program the prescaler and read it back, enable the core, write
// 0xa5 and 0x5a to the slave starting at memory address 0x01, then read one
// byte back from address 0x01 and display it.
module i2cm_axi4ls_tb;
    parameter DELAY = 10;
    parameter integer C_S_AXI_LITE_ADDR_WIDTH = 12; // Address width of the AXI Lite Interface
    parameter integer C_S_AXI_LITE_DATA_WIDTH = 32; // Data width of the AXI Lite Interface

    // Quoted from tst_bench_top.v of I2C controller core on OpenCores.org.
    // https://opencores.org/projects/i2c
    // Register byte addresses as seen on the AXI4-Lite side (one register per 32-bit word).
    parameter PRER_LO = 12'h000;
    parameter PRER_HI = 12'h004;
    parameter CTR     = 12'h008;
    parameter RXR     = 12'h00C;
    parameter TXR     = 12'h00C;
    parameter CR      = 12'h010;
    parameter SR      = 12'h010;
    parameter TXR_R   = 12'h014; // undocumented / reserved output
    parameter CR_R    = 12'h018; // undocumented / reserved output

    parameter RD   = 1'b1;        // R/W bit value for a read
    parameter WR   = 1'b0;        // R/W bit value for a write
    parameter SADR = 7'b0010_000; // 7-bit address of the slave model

    // Clock and reset
    reg ACLK;
    reg ARESETN;

    // AXI4-Lite signals driven by the BFM
    wire [C_S_AXI_LITE_ADDR_WIDTH-1:0]   S_AXI_AWADDR;
    wire [2:0]                           S_AXI_AWPROT;
    wire                                 S_AXI_AWVALID;
    wire [C_S_AXI_LITE_DATA_WIDTH-1:0]   S_AXI_WDATA;
    wire [C_S_AXI_LITE_DATA_WIDTH/8-1:0] S_AXI_WSTRB;
    wire                                 S_AXI_WVALID;
    wire                                 S_AXI_BREADY;
    wire [C_S_AXI_LITE_ADDR_WIDTH-1:0]   S_AXI_ARADDR;
    wire [2:0]                           S_AXI_ARPROT;
    wire                                 S_AXI_ARVALID;
    wire                                 S_AXI_RREADY;

    // AXI4-Lite signals driven by the DUT
    wire                                 S_AXI_AWREADY;
    wire                                 S_AXI_WREADY;
    wire [1:0]                           S_AXI_BRESP;
    wire                                 S_AXI_BVALID;
    wire                                 S_AXI_ARREADY;
    wire [C_S_AXI_LITE_DATA_WIDTH-1:0]   S_AXI_RDATA;
    wire [1:0]                           S_AXI_RRESP;
    wire                                 S_AXI_RVALID;

    // I2C pad signals between the DUT and scl_sda_buf
    wire scl_pad_i;
    wire scl_pad_o;
    wire scl_padoen_o;
    wire sda_pad_i;
    wire sda_pad_o;
    wire sda_padoen_o;

    // Shared I2C bus lines
    wire scl;
    wire sda;

    // Read data returned by the BFM read task
    reg [31:0] rdata;

    // Clock: toggles every 50 time units (5 ns with the 100 ps time unit).
    always #50 ACLK = ~ACLK;

    // Instantiate the Unit Under Test (UUT)
    i2cm_axi4ls #(
        .C_S_AXI_LITE_ADDR_WIDTH(12),
        .C_S_AXI_LITE_DATA_WIDTH(32)
    ) i2cm_axi4ls_i (
        .s_axi_lite_aclk(ACLK),
        .s_axi_lite_resetn(ARESETN),
        .s_axi_lite_awaddr(S_AXI_AWADDR),
        .s_axi_lite_awvalid(S_AXI_AWVALID),
        .s_axi_lite_awready(S_AXI_AWREADY),
        .s_axi_lite_wdata(S_AXI_WDATA),
        .s_axi_lite_wstrb(S_AXI_WSTRB),
        .s_axi_lite_wvalid(S_AXI_WVALID),
        .s_axi_lite_wready(S_AXI_WREADY),
        .s_axi_lite_bresp(S_AXI_BRESP),
        .s_axi_lite_bvalid(S_AXI_BVALID),
        .s_axi_lite_bready(S_AXI_BREADY),
        .s_axi_lite_araddr(S_AXI_ARADDR),
        .s_axi_lite_arvalid(S_AXI_ARVALID),
        .s_axi_lite_arready(S_AXI_ARREADY),
        .s_axi_lite_rdata(S_AXI_RDATA),
        .s_axi_lite_rresp(S_AXI_RRESP),
        .s_axi_lite_rvalid(S_AXI_RVALID),
        .s_axi_lite_rready(S_AXI_RREADY),
        .scl_i(scl_pad_i),
        .scl_o(scl_pad_o),
        .scl_t(scl_padoen_o),
        .sda_i(sda_pad_i),
        .sda_o(sda_pad_o),
        .sda_t(sda_padoen_o)
    );

    // i2c slave model
    i2c_slave_model #(SADR) i2c_slave (
        .scl(scl),
        .sda(sda)
    );

    pullup p1(scl); // pullup scl line
    pullup p2(sda); // pullup sda line

    // AXI4_Lite_Master_BFM
    AXI4_Lite_Master_BFM #(
        .DELAY(DELAY),
        .C_S_AXI_LITE_ADDR_WIDTH(12),
        .C_S_AXI_LITE_DATA_WIDTH(32)
    ) LMBFMi(
        .ACLK(ACLK),
        .S_AXI_AWADDR(S_AXI_AWADDR),
        .S_AXI_AWPROT(S_AXI_AWPROT),
        .S_AXI_AWVALID(S_AXI_AWVALID),
        .S_AXI_AWREADY(S_AXI_AWREADY),
        .S_AXI_WDATA(S_AXI_WDATA),
        .S_AXI_WSTRB(S_AXI_WSTRB),
        .S_AXI_WVALID(S_AXI_WVALID),
        .S_AXI_WREADY(S_AXI_WREADY),
        .S_AXI_BRESP(S_AXI_BRESP),
        .S_AXI_BVALID(S_AXI_BVALID),
        .S_AXI_BREADY(S_AXI_BREADY),
        .S_AXI_ARADDR(S_AXI_ARADDR),
        .S_AXI_ARPROT(S_AXI_ARPROT),
        .S_AXI_ARVALID(S_AXI_ARVALID),
        .S_AXI_ARREADY(S_AXI_ARREADY),
        .S_AXI_RDATA(S_AXI_RDATA),
        .S_AXI_RRESP(S_AXI_RRESP),
        .S_AXI_RVALID(S_AXI_RVALID),
        .S_AXI_RREADY(S_AXI_RREADY)
    );

    // Pad buffers connecting the DUT's _i/_o/_padoen signals to the scl/sda lines
    scl_sda_buf cd_buf_i(
        .scl_pad_i(scl_pad_i),
        .scl_pad_o(scl_pad_o),
        .scl_padoen_o(scl_padoen_o),
        .sda_pad_i(sda_pad_i),
        .sda_pad_o(sda_pad_o),
        .sda_padoen_o(sda_padoen_o),
        .scl(scl),
        .sda(sda)
    );

    // test
    // NOTE(review): each TIP poll below tests S_AXI_RDATA[1] after the read
    // task has returned rather than the latched rdata; this presumably relies
    // on the slave keeping its read data stable after the transfer - confirm.
    initial begin
        // Initialize Inputs
        ACLK = 0;
        ARESETN = 0;

        // Wait 100 ns for global reset to finish
        #1000;
        ARESETN = 1;
        #1000;

        // Add stimulus here
        @(posedge ACLK); // advance to the next clock edge
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Write(PRER_LO, 32'h29, 0, 2); // address is PRER_LO, data Write = 0x29
        LMBFMi.AXI_LiteM_1Seq_Write(PRER_HI, 32'h0, 0, 2);  // address is PRER_HI, data Write = 0x0

        #1000; // Wait 100 ns
        @(posedge ACLK); // advance to the next clock edge
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(PRER_LO, 2, rdata); // PRER_LO, rmax_wait=2
        LMBFMi.AXI_LiteM_1Seq_Read(PRER_HI, 2, rdata); // PRER_HI, rmax_wait=2

        #1000; // Wait 100 ns
        @(posedge ACLK); // advance to the next clock edge
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Write(CTR, 8'h80, 0, 2); // enable core

        // ---------------- Write transaction ----------------
        LMBFMi.AXI_LiteM_1Seq_Write(TXR, {SADR,WR}, 0, 2 ); // present slave address, set write-bit
        LMBFMi.AXI_LiteM_1Seq_Write( CR, 8'h90, 0, 2 );     // set command (start, write)

        // check tip bit
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata);
        while(S_AXI_RDATA[1])
            LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata); // SR bit1 TIP

        // send register address
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Write(TXR, 8'h01, 0, 2); // present slave's memory address
        LMBFMi.AXI_LiteM_1Seq_Write(CR, 8'h10, 0, 2);  // set command (write)

        // check tip bit
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata);
        while(S_AXI_RDATA[1])
            LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata); // SR bit1 TIP

        // send data byte
        LMBFMi.AXI_LiteM_1Seq_Write(TXR, 8'ha5, 0, 2); // present data
        LMBFMi.AXI_LiteM_1Seq_Write(CR, 8'h10, 0, 2);  // set command (write)

        // check tip bit
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata);
        while(S_AXI_RDATA[1])
            LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata); // SR bit1 TIP

        // send data byte for next register address (auto_inc)
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Write(TXR, 8'h5a, 0, 2); // present data
        LMBFMi.AXI_LiteM_1Seq_Write(CR, 8'h50, 0, 2);  // set command (stop, write)

        // check tip bit
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata);
        while(S_AXI_RDATA[1])
            LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata); // SR bit1 TIP

        // ---------------- Read transaction ----------------
        #1000;
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Write(TXR, {SADR,WR}, 0, 2 ); // present slave address, set write-bit
        LMBFMi.AXI_LiteM_1Seq_Write( CR, 8'h90, 0, 2 );     // set command (start, write)

        // check tip bit
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata);
        while(S_AXI_RDATA[1])
            LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata); // SR bit1 TIP

        // send register address
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Write(TXR, 8'h01, 0, 2); // present slave's memory address
        LMBFMi.AXI_LiteM_1Seq_Write(CR, 8'h10, 0, 2);  // set command (write)

        // check tip bit
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata);
        while(S_AXI_RDATA[1])
            LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata); // SR bit1 TIP

        // repeated start: the slave must now be addressed with the read bit,
        // otherwise the following read command is issued to a slave that is
        // still in write mode.
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Write(TXR, {SADR,RD}, 0, 2 ); // present slave address, set read-bit
        LMBFMi.AXI_LiteM_1Seq_Write( CR, 8'h90, 0, 2 );     // set command (start, write)

        // check tip bit
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata);
        while(S_AXI_RDATA[1])
            LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata); // SR bit1 TIP

        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Write(CR, 8'h68, 0, 2 ); // set command (read, nack, stop)

        // check tip bit
        #DELAY;
        LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata);
        while(S_AXI_RDATA[1])
            LMBFMi.AXI_LiteM_1Seq_Read(SR, 2, rdata); // SR bit1 TIP

        // fetch the received byte
        LMBFMi.AXI_LiteM_1Seq_Read(RXR, 2, rdata);
        $display("\nRead data = %x at time %t", rdata, $time);
        $finish;
    end
endmodule
`default_nettype wire
// wb2axi4ls_conv.v
// Convert Wishbone Bus Version I2C Master IP to AXI4-Lite Interface
// 2021/04/12 by marsee
//
`default_nettype none
// AXI4-Lite slave to Wishbone master bridge.
//
// Each AXI4-Lite write or read is turned into one Wishbone cycle:
//  * the Wishbone address is taken from the AXI address starting at bit 2
//    (one Wishbone register per 32-bit AXI word),
//  * the low WISHBONE_BUS_DATA_WIDTH bits of WDATA are forwarded as write data,
//  * read data is zero-extended to the AXI data width,
//  * BRESP and RRESP are always OKAY.
// wb_we is asserted only when WSTRB[0] is set. wb_inta_o is not used.
// NOTE(review): the write and read state machines share wb_adr_i and wb_cyc,
// so the design appears to assume that a write and a read are never in
// flight at the same time - confirm against the intended bus master.
// C_S_AXI_LITE_ADDR_WIDTH must be at least WISHBONE_BUS_ADDR_WIDTH + 2.
module wb2axi4ls_conv # (
    parameter integer C_S_AXI_LITE_ADDR_WIDTH = 5,  // Address width of the AXI Lite Interface
    parameter integer C_S_AXI_LITE_DATA_WIDTH = 32, // Data width of the AXI Lite Interface
    parameter integer WISHBONE_BUS_ADDR_WIDTH = 3,
    parameter integer WISHBONE_BUS_DATA_WIDTH = 8
)(
    input wire s_axi_lite_aclk,
    input wire axi_resetn,
    // AXI Lite Write Address Channel
    input wire s_axi_lite_awvalid,
    output wire s_axi_lite_awready,
    input wire [C_S_AXI_LITE_ADDR_WIDTH-1: 0] s_axi_lite_awaddr,
    // AXI Lite Write Data Channel
    input wire s_axi_lite_wvalid,
    output wire s_axi_lite_wready,
    input wire [C_S_AXI_LITE_DATA_WIDTH-1: 0] s_axi_lite_wdata,
    input wire [C_S_AXI_LITE_DATA_WIDTH/8-1:0] s_axi_lite_wstrb,
    // AXI Lite Write Response Channel
    output wire [1:0] s_axi_lite_bresp,
    output wire s_axi_lite_bvalid,
    input wire s_axi_lite_bready,
    // AXI Lite Read Address Channel
    input wire s_axi_lite_arvalid,
    output wire s_axi_lite_arready,
    input wire [C_S_AXI_LITE_ADDR_WIDTH-1: 0] s_axi_lite_araddr,
    // AXI Lite Read Data Channel
    output wire s_axi_lite_rvalid,
    input wire s_axi_lite_rready,
    output wire [C_S_AXI_LITE_DATA_WIDTH-1: 0] s_axi_lite_rdata,
    output wire [1:0] s_axi_lite_rresp,
    // WishBone Interface (names follow the Wishbone slave's port names)
    output reg [WISHBONE_BUS_ADDR_WIDTH-1:0] wb_adr_i,
    output wire [WISHBONE_BUS_DATA_WIDTH-1:0] wb_dat_i,
    input wire [WISHBONE_BUS_DATA_WIDTH-1:0] wb_dat_o,
    output wire wb_we_i,
    output wire wb_stb_i,
    output wire wb_cyc_i,
    input wire wb_ack_o,
    input wire wb_inta_o
);
    // RESP value definitions
    localparam RESP_OKAY   = 2'b00;
    localparam RESP_EXOKAY = 2'b01;
    localparam RESP_SLVERR = 2'b10;
    localparam RESP_DECERR = 2'b11;

    // One-hot states of the write state machine (wrt_cs)
    localparam IDLE_WR         = 5'b00001,
               DATA_WRITE_HOLD = 5'b00010,
               WB_WAIT_ACK_W   = 5'b00100,
               BREADY_ASSERT   = 5'b01000,
               BREADY_ASSERTED = 5'b10000;

    // One-hot states of the read state machine (rdt_cs)
    localparam IDLE_RD       = 3'b001,
               WB_WAIT_ACK_R = 3'b010,
               AR_DATA_WAIT  = 3'b100;

    reg [4:0] wrt_cs = IDLE_WR;
    reg [2:0] rdt_cs = IDLE_RD;

    reg reset_1d = 1'b0;
    reg reset    = 1'b0;

    reg awready = 1'b1;
    reg bvalid  = 1'b0;
    reg arready = 1'b1;
    reg wready  = 1'b0;
    reg rvalid  = 1'b0;

    wire aclk;
    reg [C_S_AXI_LITE_DATA_WIDTH-1:0] rdata;

    reg wb_cyc = 1'b0;
    reg wb_we  = 1'b0;

    assign aclk = s_axi_lite_aclk;

    // Synchronization of axi_resetn: two registers, producing an active-high reset
    always @(posedge aclk) begin
        reset_1d <= ~axi_resetn;
        reset    <= reset_1d;
    end

    // wb_adr_i: captured at the address handshake; the write address has
    // priority when both channels handshake in the same cycle.
    // Non-blocking assignments are used so that logic sampling wb_adr_i on
    // the same clock edge sees the registered value without a simulation race.
    always @(posedge aclk) begin
        if (reset) begin
            wb_adr_i <= {WISHBONE_BUS_ADDR_WIDTH{1'b0}};
        end else if (s_axi_lite_awvalid & s_axi_lite_awready) begin
            wb_adr_i <= s_axi_lite_awaddr[2 +: WISHBONE_BUS_ADDR_WIDTH];
        end else if (s_axi_lite_arvalid & s_axi_lite_arready) begin
            wb_adr_i <= s_axi_lite_araddr[2 +: WISHBONE_BUS_ADDR_WIDTH];
        end
    end

    // AXI4 Lite Slave Write Transaction State Machine
    always @(posedge aclk) begin
        if (reset) begin
            wrt_cs  <= IDLE_WR;
            awready <= 1'b1;
            bvalid  <= 1'b0;
            wready  <= 1'b0;
            wb_we   <= 1'b0;
        end else begin
            case (wrt_cs)
                IDLE_WR :
                    if (s_axi_lite_awvalid & ~s_axi_lite_wvalid) begin // Write Transaction Start
                        wrt_cs  <= DATA_WRITE_HOLD;
                        awready <= 1'b0;
                    end else if (s_axi_lite_awvalid & s_axi_lite_wvalid) begin // Write Transaction Start with data
                        wrt_cs  <= WB_WAIT_ACK_W;
                        awready <= 1'b0;
                        if (s_axi_lite_wstrb[0]) begin
                            wb_we <= 1'b1;
                        end
                    end
                DATA_WRITE_HOLD :
                    if (s_axi_lite_wvalid) begin // Write data just valid
                        wrt_cs <= WB_WAIT_ACK_W;
                        if (s_axi_lite_wstrb[0]) begin
                            wb_we <= 1'b1;
                        end
                    end
                WB_WAIT_ACK_W :
                    // WREADY is raised for one cycle once the Wishbone slave has acknowledged
                    if (wb_ack_o) begin
                        wrt_cs <= BREADY_ASSERT;
                        wready <= 1'b1;
                        wb_we  <= 1'b0;
                    end
                BREADY_ASSERT: begin
                    wrt_cs <= BREADY_ASSERTED;
                    wready <= 1'b0;
                    bvalid <= 1'b1;
                end
                BREADY_ASSERTED :
                    if (s_axi_lite_bready) begin // The write transaction was terminated.
                        wrt_cs  <= IDLE_WR;
                        bvalid  <= 1'b0;
                        awready <= 1'b1;
                    end
            endcase
        end
    end

    assign s_axi_lite_awready = awready;
    assign s_axi_lite_bvalid  = bvalid;
    assign s_axi_lite_bresp   = RESP_OKAY;
    assign s_axi_lite_wready  = wready;
    assign wb_we_i  = wb_we;
    assign wb_dat_i = s_axi_lite_wdata[WISHBONE_BUS_DATA_WIDTH-1:0];

    // AXI4 Lite Slave Read Transaction State Machine
    always @(posedge aclk) begin
        if (reset) begin
            rdt_cs  <= IDLE_RD;
            arready <= 1'b1;
            rvalid  <= 1'b0;
            rdata   <= 0;
        end else begin
            case (rdt_cs)
                IDLE_RD :
                    if (s_axi_lite_arvalid) begin
                        rdt_cs  <= WB_WAIT_ACK_R;
                        arready <= 1'b0;
                    end
                WB_WAIT_ACK_R :
                    if (wb_ack_o) begin
                        // Capture the Wishbone data and zero the upper bits.
                        // Both slices use non-blocking assignments; the reg
                        // must not mix blocking and non-blocking assignment.
                        rdata[WISHBONE_BUS_DATA_WIDTH-1:0] <= wb_dat_o;
                        rdata[C_S_AXI_LITE_DATA_WIDTH-1:WISHBONE_BUS_DATA_WIDTH] <= 0;
                        rvalid <= 1'b1;
                        rdt_cs <= AR_DATA_WAIT;
                    end
                AR_DATA_WAIT :
                    if (s_axi_lite_rready) begin
                        rdt_cs  <= IDLE_RD;
                        rvalid  <= 1'b0;
                        arready <= 1'b1;
                    end
            endcase
        end
    end

    assign s_axi_lite_arready = arready;
    assign s_axi_lite_rvalid  = rvalid;
    assign s_axi_lite_rresp   = RESP_OKAY;
    assign s_axi_lite_rdata   = rdata;

    // wb_cyc_i: raised when a write has both address and data or when a read
    // address arrives; dropped when the Wishbone slave acknowledges.
    always @(posedge aclk) begin
        if (reset) begin
            wb_cyc <= 1'b0;
        end else begin
            if ((wrt_cs==IDLE_WR) & s_axi_lite_awvalid & s_axi_lite_wvalid) begin // Write
                wb_cyc <= 1'b1;
            end else if ((wrt_cs==DATA_WRITE_HOLD) & s_axi_lite_wvalid) begin
                wb_cyc <= 1'b1;
            end else if ((wrt_cs==WB_WAIT_ACK_W) & wb_ack_o) begin
                wb_cyc <= 1'b0;
            end else if ((rdt_cs==IDLE_RD) & s_axi_lite_arvalid) begin // Read
                wb_cyc <= 1'b1;
            end else if ((rdt_cs==WB_WAIT_ACK_R) & wb_ack_o) begin
                wb_cyc <= 1'b0;
            end
        end
    end

    // Strobe and cycle are driven from the same register.
    assign wb_cyc_i = wb_cyc;
    assign wb_stb_i = wb_cyc;
endmodule
`default_nettype wire
`default_nettype none
// i2cm_axi4ls.v
// 2021/04/16 by marsee
//
// I2C master with an AXI4-Lite register interface.
//
// Structural wrapper: wb2axi4ls_conv translates the AXI4-Lite accesses into
// Wishbone cycles for i2c_master_top. The I2C pads are exposed as separate
// input / output / output-enable signals (scl_i/scl_o/scl_t, sda_i/sda_o/sda_t).
// The core's interrupt output is routed only to the bridge.
module i2cm_axi4ls #(
    parameter integer C_S_AXI_LITE_ADDR_WIDTH = 12, // Address width of the AXI Lite Interface
    parameter integer C_S_AXI_LITE_DATA_WIDTH = 32  // Data width of the AXI Lite Interface
)(
    input wire s_axi_lite_aclk,
    input wire s_axi_lite_resetn,
    // AXI Lite Write Address Channel
    input wire s_axi_lite_awvalid,
    output wire s_axi_lite_awready,
    input wire [C_S_AXI_LITE_ADDR_WIDTH-1: 0] s_axi_lite_awaddr,
    // AXI Lite Write Data Channel
    input wire s_axi_lite_wvalid,
    output wire s_axi_lite_wready,
    input wire [C_S_AXI_LITE_DATA_WIDTH-1: 0] s_axi_lite_wdata,
    input wire [C_S_AXI_LITE_DATA_WIDTH/8-1:0] s_axi_lite_wstrb,
    // AXI Lite Write Response Channel
    output wire [1:0] s_axi_lite_bresp,
    output wire s_axi_lite_bvalid,
    input wire s_axi_lite_bready,
    // AXI Lite Read Address Channel
    input wire s_axi_lite_arvalid,
    output wire s_axi_lite_arready,
    input wire [C_S_AXI_LITE_ADDR_WIDTH-1: 0] s_axi_lite_araddr,
    // AXI Lite Read Data Channel
    output wire s_axi_lite_rvalid,
    input wire s_axi_lite_rready,
    output wire [C_S_AXI_LITE_DATA_WIDTH-1: 0] s_axi_lite_rdata,
    output wire [1:0] s_axi_lite_rresp,
    // I2C Bus
    output wire scl_o,
    output wire scl_t,
    input wire scl_i,
    output wire sda_o,
    output wire sda_t,
    input wire sda_i
    //inout wire scl,
    //inout wire sda
);
    // Wishbone nets between the bridge and the I2C core
    wire [2:0] wb_addr;
    wire [7:0] wb_wr_data; // bridge -> core
    wire [7:0] wb_rd_data; // core -> bridge
    wire       wb_we;
    wire       wb_stb;
    wire       wb_cyc;
    wire       wb_ack;
    wire       wb_irq;

    // Active-high reset for the core, derived from the active-low AXI reset
    wire wb_rst = ~s_axi_lite_resetn;

    // OpenCores I2C master core (Wishbone slave)
    i2c_master_top i2cm_wb_i(
        .wb_clk_i(s_axi_lite_aclk),
        .wb_rst_i(wb_rst),
        .arst_i(1'b1),
        .wb_adr_i(wb_addr),
        .wb_dat_i(wb_wr_data),
        .wb_dat_o(wb_rd_data),
        .wb_we_i(wb_we),
        .wb_stb_i(wb_stb),
        .wb_cyc_i(wb_cyc),
        .wb_ack_o(wb_ack),
        .wb_inta_o(wb_irq),
        .scl_pad_i(scl_i),
        .scl_pad_o(scl_o),
        .scl_padoen_o(scl_t),
        .sda_pad_i(sda_i),
        .sda_pad_o(sda_o),
        .sda_padoen_o(sda_t)
    );

    // AXI4-Lite to Wishbone bridge
    wb2axi4ls_conv #(
        .C_S_AXI_LITE_ADDR_WIDTH(C_S_AXI_LITE_ADDR_WIDTH),
        .C_S_AXI_LITE_DATA_WIDTH(C_S_AXI_LITE_DATA_WIDTH)
    ) wb2axi4c_i (
        .s_axi_lite_aclk(s_axi_lite_aclk),
        .axi_resetn(s_axi_lite_resetn),
        // write address
        .s_axi_lite_awvalid(s_axi_lite_awvalid),
        .s_axi_lite_awready(s_axi_lite_awready),
        .s_axi_lite_awaddr(s_axi_lite_awaddr),
        // write data
        .s_axi_lite_wvalid(s_axi_lite_wvalid),
        .s_axi_lite_wready(s_axi_lite_wready),
        .s_axi_lite_wdata(s_axi_lite_wdata),
        .s_axi_lite_wstrb(s_axi_lite_wstrb),
        // write response
        .s_axi_lite_bresp(s_axi_lite_bresp),
        .s_axi_lite_bvalid(s_axi_lite_bvalid),
        .s_axi_lite_bready(s_axi_lite_bready),
        // read address
        .s_axi_lite_arvalid(s_axi_lite_arvalid),
        .s_axi_lite_arready(s_axi_lite_arready),
        .s_axi_lite_araddr(s_axi_lite_araddr),
        // read data
        .s_axi_lite_rvalid(s_axi_lite_rvalid),
        .s_axi_lite_rready(s_axi_lite_rready),
        .s_axi_lite_rdata(s_axi_lite_rdata),
        .s_axi_lite_rresp(s_axi_lite_rresp),
        // Wishbone side
        .wb_adr_i(wb_addr),
        .wb_dat_i(wb_wr_data),
        .wb_dat_o(wb_rd_data),
        .wb_we_i(wb_we),
        .wb_stb_i(wb_stb),
        .wb_cyc_i(wb_cyc),
        .wb_ack_o(wb_ack),
        .wb_inta_o(wb_irq)
    );
endmodule
`default_nettype wire
AXI4 Lite Master BFM の Verilog HDL コードを貼っておく。AXI4 Lite Master BFM は、AXI4 Master BFM のラッパーとして作られている。
Verilog HDL の task で書いてあって、下に示す。7つの task がある。
・AXI_LiteM_1Seq_Write(Write Address, Write Data, Write Response をシーケンシャルにオーバーラップせずに行う)
・AXI_LiteM_WAC(Write Address Channel の Transaction を実行する)
・AXI_LiteM_WDC(Write Data Channel の Transaction を実行する)
・AXI_LiteM_WRC(Write Response Channel の Transaction を実行する)
・AXI_LiteM_1Seq_Read(Read Address, Read Data をシーケンシャルに行う)
・AXI_LiteM_RAC(Read Address Channel の Transaction を実行する)
・AXI_LiteM_RDC(Read Data Channel の Transaction を実行する)
// AXI4 bus Lite Master Bus Function Model
// AXI4_Lite_Master_BFM.v
// https://marsee101.blog.fc2.com/blog-entry-2673.html
// 2013/12/14
// AXI4_Master_BFM のラッパー
// 2021/04/15 AXI_LiteM_1Seq_Read と AXI_LiteM_RDC に rdata 出力を追加
//
`default_nettype none
`timescale 100ps / 1ps
// AXI4-Lite master bus functional model.
//
// Thin wrapper around AXI4_Master_BFM: only the AXI4-Lite subset of the
// master's ports is brought out, the remaining outputs are left open and the
// unused inputs are tied off (BID/BUSER/RID/RUSER = 0, RLAST = 1).
// Every task forwards to the corresponding AXI4_Master_BFM task with
// ID 0, burst length 0, 32-bit transfer size and INCR burst type.
module AXI4_Lite_Master_BFM #(
    parameter DELAY = 10,
    parameter integer C_S_AXI_LITE_ADDR_WIDTH = 12, // Address width of the AXI Lite Interface
    parameter integer C_S_AXI_LITE_DATA_WIDTH = 32  // Data width of the AXI Lite Interface
)(
    input wire ACLK,
    output wire [C_S_AXI_LITE_ADDR_WIDTH-1:0] S_AXI_AWADDR,
    output wire [2:0] S_AXI_AWPROT,
    output wire S_AXI_AWVALID,
    output wire [C_S_AXI_LITE_DATA_WIDTH-1:0] S_AXI_WDATA,
    output wire [C_S_AXI_LITE_DATA_WIDTH/8-1:0] S_AXI_WSTRB,
    output wire S_AXI_WVALID,
    output wire S_AXI_BREADY,
    output wire [C_S_AXI_LITE_ADDR_WIDTH-1:0] S_AXI_ARADDR,
    output wire [2:0] S_AXI_ARPROT,
    output wire S_AXI_ARVALID,
    output wire S_AXI_RREADY,
    input wire S_AXI_AWREADY,
    input wire S_AXI_WREADY,
    input wire [1:0] S_AXI_BRESP,
    input wire S_AXI_BVALID,
    input wire S_AXI_ARREADY,
    input wire [C_S_AXI_LITE_DATA_WIDTH-1:0] S_AXI_RDATA,
    input wire [1:0] S_AXI_RRESP,
    input wire S_AXI_RVALID
);
    // Transfer size encodings
    parameter ASIZE_BT_4 = 3'd2; // 32 bit width
    parameter ASIZE_BT_2 = 3'd1; // 16 bit width
    parameter ASIZE_BT_1 = 3'd0; // 8 bit width

    // Burst type encodings
    parameter ABURST_FIXED = 2'd0;
    parameter ABURST_INCR  = 2'd1;
    parameter ABURST_WRAP  = 2'd2;

    // RESP value definitions
    parameter RESP_OKAY   = 2'b00;
    parameter RESP_EXOKAY = 2'b01;
    parameter RESP_SLVERR = 2'b10;
    parameter RESP_DECERR = 2'b11;

    // Bookkeeping registers (not referenced inside this module)
    reg [7:0] awlen_hold = 0;
    reg [0:0] wid_hold = 0;
    reg axi_w_transaction_active = 0;
    reg axi_r_transaction_active = 0;
    reg [7:0] arlen_hold = 0;

    // Wrapped full AXI4 master BFM
    AXI4_Master_BFM #(
        .DELAY(DELAY),
        .C_S_AXI_ADDR_WIDTH(C_S_AXI_LITE_ADDR_WIDTH),
        .C_S_AXI_DATA_WIDTH(C_S_AXI_LITE_DATA_WIDTH)
    ) MBFMi(
        .ACLK(ACLK),
        // write address channel
        .S_AXI_AWID(),
        .S_AXI_AWADDR(S_AXI_AWADDR),
        .S_AXI_AWLEN(),
        .S_AXI_AWSIZE(),
        .S_AXI_AWBURST(),
        .S_AXI_AWLOCK(),
        .S_AXI_AWCACHE(),
        .S_AXI_AWPROT(S_AXI_AWPROT),
        .S_AXI_AWREGION(),
        .S_AXI_AWQOS(),
        .S_AXI_AWUSER(),
        .S_AXI_AWVALID(S_AXI_AWVALID),
        .S_AXI_AWREADY(S_AXI_AWREADY),
        // write data channel
        .S_AXI_WID(),
        .S_AXI_WDATA(S_AXI_WDATA),
        .S_AXI_WSTRB(S_AXI_WSTRB),
        .S_AXI_WLAST(),
        .S_AXI_WUSER(),
        .S_AXI_WVALID(S_AXI_WVALID),
        .S_AXI_WREADY(S_AXI_WREADY),
        // write response channel
        .S_AXI_BID(1'b0),
        .S_AXI_BRESP(S_AXI_BRESP),
        .S_AXI_BUSER(1'b0),
        .S_AXI_BVALID(S_AXI_BVALID),
        .S_AXI_BREADY(S_AXI_BREADY),
        // read address channel
        .S_AXI_ARID(),
        .S_AXI_ARADDR(S_AXI_ARADDR),
        .S_AXI_ARLEN(),
        .S_AXI_ARSIZE(),
        .S_AXI_ARBURST(),
        .S_AXI_ARLOCK(),
        .S_AXI_ARCACHE(),
        .S_AXI_ARPROT(S_AXI_ARPROT),
        .S_AXI_ARREGION(),
        .S_AXI_ARQOS(),
        .S_AXI_ARUSER(),
        .S_AXI_ARVALID(S_AXI_ARVALID),
        .S_AXI_ARREADY(S_AXI_ARREADY),
        // read data channel
        .S_AXI_RID(1'b0),
        .S_AXI_RDATA(S_AXI_RDATA),
        .S_AXI_RRESP(S_AXI_RRESP),
        .S_AXI_RLAST(1'b1),
        .S_AXI_RUSER(1'b0),
        .S_AXI_RVALID(S_AXI_RVALID),
        .S_AXI_RREADY(S_AXI_RREADY)
    );

    // ---------------------------------------------------------------
    // Write Channel
    // ---------------------------------------------------------------
    // Performs write address, write data and write response one after the
    // other, without overlap (per the original author's description).
    // wait_clk_bready : 0 - no wait before BREADY; non-zero - that many clocks of wait
    // wmax_wait       : 0 - no wait on WVALID; non-zero - random wait up to wmax_wait
    task AXI_LiteM_1Seq_Write(
        input [C_S_AXI_LITE_ADDR_WIDTH-1:0] awaddr,
        input [C_S_AXI_LITE_DATA_WIDTH-1:0] wdata,
        input [7:0] wait_clk_bready,
        input [7:0] wmax_wait
    );
        begin
            MBFMi.AXI_Master_1Seq_Write(1'b0, awaddr, 8'd0, ASIZE_BT_4, ABURST_INCR, wdata, wait_clk_bready, wmax_wait);
        end
    endtask

    // Write Address Channel transaction only
    task AXI_LiteM_WAC(
        input [C_S_AXI_LITE_ADDR_WIDTH-1:0] awaddr
    );
        begin
            MBFMi.AXI_MASTER_WAC(1'b0, awaddr, 8'd0, ASIZE_BT_4, ABURST_INCR);
        end
    endtask

    // Write Data Channel transaction only
    // wmax_wait : 0 - no wait on WVALID; non-zero - random wait up to wmax_wait
    // Original author's notes: WDATA is incremented by 1; for now WSTRB is all ones.
    task AXI_LiteM_WDC(
        input [C_S_AXI_LITE_DATA_WIDTH-1:0] wdata,
        input [7:0] wmax_wait // maximum number of waits on write
    );
        begin
            MBFMi.AXI_MASTER_WDC(wdata, wmax_wait);
        end
    endtask

    // Write Response Channel transaction only
    // wait_clk_bready : 0 - no wait before BREADY; non-zero - that many clocks of wait
    task AXI_LiteM_WRC(
        input [7:0] wait_clk_bready
    );
        begin
            MBFMi.AXI_MASTER_WRC(wait_clk_bready);
        end
    endtask

    // ---------------------------------------------------------------
    // Read Channel
    // ---------------------------------------------------------------
    // Performs read address and read data one after the other.
    task AXI_LiteM_1Seq_Read(
        input  [C_S_AXI_LITE_ADDR_WIDTH-1:0] araddr,
        input  [7:0] rmax_wait,                     // maximum number of waits on read
        output [C_S_AXI_LITE_DATA_WIDTH-1:0] rdata  // latched S_AXI_RDATA
    );
        begin
            MBFMi.AXI_Master_1Seq_Read(1'b0, araddr, 8'd0, ASIZE_BT_4, ABURST_INCR, rmax_wait, rdata);
        end
    endtask

    // Read Address Channel transaction only
    task AXI_LiteM_RAC(
        input [C_S_AXI_LITE_ADDR_WIDTH-1:0] araddr
    );
        begin
            MBFMi.AXI_MASTER_RAC(1'b0, araddr, 8'd0, ASIZE_BT_4, ABURST_INCR);
        end
    endtask

    // Read Data Channel transaction only
    // Original author's note: S_AXI_RREADY is asserted until S_AXI_RLAST is asserted.
    task AXI_LiteM_RDC(
        input  [7:0] rmax_wait,                     // maximum number of waits on read
        output [C_S_AXI_LITE_DATA_WIDTH-1:0] rdata  // latched S_AXI_RDATA
    );
        begin
            MBFMi.AXI_MASTER_RDC(rmax_wait, rdata);
        end
    endtask
endmodule
`default_nettype wire
AXI Master BFMの Write の task がAXI_Master_1Seq_Write だ。このtaskは、(Write Address, Write Data), Write Response をシーケンシャルにオーバーラップせずに行う。今回の修正で、(Write Address, Write Data)はfork ~ join を使用して、オーバーラップするように変更した。AXI_Master_1Seq_Writeは、8個の引数を持つ。wait_clk_bready、wmax_wait 以外の信号はAXIバスの信号なので、説明は省く。wait_clk_bready はWriteのデータ転送が終了して、BREADYをアサートするまでのWaitクロック数を設定する。Writeデータ転送時にランダムな数のWaitが入るが、wmax_wait は、そのWait の最大値を指示する。
AXI Master BFMの Read の task が、AXI_Master_1Seq_Read で、Read Address, Read Data をシーケンシャルに行う。
// AXI4 bus Master Bus Function Model
// AXI4_Master_BFM.v
// https://marsee101.blog.fc2.com/blog-entry-2288.html
// 2012/10/24 : 修正、S_AXI_AWREADYが1になるのを確認してからS_AXI_WVALIDを1にしていたのでは、AXIバスの非標準となる。
// よって、AXI_MASTER_WAC とAXI_MASTER_WDC をfork ~ join で並列に実行する
// 2013/12/14 : input に DELAYを入れるように変更
// 2021/04/15 : AXI_Master_1Seq_Read, AXI_MASTER_RDC に rdata 出力を追加
//
`default_nettype none
`timescale 100ps / 1ps
// AXI4 master bus functional model (simulation only).
//
// Every slave-driven input is copied into a *_d register that lags the pin by DELAY time
// units; the tasks below take their handshake decisions on those delayed copies.
// Tasks:
//   AXI_Master_1Seq_Write : address + data (in parallel), then write response
//   AXI_MASTER_WAC / AXI_MASTER_WDC / AXI_MASTER_WRC : the individual write channels
//   AXI_Master_1Seq_Read  : address, then data
//   AXI_MASTER_RAC / AXI_MASTER_RDC : the individual read channels
module AXI4_Master_BFM #(
    parameter DELAY = 10,
    parameter integer C_S_AXI_ADDR_WIDTH = 32, // Address width of the AXI Interface
    parameter integer C_S_AXI_DATA_WIDTH = 32  // Data width of the AXI Interface
)(
    input wire ACLK,
    output reg [0:0] S_AXI_AWID = 0,
    output reg [C_S_AXI_ADDR_WIDTH-1:0] S_AXI_AWADDR = 0,
    output reg [7:0] S_AXI_AWLEN = 0,
    output reg [2:0] S_AXI_AWSIZE = 0,
    output reg [1:0] S_AXI_AWBURST = 0,
    output reg [1:0] S_AXI_AWLOCK = 0,
    output reg [3:0] S_AXI_AWCACHE = 3, // Normal Non-cacheable bufferable
    output reg [2:0] S_AXI_AWPROT = 0,
    output reg [3:0] S_AXI_AWREGION = 0,
    output reg [3:0] S_AXI_AWQOS = 0,
    output reg [0:0] S_AXI_AWUSER = 0,
    output reg S_AXI_AWVALID = 0,
    output reg [0:0] S_AXI_WID = 0,
    output reg [C_S_AXI_DATA_WIDTH-1:0] S_AXI_WDATA = 0,
    output reg [C_S_AXI_DATA_WIDTH/8-1:0] S_AXI_WSTRB = 0,
    output reg S_AXI_WLAST = 0,
    output reg [0:0] S_AXI_WUSER = 0,
    output reg S_AXI_WVALID = 0,
    output reg S_AXI_BREADY = 0,
    output reg [0:0] S_AXI_ARID = 0,
    output reg [C_S_AXI_ADDR_WIDTH-1:0] S_AXI_ARADDR = 0,
    output reg [7:0] S_AXI_ARLEN = 0,
    output reg [2:0] S_AXI_ARSIZE = 0,
    output reg [1:0] S_AXI_ARBURST = 0,
    output reg [1:0] S_AXI_ARLOCK = 0,
    output reg [3:0] S_AXI_ARCACHE = 2, // Normal Non-cacheable non-bufferable (AWCACHE above uses 3)
    output reg [2:0] S_AXI_ARPROT = 0,
    output reg [3:0] S_AXI_ARREGION = 0,
    output reg [3:0] S_AXI_ARQOS = 0,
    output reg [0:0] S_AXI_ARUSER = 0,
    output reg S_AXI_ARVALID = 0,
    output reg S_AXI_RREADY = 0,
    input wire S_AXI_AWREADY,
    input wire S_AXI_WREADY,
    input wire [0:0] S_AXI_BID,
    input wire [1:0] S_AXI_BRESP,
    input wire [0:0] S_AXI_BUSER,
    input wire S_AXI_BVALID,
    input wire S_AXI_ARREADY,
    input wire [0:0] S_AXI_RID,
    input wire [C_S_AXI_DATA_WIDTH-1:0] S_AXI_RDATA,
    input wire [1:0] S_AXI_RRESP,
    input wire S_AXI_RLAST,
    input wire [0:0] S_AXI_RUSER,
    input wire S_AXI_RVALID
);

    // Book-keeping shared between the channel tasks.
    reg [7:0] awlen_hold = 0;             // burst length captured by WAC for WDC
    reg [0:0] wid_hold = 0;
    reg axi_w_transaction_active = 0;     // set in WAC, cleared in WRC
    reg axi_r_transaction_active = 0;     // set in RAC, cleared in RDC
    reg [7:0] arlen_hold = 0;             // burst length captured by RAC

    // Delayed copies of the slave-driven inputs.
    reg S_AXI_AWREADY_d;
    reg S_AXI_WREADY_d;
    reg [0:0] S_AXI_BID_d;
    reg [1:0] S_AXI_BRESP_d;
    reg [0:0] S_AXI_BUSER_d;
    reg S_AXI_BVALID_d;
    reg S_AXI_ARREADY_d;
    reg [0:0] S_AXI_RID_d;
    reg [C_S_AXI_DATA_WIDTH-1:0] S_AXI_RDATA_d;
    reg [1:0] S_AXI_RRESP_d;
    reg S_AXI_RLAST_d;
    reg [0:0] S_AXI_RUSER_d;
    reg S_AXI_RVALID_d;

    always @* S_AXI_AWREADY_d <= #DELAY S_AXI_AWREADY;
    always @* S_AXI_WREADY_d <= #DELAY S_AXI_WREADY;
    always @* S_AXI_BID_d <= #DELAY S_AXI_BID;
    always @* S_AXI_BRESP_d <= #DELAY S_AXI_BRESP;
    always @* S_AXI_BUSER_d <= #DELAY S_AXI_BUSER;
    always @* S_AXI_BVALID_d <= #DELAY S_AXI_BVALID;
    always @* S_AXI_ARREADY_d <= #DELAY S_AXI_ARREADY;
    always @* S_AXI_RID_d <= #DELAY S_AXI_RID;
    always @* S_AXI_RDATA_d <= #DELAY S_AXI_RDATA;
    always @* S_AXI_RRESP_d <= #DELAY S_AXI_RRESP;
    always @* S_AXI_RLAST_d <= #DELAY S_AXI_RLAST;
    always @* S_AXI_RUSER_d <= #DELAY S_AXI_RUSER;
    always @* S_AXI_RVALID_d <= #DELAY S_AXI_RVALID;

    // Write Channel
    // Runs the write-address and write-data channels in parallel (fork/join), then the
    // write-response channel.
    // wait_clk_bready : 0 - no wait before BREADY; nonzero - that many clocks of wait before BREADY
    // wmax_wait       : 0 - no wait on WVALID; nonzero - a random wait of at most wmax_wait clocks
    //                   is inserted before WVALID on every beat
    task AXI_Master_1Seq_Write;
        input [0:0] awid;
        input [C_S_AXI_ADDR_WIDTH-1:0] awaddr;
        input [7:0] awlen;
        input [2:0] awsize;
        input [1:0] awburst;
        input [C_S_AXI_DATA_WIDTH-1:0] wdata;
        input [7:0] wait_clk_bready;
        input [7:0] wmax_wait;
        begin
            fork
                AXI_MASTER_WAC(awid, awaddr, awlen, awsize, awburst);
                AXI_MASTER_WDC(wdata, wmax_wait);
            join
            AXI_MASTER_WRC(wait_clk_bready);
        end
    endtask

    // Write Address Channel
    // Drives the AW* signals and asserts AWVALID. When no write transaction is active it marks
    // one as started, stores awlen for the data channel, waits for AWREADY and then returns the
    // AW* signals to 0. When a write transaction is already active it returns immediately.
    task AXI_MASTER_WAC;
        input [0:0] awid;
        input [C_S_AXI_ADDR_WIDTH-1:0] awaddr;
        input [7:0] awlen;
        input [2:0] awsize;
        input [1:0] awburst;
        begin
            S_AXI_AWID = awid;
            S_AXI_AWADDR = awaddr;
            S_AXI_AWLEN = awlen;
            S_AXI_AWSIZE = awsize;
            S_AXI_AWBURST = awburst;
            S_AXI_AWVALID = 1'b1;
            if (axi_w_transaction_active == 1'b0) begin // skip the handshake if a write transaction is already running
                axi_w_transaction_active = 1'b1; // AXI transaction starts
                awlen_hold = awlen; // keep the burst length for the Write Data Channel
                @(posedge ACLK); // next clock
                while (~S_AXI_AWREADY_d) begin // wait until S_AXI_AWREADY becomes 1
                    #DELAY;
                    @(posedge ACLK); // next clock
                end
                #DELAY;
                S_AXI_AWID = 0;
                S_AXI_AWADDR = 0;
                S_AXI_AWLEN = 0;
                S_AXI_AWSIZE = 0;
                S_AXI_AWBURST = 0;
                S_AXI_AWVALID = 1'b0;
                @(posedge ACLK); // next clock
                #DELAY;
            end
        end
    endtask

    // Write Data Channel
    // Sends awlen_hold+1 beats; WDATA starts at wdata and is incremented by 1 on every beat.
    // WSTRB is driven to all ones for the duration of the burst.
    // wmax_wait : 0 - no wait on WVALID; nonzero - a random wait of at most wmax_wait clocks
    //             is inserted before WVALID on every beat
    task AXI_MASTER_WDC;
        input [C_S_AXI_DATA_WIDTH-1:0] wdata;
        input [7:0] wmax_wait; // maximum number of wait cycles during the write
        integer i, j, val;
        begin
            i = 0; j = 0;
            // One strobe bit per byte lane. The replication count must equal the WSTRB width;
            // a count of (width-1) would leave the top byte lane disabled.
            S_AXI_WSTRB = {(C_S_AXI_DATA_WIDTH/8){1'b1}};
            while (~S_AXI_AWVALID) begin // wait until S_AXI_AWVALID becomes 1
                @(posedge ACLK); // next clock
                #DELAY;
            end
            while (i<=awlen_hold) begin
                if (wmax_wait == 0) // do not call $random when wmax_wait is 0
                    val = 0;
                else
                    val = $unsigned($random) % (wmax_wait+1);
                if (val == 0) begin // no wait
                    S_AXI_WVALID = 1'b1;
                end else begin // with wait
                    S_AXI_WVALID = 1'b0;
                    for (j=0; j<val; j=j+1) begin
                        @(posedge ACLK); // next clock
                        #DELAY;
                    end
                    S_AXI_WVALID = 1'b1; // wait finished
                end
                if (i == awlen_hold)
                    S_AXI_WLAST = 1'b1;
                else
                    S_AXI_WLAST = 1'b0;
                S_AXI_WDATA = wdata;
                wdata = wdata + 1;
                @(posedge ACLK); // next clock
                while (~S_AXI_WREADY_d) begin // while S_AXI_WREADY is 0, wait until it becomes 1
                    #DELAY;
                    @(posedge ACLK); // next clock
                end
                #DELAY;
                i = i + 1;
            end
            S_AXI_WVALID = 1'b0;
            S_AXI_WLAST = 1'b0;
            S_AXI_WSTRB = {(C_S_AXI_DATA_WIDTH/8){1'b0}};
        end
    endtask

    // Write Response Channel
    // Optionally waits, asserts BREADY until BVALID is seen, then ends the write transaction.
    // wait_clk_bready : 0 - no wait before BREADY; nonzero - that many clocks of wait before BREADY
    task AXI_MASTER_WRC;
        input [7:0] wait_clk_bready;
        integer i;
        begin
            for (i=0; i<wait_clk_bready; i=i+1) begin
                @(posedge ACLK); // next clock
                #DELAY;
            end
            S_AXI_BREADY = 1'b1;
            @(posedge ACLK); // next clock
            while (~S_AXI_BVALID_d) begin // wait until S_AXI_BVALID becomes 1
                #DELAY;
                @(posedge ACLK); // next clock
            end
            #DELAY;
            S_AXI_BREADY = 1'b0;
            axi_w_transaction_active = 1'b0; // AXI transaction finished
            @(posedge ACLK);
            #DELAY;
        end
    endtask

    // Read Channel
    // Performs the Read Address and Read Data phases sequentially.
    task AXI_Master_1Seq_Read;
        input [0:0] arid;
        input [C_S_AXI_ADDR_WIDTH-1:0] araddr;
        input [7:0] arlen;
        input [2:0] arsize;
        input [1:0] arburst;
        input [7:0] rmax_wait; // maximum number of wait cycles during the read
        output [C_S_AXI_DATA_WIDTH-1:0] rdata; // latched read data
        begin
            AXI_MASTER_RAC(arid, araddr, arlen, arsize, arburst);
            AXI_MASTER_RDC(rmax_wait, rdata);
        end
    endtask

    // Read Address Channel
    // Drives the AR* signals and asserts ARVALID. When no read transaction is active it stores
    // arlen, waits for ARREADY, returns the AR* signals to 0 and then marks the read transaction
    // as started. When a read transaction is already active it returns immediately.
    task AXI_MASTER_RAC;
        input [0:0] arid;
        input [C_S_AXI_ADDR_WIDTH-1:0] araddr;
        input [7:0] arlen;
        input [2:0] arsize;
        input [1:0] arburst;
        begin
            S_AXI_ARID = arid;
            S_AXI_ARADDR = araddr;
            S_AXI_ARLEN = arlen;
            S_AXI_ARSIZE = arsize;
            S_AXI_ARBURST = arburst;
            S_AXI_ARVALID = 1'b1;
            if (axi_r_transaction_active == 1'b0) begin // skip the handshake if a read transaction is already running
                arlen_hold = arlen; // keep the burst length for the Read Data Channel
                @(posedge ACLK); // next clock
                while (~S_AXI_ARREADY_d) begin // wait until S_AXI_ARREADY becomes 1
                    #DELAY;
                    @(posedge ACLK); // next clock
                end
                #DELAY;
                S_AXI_ARID = 0;
                S_AXI_ARADDR = 0;
                S_AXI_ARLEN = 0;
                S_AXI_ARSIZE = 0;
                S_AXI_ARBURST = 0;
                S_AXI_ARVALID = 1'b0;
                @(posedge ACLK); // next clock
                #DELAY;
                axi_r_transaction_active = 1'b1; // AXI transaction starts
            end
        end
    endtask

    // Read Data Channel
    // Accepts beats until RLAST & RVALID & RREADY is observed. rdata is overwritten on every
    // beat, so on return it holds the data of the last beat.
    task AXI_MASTER_RDC;
        input [7:0] rmax_wait; // maximum number of wait cycles during the read
        output [C_S_AXI_DATA_WIDTH-1:0] rdata; // latched read data
        integer i, val;
        begin
            while (~(S_AXI_RLAST_d & S_AXI_RVALID_d & S_AXI_RREADY)) begin // finish on S_AXI_RLAST & S_AXI_RVALID & S_AXI_RREADY
                if (rmax_wait == 0) begin // do not call $random when rmax_wait is 0
                    val = 0;
                    S_AXI_RREADY = 1'b1;
                end else begin
                    val = $unsigned($random) % (rmax_wait+1);
                    if (val == 0)
                        S_AXI_RREADY = 1'b1;
                    else
                        S_AXI_RREADY = 1'b0;
                end
                #DELAY;
                for (i=0; i<val; i=i+1) begin // random wait; skipped when val == 0
                    @(posedge ACLK); // next clock
                    #DELAY;
                end
                S_AXI_RREADY = 1'b1;
                @(posedge ACLK); // next clock
                while (~S_AXI_RVALID_d) begin // wait until S_AXI_RVALID becomes 1
                    #DELAY;
                    @(posedge ACLK); // next clock
                end
                #DELAY;
                // Latch the delayed copy: it belongs to the same sample as the RVALID_d/RLAST_d
                // values tested by this loop, whereas the raw pin may already have been changed
                // by the slave in response to the handshake.
                rdata = S_AXI_RDATA_d;
            end
            #DELAY;
            S_AXI_RREADY = 1'b0;
            axi_r_transaction_active = 1'b0; // AXI transaction finished
            @(posedge ACLK);
            #DELAY;
        end
    endtask
endmodule
`default_nettype wire
// median_vision_axis.c
// 2021/04/19 by marsee
//
#include <stdio.h>
#include <stdint.h>
#include "xil_io.h"
#include "xparameters.h"
#include "xmedian_blur_accel.h"
#include "xxf_8uc3_2axis.h"
#include "bmp_data.h"
#define ORG_PICT_XF_8UC4_ADDR 0x10000000
#define ORG_PICT_XF_8UC3_ADDR 0x10200000
#define FILTER_XF_8UC3_ADDR 0x10400000
#define FILTER_XF_8UC4_ADDR 0x10600000
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
int bmp_write_xf_8uc3(uint32_t xf_8uc4_addr, uint32_t xf_8uc3_addr);
void Xil_DCacheFlush(void);
// Bare-metal demo: runs the median-blur accelerator once on the packed 3-byte image, then
// streams either the original or the filtered image through xf_8uc3_2axis, selected from
// the console ('0' / '1', 'q' to quit).
int main(){
XMedian_blur_accel XMedian_blur_accel_ap;
XXf_8uc3_2axis XXf_8uc3_2axis_ap;
int inbyte_in;
// Both drivers are initialised with device id 0; the return codes are not checked.
XMedian_blur_accel_Initialize(&XMedian_blur_accel_ap, 0);
XXf_8uc3_2axis_Initialize(&XXf_8uc3_2axis_ap, 0);
// Same image geometry for both IPs.
XMedian_blur_accel_Set_rows(&XMedian_blur_accel_ap, (u32)VERTICAL_LINES);
XMedian_blur_accel_Set_cols(&XMedian_blur_accel_ap, (u32)HORIZONTAL_PIXELS);
XXf_8uc3_2axis_Set_rows(&XXf_8uc3_2axis_ap, (u32)VERTICAL_LINES);
XXf_8uc3_2axis_Set_cols(&XXf_8uc3_2axis_ap, (u32)HORIZONTAL_PIXELS);
// Filter reads the packed original and writes the packed result; the streamer starts on the original.
XMedian_blur_accel_Set_img_in(&XMedian_blur_accel_ap, (u32)ORG_PICT_XF_8UC3_ADDR);
XMedian_blur_accel_Set_img_out(&XMedian_blur_accel_ap, (u32)FILTER_XF_8UC3_ADDR);
XXf_8uc3_2axis_Set_p_src(&XXf_8uc3_2axis_ap, (u32)ORG_PICT_XF_8UC3_ADDR);
// Write the bitmap into memory in both layouts, then flush the data cache
// (presumably so the IPs see the pixels in DDR - confirm against the platform).
bmp_write_xf_8uc3(ORG_PICT_XF_8UC4_ADDR, ORG_PICT_XF_8UC3_ADDR);
Xil_DCacheFlush();
// Run the filter once and busy-wait for completion.
XMedian_blur_accel_Start(&XMedian_blur_accel_ap);
while(!XMedian_blur_accel_IsDone(&XMedian_blur_accel_ap));
// NOTE(review): the display controller is started before the stream source below;
// this ordering appears to be required - confirm before reordering.
Xil_Out32(XPAR_BITMAP_DISP_CONT_AXIS_0_BASEADDR, ORG_PICT_XF_8UC4_ADDR); // dummy address, start
XXf_8uc3_2axis_Start(&XXf_8uc3_2axis_ap);
XXf_8uc3_2axis_EnableAutoRestart(&XXf_8uc3_2axis_ap);
// Console loop: switch the streamer's source address on request.
while(1){
printf("\nPlease input <0> or <1> (<q> : exit) = ");
fflush(stdout);
inbyte_in = inbyte();
printf("%c", inbyte_in);
fflush(stdout);
switch(inbyte_in) {
case '0': //bmp image
XXf_8uc3_2axis_Set_p_src(&XXf_8uc3_2axis_ap, (u32)ORG_PICT_XF_8UC3_ADDR);
break;
case '1': // median filter
XXf_8uc3_2axis_Set_p_src(&XXf_8uc3_2axis_ap, (u32)FILTER_XF_8UC3_ADDR);
break;
case 'q': // exit
return(0);
}
}
}
// Writes bmp_file_array to memory in two layouts:
//   - at xf_8uc4_addr: one 32-bit word per pixel, 0xff in the top byte and the three colour
//     bytes [2],[1],[0] below it;
//   - at xf_8uc3_addr: the same pixels with the top byte dropped, packed back to back so that
//     every 4 pixels occupy 3 consecutive 32-bit words.
// Always returns 0.
int bmp_write_xf_8uc3(uint32_t xf_8uc4_addr, uint32_t xf_8uc3_addr){
    uint32_t pix[4] = {0, 0, 0, 0};

    for(int y=0; y<VERTICAL_LINES; y++){
        for(int x=0; x<HORIZONTAL_PIXELS; x++){
            // Unsigned on purpose: bit 31 is always set, which does not fit a signed int.
            const uint32_t xf_8uc4 = 0xff000000 + ((uint32_t)bmp_file_array[y][x][2]<<16)
                    +((uint32_t)bmp_file_array[y][x][1]<<8)+(uint32_t)bmp_file_array[y][x][0];

            Xil_Out32(xf_8uc4_addr+(y*HORIZONTAL_PIXELS+x)*sizeof(uint32_t), xf_8uc4);

            // The position inside the group of 4 pixels decides which packed word is complete.
            switch((x+y*HORIZONTAL_PIXELS)%4){
                case 0 : // only buffered; word 0 still needs one byte of pixel 1
                    pix[0] = xf_8uc4;
                    break;
                case 1 : // word 0 = pixel 0 (3 bytes) + low byte of pixel 1
                    pix[1] = xf_8uc4;
                    Xil_Out32(xf_8uc3_addr, ((pix[1]&0xff)<<24)+(pix[0]&0xffffff));
                    xf_8uc3_addr += sizeof(uint32_t);
                    break;
                case 2 : // word 1 = upper 2 bytes of pixel 1 + lower 2 bytes of pixel 2
                    pix[2] = xf_8uc4;
                    Xil_Out32(xf_8uc3_addr, ((pix[2]&0xffff)<<16)+((pix[1]&0xffff00)>>8));
                    xf_8uc3_addr += sizeof(uint32_t);
                    break;
                default : // 3: word 2 = top byte of pixel 2 + pixel 3 (3 bytes)
                    pix[3] = xf_8uc4;
                    Xil_Out32(xf_8uc3_addr, ((pix[3]&0xffffff)<<8)+((pix[2]&0xff0000)>>16));
                    xf_8uc3_addr += sizeof(uint32_t);
                    break;
            }
        }
    }

    // Flush the bytes still buffered when the pixel count is not a multiple of 4, so the last
    // partial word is not lost. Never taken for the 800x600 setting.
    switch((HORIZONTAL_PIXELS*VERTICAL_LINES)%4){
        case 1 :
            Xil_Out32(xf_8uc3_addr, pix[0]&0xffffff);
            break;
        case 2 :
            Xil_Out32(xf_8uc3_addr, (pix[1]&0xffff00)>>8);
            break;
        case 3 :
            Xil_Out32(xf_8uc3_addr, (pix[2]&0xff0000)>>16);
            break;
        default :
            break;
    }
    return(0);
}
がXil_Out32(XPAR_BITMAP_DISP_CONT_AXIS_0_BASEADDR, ORG_PICT_XF_8UC4_ADDR); // dummy address, start
の後になっていたからだった。後ろのモジュールからスタートさせる必要がある。XXf_8uc3_2axis_Start(&XXf_8uc3_2axis_ap);
XXf_8uc3_2axis_EnableAutoRestart(&XXf_8uc3_2axis_ap);
# Pin locations for the TMDS clock and the three TMDS data ports.
# Only the *_p ports are constrained in this fragment.
set_property PACKAGE_PIN H16 [get_ports TMDS_Clk_p_0]
set_property PACKAGE_PIN D19 [get_ports {TMDS_Data_p_0[0]}]
set_property PACKAGE_PIN C20 [get_ports {TMDS_Data_p_0[1]}]
set_property PACKAGE_PIN B19 [get_ports {TMDS_Data_p_0[2]}]
# I/O standard for the same four ports.
set_property IOSTANDARD TMDS_33 [get_ports TMDS_Clk_p_0]
set_property IOSTANDARD TMDS_33 [get_ports {TMDS_Data_p_0[2]}]
set_property IOSTANDARD TMDS_33 [get_ports {TMDS_Data_p_0[1]}]
set_property IOSTANDARD TMDS_33 [get_ports {TMDS_Data_p_0[0]}]
const int rows_cols_limit = (int)((float)rows * (float)cols * 3.0/4.0 + 0.76);
const int rows_cols_limit = (int)((ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)rows *
(ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)cols *
(ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)3.0/(ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)4.0 +
(ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)0.75);
// xf_8uc3_2axis.cpp
// 2021/04/13 by marsee
//
#include "ap_int.h"
#include "hls_stream.h"
#include "ap_axi_sdata.h"
#include "ap_fixed.h"
int dmar2axis(volatile ap_uint<32>* _src, int rows, int cols, hls::stream<ap_uint<32> >& axis_out);
int xf_8uc3s_2axis(hls::stream<ap_uint<32> >& axis_in, int rows, int cols, hls::stream<ap_axis<32,1,1,1> >& axis_out);
//#define DEBUG
// Top-level HLS function: reads packed pixel words from _src and emits one 32-bit AXI4-Stream
// beat per pixel on axis_out. The work is split into two dataflow stages connected by the
// local stream axis0: dmar2axis (memory -> word stream) and xf_8uc3s_2axis (word stream ->
// pixel stream). Always returns 0.
int xf_8uc3_2axis(volatile ap_uint<32>* _src, int rows, int cols, hls::stream<ap_axis<32,1,1,1> >& axis_out){
#pragma HLS INTERFACE axis register_mode=both register port=axis_out
#pragma HLS DATAFLOW
#pragma HLS INTERFACE s_axilite port=cols
#pragma HLS INTERFACE s_axilite port=rows
#pragma HLS INTERFACE m_axi depth=360000 bundle=gmem port=_src offset=slave
#pragma HLS INTERFACE s_axilite port=return
hls::stream<ap_uint<32> > axis0; // packed words passed between the two stages
dmar2axis(_src, rows, cols, axis0);
xf_8uc3s_2axis(axis0, rows, cols, axis_out);
return(0);
}
int dmar2axis(volatile ap_uint<32>* _src, int rows, int cols, hls::stream<ap_uint<32> >& axis_out){
const int rows_cols_limit = (int)((ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)rows *
(ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)cols *
(ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)3.0/(ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)4.0 +
(ap_ufixed<32,30,AP_TRN_ZERO,AP_SAT>)0.75);
ap_uint<32> pix;
//printf("rows_cols_limit = %d\n",rows_cols_limit);
LOOP_dr2a: for(int xy=0; xy<rows_cols_limit; xy++){
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT avg=360000 max=360000 min=360000
pix = _src[xy];
#ifdef DEBUG
if(xy < 10)
printf("%x\n", (unsigned int)pix);
#endif
axis_out << pix;
}
return(0);
}
// Unpacks a stream of 32-bit words holding back-to-back 3-byte pixels into one AXI4-Stream
// beat per pixel. Every group of 4 pixels consumes 3 input words; 0xff000000 is added to each
// output pixel. user is 1 on the first pixel of the frame, last is 1 on the final pixel of
// every row. Always returns 0.
int xf_8uc3s_2axis(hls::stream<ap_uint<32> >& axis_in, int rows, int cols, hls::stream<ap_axis<32,1,1,1> >& axis_out){
    ap_uint<32> rgb[3];          // the (up to) three words of the current 4-pixel group
    ap_axis<32,1,1,1> pix;

    LOOP_y:for(int y=0; y<rows; y++){
#pragma HLS LOOP_TRIPCOUNT avg=600 max=600 min=600
        LOOP_x:for(int x=0; x<cols; x++){
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT avg=800 max=800 min=800
            const int phase = (x + y * cols) % 4;   // position inside the 4-pixel group

            if(phase == 0){
                // pixel 0: low 3 bytes of word 0
                axis_in >> rgb[0];
                pix.data = (rgb[0] & 0xffffff) + 0xff000000;
            } else if(phase == 1){
                // pixel 1: top byte of word 0 + low 2 bytes of word 1
                axis_in >> rgb[1];
                pix.data = ((rgb[1] & 0xffff)<<8) + ((rgb[0] & 0xff000000)>>24) + 0xff000000;
            } else if(phase == 2){
                // pixel 2: top 2 bytes of word 1 + low byte of word 2
                axis_in >> rgb[2];
                pix.data = ((rgb[2] & 0xff)<<16) + ((rgb[1] & 0xffff0000)>>16) + 0xff000000;
            } else {
                // pixel 3: top 3 bytes of word 2, no new word is read
                pix.data = ((rgb[2] & 0xffffff00)>>8) + 0xff000000;
            }

            pix.user = (x==0 && y==0) ? 1 : 0;   // start of frame
            pix.last = (x == cols-1) ? 1 : 0;    // end of line
            axis_out << pix;
        }
    }
    return(0);
}
// xf_8uc3_2axis.cpp
// 2021/04/13 by marsee
//
#include "ap_int.h"
#include "hls_stream.h"
#include "ap_axi_sdata.h"
int dmar2axis(volatile ap_uint<32>* _src, int rows, int cols, hls::stream<ap_uint<32> >& axis_out);
int xf_8uc3s_2axis(hls::stream<ap_uint<32> >& axis_in, int rows, int cols, hls::stream<ap_axis<32,1,1,1> >& axis_out);
//#define DEBUG
// Top-level HLS function: reads packed pixel words from _src and emits one 32-bit AXI4-Stream
// beat per pixel on axis_out. The work is split into two dataflow stages connected by the
// local stream axis0: dmar2axis (memory -> word stream) and xf_8uc3s_2axis (word stream ->
// pixel stream). Always returns 0.
int xf_8uc3_2axis(volatile ap_uint<32>* _src, int rows, int cols, hls::stream<ap_axis<32,1,1,1> >& axis_out){
#pragma HLS INTERFACE axis register_mode=both register port=axis_out
#pragma HLS DATAFLOW
#pragma HLS INTERFACE s_axilite port=cols
#pragma HLS INTERFACE s_axilite port=rows
#pragma HLS INTERFACE m_axi depth=360000 bundle=gmem port=_src offset=slave
#pragma HLS INTERFACE s_axilite port=return
hls::stream<ap_uint<32> > axis0; // packed words passed between the two stages
dmar2axis(_src, rows, cols, axis0);
xf_8uc3s_2axis(axis0, rows, cols, axis_out);
return(0);
}
// Copies the packed pixel words from _src into axis_out, one word per loop iteration.
// rows*cols pixels of 3 bytes each occupy ceil(rows*cols*3/4) 32-bit words; that count is the
// number of words read. Always returns 0.
int dmar2axis(volatile ap_uint<32>* _src, int rows, int cols, hls::stream<ap_uint<32> >& axis_out){
    // Exact integer round-up. The previous float expression relied on a 0.76 fudge term and
    // loses precision once rows*cols exceeds 2^24. Non-positive products still give a limit
    // <= 0, so the loop body is skipped exactly as before.
    const int rows_cols_limit = (int)(((long long)rows * (long long)cols * 3 + 3) / 4);
    ap_uint<32> pix;
    //printf("rows_cols_limit = %d\n",rows_cols_limit);

    LOOP_dr2a: for(int xy=0; xy<rows_cols_limit; xy++){
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT avg=360000 max=360000 min=360000
        pix = _src[xy];
#ifdef DEBUG
        // Dump the first ten words when built with DEBUG.
        if(xy < 10)
            printf("%x\n", (unsigned int)pix);
#endif
        axis_out << pix;
    }
    return(0);
}
// Unpacks a stream of 32-bit words holding back-to-back 3-byte pixels into one AXI4-Stream
// beat per pixel. Every group of 4 pixels consumes 3 input words; 0xff000000 is added to each
// output pixel. user is 1 on the first pixel of the frame, last is 1 on the final pixel of
// every row. Always returns 0.
int xf_8uc3s_2axis(hls::stream<ap_uint<32> >& axis_in, int rows, int cols, hls::stream<ap_axis<32,1,1,1> >& axis_out){
    ap_uint<32> rgb[3];          // the (up to) three words of the current 4-pixel group
    ap_axis<32,1,1,1> pix;

    LOOP_y:for(int y=0; y<rows; y++){
#pragma HLS LOOP_TRIPCOUNT avg=600 max=600 min=600
        LOOP_x:for(int x=0; x<cols; x++){
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT avg=800 max=800 min=800
            const int phase = (x + y * cols) % 4;   // position inside the 4-pixel group

            if(phase == 0){
                // pixel 0: low 3 bytes of word 0
                axis_in >> rgb[0];
                pix.data = (rgb[0] & 0xffffff) + 0xff000000;
            } else if(phase == 1){
                // pixel 1: top byte of word 0 + low 2 bytes of word 1
                axis_in >> rgb[1];
                pix.data = ((rgb[1] & 0xffff)<<8) + ((rgb[0] & 0xff000000)>>24) + 0xff000000;
            } else if(phase == 2){
                // pixel 2: top 2 bytes of word 1 + low byte of word 2
                axis_in >> rgb[2];
                pix.data = ((rgb[2] & 0xff)<<16) + ((rgb[1] & 0xffff0000)>>16) + 0xff000000;
            } else {
                // pixel 3: top 3 bytes of word 2, no new word is read
                pix.data = ((rgb[2] & 0xffffff00)>>8) + 0xff000000;
            }

            pix.user = (x==0 && y==0) ? 1 : 0;   // start of frame
            pix.last = (x == cols-1) ? 1 : 0;    // end of line
            axis_out << pix;
        }
    }
    return(0);
}
// xf_8uc3_2axis_tb.cpp
// 2021/04/13 by marsee
//
#include "ap_int.h"
#include "hls_stream.h"
#include "ap_axi_sdata.h"
#include "opencv2/opencv.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgcodecs/imgcodecs.hpp"
int xf_8uc3_2axis(volatile ap_uint<32>* _src, int rows, int cols, hls::stream<ap_axis<32,1,1,1> >& axis_out);
int main(int argc, char **argv){
hls::stream<ap_axis<32,1,1,1> > axis_out;
ap_uint<32> *pp;
ap_axis<32,1,1,1> pix;
if (argc != 2) {
fprintf(stderr, "Usage: %s <INPUT IMAGE>", argv[0]);
exit(1);
}
cv::Mat in_img, out_img, conv_img;
in_img = cv::imread(argv[1], 1); // reading in the color image
if (in_img.data == NULL) {
fprintf(stderr, "ERROR: Cannot open image %s\n ", argv[1]);
exit(1);
}
out_img.create(in_img.rows, in_img.cols, CV_8UC4);
conv_img.create(in_img.rows, in_img.cols, CV_8UC3);
xf_8uc3_2axis((volatile ap_uint<32> *)in_img.data, in_img.rows, in_img.cols, axis_out);
pp = (ap_uint<32> *)out_img.data;
for(int y=0; y<in_img.rows; y++){
for(int x=0; x<in_img.cols; x++){
axis_out >> pix;
*pp++ = pix.data;
}
}
cv::cvtColor(out_img, conv_img, cv::COLOR_BGRA2BGR);
cv::imwrite("output.png", conv_img);
return(0);
}
-I/usr/local/include
-L/usr/local/lib -lopencv_core -lopencv_imgcodecs -lopencv_imgproc
test2.jpg
番号,時刻,アドレス,レジスタ名,R/W,データ,説明
1,46ns,0,PRERlo,W,0xfa,
2,76ns,0,PRERlo,W,0xc8,
3,106ns,1,PRERhi,W,0x00,
4,126ns,0,PRERlo,R,0xc8,
5,146ns,1,PRERhi,R,0x00,
6,176ns,2,CTR,W,0x80,"EN, I2C core enable bit. "
7,206ns,3,TXR,W,0x20,
8,226ns,4,CR,W,0x90,"STA, WR"
9,256ns,4,SR,R,0x03,"TIP, IF"
10,,,,,,0x20出力、ACK
11,"113,386ns",4,SR,R,0x41,"Busy, IF"
12,"113,446ns",3,TXR,W,0x01,
13,"113,466ns",4,CR,W,0x10,WR
14,"113,496ns",4,SR,R,0x43,"Busy, TIP, IF"
15,,,,,,0x01出力、ACK
16,"214,396ns",4,SR,R,0x41,"Busy, IF"
17,"214,426ns",3,TXR,W,0xa5,
18,"214,446ns",4,CR,W,0x10,WR
19,"314,966ns",4,SR,R,0x43,"Busy, TIP, IF"
20,,,,,,"0xa5, ACK"
21,"409,736ns",4,SR,R,0x41,"Busy, IF"
22,"409,766ns",3,TXR,W,0x5a,
23,"409,786ns",4,CR,W,0x50,"STO, WR"
24,"409,816ns",4,SR,R,0x43,"Busy, TIP, IF"
25,,,,,,0x5a出力、ACK、STOP
26,"521,926ns",4,SR,R,0x41,"Busy, IF"
27,"521,956ns",3,TXR,W,0x20,
28,"521,976ns",4,CR,W,0x90,"STA, WR"
29,"522,006ns",4,SR,R,0x43,"Busy, TIP, IF"
30,,,,,,STOP出力
31,"523,506ns",4,SR,R,0x03,"TIP, IF"
32,,,,,,START出力
33,"531,156ns",4,SR,R,0x43,"Busy, TIP, IF"
34,,,,,,0x20出力、ACK
35,"635,136ns",4,SR,R,0x41,"Busy, IF"
36,"635,166ns",3,TXR,W,0x01,
37,"635,186ns",4,CR,W,0x10,WR
38,"635,216ns",4,SR,R,0x43,"Busy, TIP, IF"
39,,,,,,0x01出力、ACK
40,"736,136ns",4,SR,R,0x41,"Busy, IF"
41,"736,166ns",3,TXR,W,0x21,
42,"736,186ns",4,CR,W,0x90,"STA, WR"
43,"736,216ns",4,SR,R,0x43,"Busy, TIP, IF"
44,,,,,,"Repeat Start, 0x21"
45,"850,366ns",4,SR,R,0x41,"Busy, IF"
46,"850,396ns",4,CR,W,0x20,"RD, ACK"
47,"850,426ns",4,SR,R,0x43,"Busy, TIP, IF"
48,,,,,,0xa5入力、ACK
49,"951,346ns",4,SR,R,0x41,"Busy, IF"
50,"951,376ns",3,RXR,R,0xa5,
51,"951,406ns",4,CR,W,0x20,"RD, ACK"
52,"951,436ns",4,SR,R,0x43,"Busy, TIP, IF"
53,,,,,,0x5a入力、ACK
54,"1,052,326ns",4,SR,R,0x41,"Busy, IF"
55,"1,052,356ns",3,RXR,R,0x5a,
56,"1,052,386ns",4,CR,W,0x20,"RD, ACK"
57,"1,052,416ns",4,SR,R,0x43,"Busy, TIP, IF"
58,,,,,,xx入力
59,"1,153,306ns",4,SR,R,0xX1,
60,"1,153,336ns",3,RXR,R,0xXX,
61,"1,153,366ns",4,CR,W,0x28,"RD, NACK"
62,"1,153,396ns",4,SR,R,0xX3,"TIP, IF"
63,,,,,,"xx入力, NACK"
64,"1,254,286ns",4,SR,R,0xX1,
65,"1,254,316ns",3,RXR,R,0xXX,
66,"1,254,346ns",3,TXR,W,0x20,
67,"1,254,366ns",4,CR,W,0x90,"STA, WR"
68,"1,254,396ns",4,SR,R,0xX3,"TIP, IF"
69,"1,265,046ns",4,SR,R,0xc3,"RxACK, Busy, TIP, IF"
70,,,,,,0x20出力、ACK
71,"1,368,516ns",4,SR,R,0x41,"Busy, IF"
72,"1,368,546ns",3,TXR,W,0x10,
73,"1,368,566ns",4,CR,W,0x10,WR
74,"1,368,596ns",4,SR,R,0x43,"Busy, TIP, IF"
75,,,,,,0x10出力、ACK
76,"1,469,516ns",4,SR,R,0xc1,"RxACK, Busy, IF"
77,"1,469,546ns",4,CR,W,0x40,STO
// bm_disp_axis_test.c
// 2021/04/09 by marsee
//
#include <stdio.h>
#include "xil_io.h"
#include "xparameters.h"
#include "xdma2axis.h"
#include "bmp_data.h"
#define FRAME_BUFFER_ADDRESS 0x10000000
#define DMA_DEST_ADDRESS 0x10200000
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
int bmp_write(unsigned int addr);
void Xil_DCacheFlush(void);
// Bare-metal demo: writes the bitmap into the frame buffer and starts the DMA2axis IP in
// auto-restart mode so the frame is streamed repeatedly.
int main(){
XDma2axis xdma2s_ap;
// The return code of the driver initialisation is not checked.
XDma2axis_Initialize(&xdma2s_ap, XPAR_DMA2AXIS_0_DEVICE_ID);
XDma2axis_Set_x_size(&xdma2s_ap, (u32)HORIZONTAL_PIXELS);
XDma2axis_Set_y_size(&xdma2s_ap, (u32)VERTICAL_LINES);
XDma2axis_Set_in_V(&xdma2s_ap, (u32)FRAME_BUFFER_ADDRESS);
// Fill the frame buffer, then flush the data cache
// (presumably so the IP sees the pixels in DDR - confirm against the platform).
bmp_write(FRAME_BUFFER_ADDRESS);
Xil_DCacheFlush();
// NOTE(review): the display controller is started before the DMA2axis source;
// this ordering appears to be required - confirm before reordering.
Xil_Out32(XPAR_BITMAP_DISP_CONT_AXIS_0_BASEADDR, (u32)DMA_DEST_ADDRESS); // bm_disp_axis start
XDma2axis_Start(&xdma2s_ap);
XDma2axis_EnableAutoRestart(&xdma2s_ap);
return(0);
}
// Writes bmp_file_array to memory starting at addr, one 32-bit word per pixel in row-major
// order. Each word is ([2] << 16) + ([1] << 8) + [0] of the pixel. Always returns 0.
int bmp_write(unsigned int addr){
    for(int row=0; row<VERTICAL_LINES; row++){
        for(int col=0; col<HORIZONTAL_PIXELS; col++){
            const int packed = ((int)bmp_file_array[row][col][2]<<16)
                    +((int)bmp_file_array[row][col][1]<<8)
                    +(int)bmp_file_array[row][col][0];
            Xil_Out32(addr+(row*HORIZONTAL_PIXELS+col)*sizeof(int), packed);
        }
    }
    return(0);
}
// DMA2axis.cpp
// 2021/04/08 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
int DMA2axis(volatile ap_int<32> *in, hls::stream<ap_axis<32,1,1,1> >& outs,
int x_size, int y_size){
#pragma HLS INTERFACE s_axilite port=y_size
#pragma HLS INTERFACE s_axilite port=x_size
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=outs
#pragma HLS INTERFACE m_axi depth=480000 port=in offset=slave
ap_axis<32,1,1,1> out_val;
for(int y=0; y<y_size; y++){
#pragma HLS LOOP_TRIPCOUNT avg=600 max=1080 min=480
for(int x=0; x<x_size; x++){
#pragma HLS LOOP_TRIPCOUNT avg=800 max=1920 min=640
#pragma HLS PIPELINE II=1
out_val.data = in[y*x_size+x];
if(x==0 && y==0)
out_val.user = 1;
else
out_val.user = 0;
if(x == x_size-1)
out_val.last = 1;
else
out_val.last = 0;
outs << out_val;
}
}
return(0);
}
// DMA2axis_tb.cpp
// 2021/04/08 by marsee
//
#include <stdint.h>
#include <stdio.h>
#include <vector>
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
int DMA2axis(volatile ap_int<32> *in, hls::stream<ap_axis<32,1,1,1> >& outs,
int x_size, int y_size);
// Test bench for DMA2axis: fills an 800 x 600 buffer with its own index, runs the function
// under test and checks that the stream returns the same sequence.
// Returns 0 on success, 1 on the first mismatch.
int main(){
    const int x_size = 800;
    const int y_size = 600;
    const int pixel_count = x_size * y_size;

    hls::stream<ap_axis<32,1,1,1> > outs;
    ap_axis<32,1,1,1> vals;

    // One element per pixel. The vector constructor takes an element count, not a byte count.
    std::vector<ap_int<32> > buf(pixel_count);

    // Fill the buffer with the expected data.
    for(int i=0; i<pixel_count; i++){
        buf[i] = i;
    }

    DMA2axis((volatile ap_int<32> *)&buf[0], outs, x_size, y_size);

    for(int i=0; i<pixel_count; i++){
        outs >> vals;
        if(vals.data != i){
            // ap_int is a class type and must be converted before going through varargs.
            printf("Error : i = %d; vals.data = %d\n", i, (int)vals.data);
            return(1);
        }
    }
    printf("Simulation succeeded without error\n");
    return(0);
}
// video_timing_param.vh
// by marsee
// 2014/07/26
// Video timing parameters selected by the RESOLUTION string parameter, which is declared
// outside this fragment. Any value other than "VGA", "SVGA", "XGA", "SXGA" or "HD" falls
// through to the HD numbers.
// Horizontal timing, in pixel clocks.
parameter integer H_ACTIVE_VIDEO = (RESOLUTION=="VGA") ? 640 : // VGA 25MHz
(RESOLUTION=="SVGA") ? 800 : // SVGA 40MHz
(RESOLUTION=="XGA") ? 1024 : // XGA 65MHz
(RESOLUTION=="SXGA") ? 1280 : // SXGA 108MHz
(RESOLUTION=="HD") ? 1920 : 1920; // HD 148.5MHz
parameter integer H_FRONT_PORCH = (RESOLUTION=="VGA") ? 16 : // VGA
(RESOLUTION=="SVGA") ? 40 : // SVGA
(RESOLUTION=="XGA") ? 24 : // XGA
(RESOLUTION=="SXGA") ? 48 : // SXGA
(RESOLUTION=="HD") ? 88 : 88; // HD
parameter integer H_SYNC_PULSE = (RESOLUTION=="VGA") ? 96 : // VGA
(RESOLUTION=="SVGA") ? 128 : // SVGA
(RESOLUTION=="XGA") ? 136 : // XGA
(RESOLUTION=="SXGA") ? 112 : // SXGA
(RESOLUTION=="HD") ? 44 : 44; // HD
parameter integer H_BACK_PORCH = (RESOLUTION=="VGA") ? 48 : // VGA
(RESOLUTION=="SVGA") ? 88 : // SVGA
(RESOLUTION=="XGA") ? 160 : // XGA
(RESOLUTION=="SXGA") ? 248 : // SXGA
(RESOLUTION=="HD") ? 148 : 148; // HD
// Vertical timing, in lines.
// NOTE(review): the VGA values sum to 524 lines and the XGA values to 805 lines; commonly
// published VESA timings are 525 (10/2/33) and 806 (3/6/29) - confirm these are intentional.
parameter integer V_ACTIVE_VIDEO = (RESOLUTION=="VGA") ? 480 : // VGA
(RESOLUTION=="SVGA") ? 600 : // SVGA
(RESOLUTION=="XGA") ? 768 : // XGA
(RESOLUTION=="SXGA") ? 1024 : // SXGA
(RESOLUTION=="HD") ? 1080 : 1080; // HD
parameter integer V_FRONT_PORCH = (RESOLUTION=="VGA") ? 11 : // VGA
(RESOLUTION=="SVGA") ? 1 : // SVGA
(RESOLUTION=="XGA") ? 2 : // XGA
(RESOLUTION=="SXGA") ? 1 : // SXGA
(RESOLUTION=="HD") ? 4 : 4; // HD
parameter integer V_SYNC_PULSE = (RESOLUTION=="VGA") ? 2 : // VGA
(RESOLUTION=="SVGA") ? 4 : // SVGA
(RESOLUTION=="XGA") ? 6 : // XGA
(RESOLUTION=="SXGA") ? 3 : // SXGA
(RESOLUTION=="HD") ? 5 : 5; // HD
parameter integer V_BACK_PORCH = (RESOLUTION=="VGA") ? 31 : // VGA
(RESOLUTION=="SVGA") ? 23 : // SVGA
(RESOLUTION=="XGA") ? 29 : // XGA
(RESOLUTION=="SXGA") ? 38 : // SXGA
(RESOLUTION=="HD") ? 36 : 36; // HD
// Totals per line and per frame.
parameter H_SUM = H_ACTIVE_VIDEO + H_FRONT_PORCH + H_SYNC_PULSE + H_BACK_PORCH;
parameter V_SUM = V_ACTIVE_VIDEO + V_FRONT_PORCH + V_SYNC_PULSE + V_BACK_PORCH;
// Active area divided into 8-pixel units (presumably 8x8 character cells - confirm).
parameter H_DISPLAY_SIZE = H_ACTIVE_VIDEO/8; // number of columns
parameter V_DISPLAY_SIZE = V_ACTIVE_VIDEO/8; // number of rows
parameter ALL_CHAR_SIZE = H_DISPLAY_SIZE*V_DISPLAY_SIZE;
// Position of the colour fields inside the 16-bit attribute word.
parameter RED_DOT_POS = 15; // bits 15..13 are RED
parameter GREEN_DOT_POS = 12; // bits 12..10 are GREEN
parameter BLUE_DOT_POS = 9; // bits 9..7 are BLUE
parameter COLOR_ATTRIB_WIDHT = 3; // bit width of each colour field
y_train = keras.utils.np_utils.to_categorical(y_train, num_classes)
y_test = keras.utils.np_utils.to_categorical(y_test, num_classes)
となったようで、 np_utils を抜かす様になったようだ。keras.utils.to_categorical(y, num_classes=None, dtype='float32')
に変更すると実行することができた。y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
# My MNIST CNN
# Conv2D - ReLU - MaxPooling - Dense - ReLU - Dense
# NOTE(review): the original header said the convolution layer has 5 feature maps, but the
# Conv2D layer below is created with 10 filters - confirm which one is intended.
# 2018/05/25 by marsee
# Reuses Python code from "Getting started with deep learning using Keras / TensorFlow"
# https://qiita.com/yampy/items/706d44417c433e68db0d
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Activation
from keras import backend as K

batch_size = 128
num_classes = 10
epochs = 30
img_rows, img_cols = 28, 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# The Keras backends (TensorFlow / Theano) order the input channel axis differently,
# so the reshape depends on the configured image data format.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Scale the pixel values to [0, 1].
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the labels.
y_train = y_train.astype('int32')
y_test = y_test.astype('int32')
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

model = Sequential()
model.add(Conv2D(10, kernel_size=(5, 5),
                 input_shape=input_shape))
model.add(Activation(activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(100))
model.add(Activation(activation='relu'))
model.add(Dense(num_classes))
model.add(Activation(activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                    verbose=1, validation_data=(x_test, y_test))
と言われてしまった。ModuleNotFoundError: No module named 'nngen'
Traceback (most recent call last):
File "/home/masaaki/anaconda3/bin/conda", line 7, in <module>
from conda.cli import main
ModuleNotFoundError: No module named 'conda'
# 「ゼロから作るDeep Learning」のCNNをNNgenでハードウェア化する
参照URL”「ゼロから作るDeep Learning」の畳み込みニューラルネットワークのハードウェア化1”
https://marsee101.blog.fc2.com/blog-entry-3829.html
NNgen/nngen https://github.com/NNgen/nngen
”「ゼロから作るDeep Learning」の畳み込みニューラルネットワークのハードウェア化1”のMNIST CNNをもう一度やってみる
```python
# train_convnet.py
# 2017/06/06 Quantization added with FPGA hardware implementation in mind, by marsee
# The original code is at https://github.com/oreilly-japan/deep-learning-from-scratch
# The modified code is also released under the MIT license. 2017/06/19 by marsee
# coding: utf-8
import sys, os
sys.path.append(os.pardir) # make files in the parent directory importable
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from trainer_int import Trainer
from simple_convnet_int import SimpleConvNet
# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
# Reduce the data set if processing takes too long
#x_train, t_train = x_train[:5000], t_train[:5000]
#x_test, t_test = x_test[:1000], t_test[:1000]
#max_epochs = 5
max_epochs = 20
# Network: 1x28x28 input, 10 convolution filters of size 5, 100 hidden units, 10 outputs
network = SimpleConvNet(input_dim=(1,28,28),
conv_param = {'filter_num': 10, 'filter_size': 5, 'pad': 0, 'stride': 1},
#conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
epochs=max_epochs, mini_batch_size=100,
optimizer='Adam', optimizer_param={'lr': 0.001},
evaluate_sample_num_per_epoch=1000)
trainer.train()
# The two triple-quoted strings below are disabled evaluation code kept for reference.
'''x_testn, t_testn = x_test[:500], t_test[:500]
test_accn = network.accuracy_msg(x_testn, t_testn)
print(test_accn)'''
'''train_acc = network.accuracy(x_train, t_train)
test_acc = network.accuracy(x_test, t_test)
print(train_acc, test_acc)
train_acc_int = network.accuracy_int(x_train, t_train)'''
#test_acc_int = network.accuracy_int(x_test, t_test)
#print(test_acc_int)
# Save the parameters
network.save_params("params.pkl")
print("Saved Network Parameters!")
# Plot the accuracy curves
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
```
train loss:2.301385118388284
=== epoch:1, train acc:0.184, test acc:0.188 ===
train loss:2.300783416724263
train loss:2.2985280211636856
train loss:0.0014416474424052506
train loss:0.00029753533974773714
train loss:0.0024815949870809124
train loss:0.001080201861468442
train loss:0.009542414638567448
train loss:0.0003716470091183171
train loss:0.0010014160283579254
train loss:0.000636980396385224
train loss:0.0007263677665641715
train loss:0.0025971938006329677
train loss:0.003134432045827001
train loss:0.0030433384352522848
train loss:0.0016623108178602428
train loss:0.004724656919902167
train loss:0.002754500627006845
train loss:0.00028287348906820743
train loss:0.0020828163320118203
train loss:0.0031006529569975816
train loss:0.005907909665949748
train loss:0.0037946706647029875
train loss:0.0007289706523717551
train loss:0.0006987468668795674
train loss:0.0027144383364796636
train loss:0.006576938863826107
=============== Final Test Accuracy ===============
test acc:0.9877
Saved Network Parameters!

一旦落としてからもう一度やる時のロード関数
```python
# coding: utf-8
# Rebuilds the same network as the training script and restores its weights from
# params.pkl, so the session can continue without retraining.
import sys, os
sys.path.append(os.pardir) # make files in the parent directory importable
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from trainer_int import Trainer
from simple_convnet_int import SimpleConvNet
# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
# Reduce the data set if processing takes too long
#x_train, t_train = x_train[:5000], t_train[:5000]
#x_test, t_test = x_test[:1000], t_test[:1000]
#max_epochs = 5
max_epochs = 20
network = SimpleConvNet(input_dim=(1,28,28),
conv_param = {'filter_num': 10, 'filter_size': 5, 'pad': 0, 'stride': 1},
#conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
hidden_size=100, output_size=10, weight_init_std=0.01)
network.load_params("params.pkl")
```
```python
# Evaluate the reloaded network on the test set with accuracy_int and print
# the returned value.
test_acc_int = network.accuracy_int(x_test, t_test)
print(test_acc_int)
```
0.9833
「ゼロから作るDeep Learning」のMNIST CNNの重みやバイアスの配列の形状を見て、reshapeでNNgenに合わせる
```python
# Shapes of the trained parameters exactly as the network stores them.
for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
    print(network.params[key].shape)


def _show_range(values):
    """Print the largest element of ``values``, then the smallest."""
    print(np.max(values))
    print(np.min(values))


# Convolution weights: reorder the axes with (0, 2, 3, 1) so the layout matches
# the NNgen variable shape used later.
W1n = np.transpose(network.params['W1'], (0, 2, 3, 1))
print(W1n.shape)
_show_range(W1n)
B1n = network.params['b1']
_show_range(B1n)

# First fully-connected layer: swap the two axes of the 2-D weight matrix.
W2n = network.params['W2'].T
print(W2n.shape)
_show_range(W2n)
B2n = network.params['b2']
_show_range(B2n)

# Second fully-connected layer: same axis swap.
W3n = network.params['W3'].T
print(W3n.shape)
_show_range(W3n)
B3n = network.params['b3']
_show_range(B3n)
```
(10, 1, 5, 5)
(10,)
(1440, 100)
(100,)
(100, 10)
(10,)
(10, 5, 5, 1)
0.6205409499741875
-0.9768615311384286
0.001046327841590367
-0.43582685576224633
(100, 1440)
0.8932422073086069
-0.9514574018404229
0.17520125723869304
-0.12940758873286193
(10, 100)
0.3224653381346921
-0.6238471654267962
0.09732122727552153
-0.07801633865297178
NNgenのオペレータを使用したMNIST CNNの実装
```python
# MNIST CNN described with NNgen operators:
# conv2d -> max_pool_serial -> reshape -> matmul -> matmul.
from __future__ import absolute_import
from __future__ import print_function
import sys
import os
import nngen as ng
# data types used for activations, weights, biases and scales
act_dtype = ng.int32
weight_dtype = ng.int8
bias_dtype = ng.int16
scale_dtype = ng.int8
batchsize = 1
# input: a single-channel 28x28 image per batch entry
input_layer = ng.placeholder(dtype=ng.int32,
shape=(batchsize, 28, 28, 1), # N, H, W, C
name='input_layer')
# layer 0: conv2d (with bias and scale (= batchnorm)), relu, max_pool
wn0 = ng.variable(dtype=weight_dtype,
shape=(10, 5, 5, 1), # Och, Ky, Kx, Ich
name='wn0')
# one bias and one scale entry per output channel of wn0
bn0 = ng.variable(dtype=bias_dtype,
shape=(wn0.shape[0],), name='bn0')
sn0 = ng.variable(dtype=scale_dtype,
shape=(wn0.shape[0],), name='sn0')
a0 = ng.conv2d(input_layer, wn0,
strides=(1, 1, 1, 1),
bias=bn0,
scale=sn0,
padding='VALID',
act_func=ng.relu,
dtype=act_dtype,
sum_dtype=ng.int32)
# 2x2 max pooling with stride 2
a0p = ng.max_pool_serial(a0,
ksize=(1, 2, 2, 1),
strides=(1, 2, 2, 1))
# flatten everything except the batch axis for the fully-connected layers
a0r = ng.reshape(a0p, [batchsize, -1])
# layer 1: full-connection, relu
# weights are stored as (outputs, inputs); matmul is called with transposed_b=True
wn1 = ng.variable(weight_dtype,
shape=(100, a0r.shape[-1]),
name='wn1')
bn1 = ng.variable(bias_dtype,
shape=(wn1.shape[0],),
name='bn1')
sn1 = ng.variable(scale_dtype,
shape=(wn1.shape[0],),
name='sn1')
a1 = ng.matmul(a0r, wn1,
bias=bn1,
scale=sn1,
transposed_b=True,
act_func=ng.relu,
dtype=act_dtype,
sum_dtype=ng.int32)
# layer 2: full-connection (no act_func is passed for this layer)
wn2 = ng.variable(weight_dtype,
shape=(10, a1.shape[-1]),
name='wn2')
bn2 = ng.variable(bias_dtype,
shape=(wn2.shape[0],),
name='bn2')
sn2 = ng.variable(scale_dtype,
shape=(wn2.shape[0],),
name='sn2')
# output: 10 values per batch entry
output_layer = ng.matmul(a1, wn2,
bias=bn2,
scale=sn2,
transposed_b=True,
name='output_layer',
dtype=act_dtype,
sum_dtype=ng.int32)
```
NNgenのMNIST CNNの重みやバイアスの配列の形状を確認する
```python
# Print the shape of every NNgen variable, then of the layer-0 intermediate
# tensors (convolution output, pooled output, flattened output).
for tensor in (wn0, bn0, wn1, bn1, wn2, bn2, a0, a0p, a0r):
    print(tensor.shape)
```
(10, 5, 5, 1)
(10,)
(100, 1440)
(100,)
(10, 100)
(10,)
(1, 24, 24, 10)
(1, 12, 12, 10)
(1, 1440)
「ゼロから作るDeep Learning」のMNIST CNNの重みやバイアスを整数化するために約128(実際の係数は127.9)を乗算するためのテスト
```python
# Show two sample weights before and after multiplying by 127.9.
probe_positions = ((0, 0, 0, 0), (0, 0, 1, 0))
for position in probe_positions:
    print(W1n[position])
W1n2 = W1n * 127.9
for position in probe_positions:
    print(W1n2[position])
```
0.10187240072522206
0.08383048453038348
13.029480052755902
10.721918971436049
「ゼロから作るDeep Learning」のMNIST CNNの重みやバイアスを整数化するために、重みには約128(127.9)、バイアスには約16384(16383.9)を乗算する
```python
# Scale factors: weights use a value just below 2**7, biases a value just
# below 2**14.
weight_scale = 127.9
bias_scale = 16383.9

# Layer 0 (convolution): scale and report the resulting value range.
W1n2 = W1n * weight_scale
print(np.max(W1n2))
print(np.min(W1n2))
B1n2 = B1n * bias_scale
print(np.max(B1n2))
print(np.min(B1n2))

# Fully-connected layers: scale only.
W2n2 = W2n * weight_scale
B2n2 = B2n * bias_scale
W3n2 = W3n * weight_scale
B3n2 = B3n * bias_scale
```
79.36718750169858
-124.94058983260501
17.142930723832414
-7140.5436221230675
NNgenのMNIST CNNの重みやバイアスに「ゼロから作るDeep Learning」のMNIST CNNの重みやバイアスの値を代入する
Aout = ((Ain * W + bias) * scale) >> rshift_out だそうなので、scaleは要素がすべて1の配列とする
```python
# --- layer 0 (conv2d) ---
# Convert the scaled floats to the integer widths of the NNgen variables; the
# scale vector is all ones.
wn0_value = W1n2.astype(np.int8)
bn0_value = B1n2.astype(np.int16)
sn0_value = np.ones(sn0.shape, dtype=np.int8)
wn0.set_value(wn0_value)
print(wn0_value[0, 0, 0, 0])
print(wn0_value[0, 0, 1, 0])
bn0.set_value(bn0_value)
sn0.set_value(sn0_value)
print(sn0_value[0])

# --- layer 1 (matmul) ---
wn1_value = W2n2.astype(np.int8)
bn1_value = B2n2.astype(np.int16)
sn1_value = np.ones(sn1.shape, dtype=np.int8)

# --- layer 2 (matmul, output) ---
wn2_value = W3n2.astype(np.int8)
bn2_value = B3n2.astype(np.int16)
sn2_value = np.ones(sn2.shape, dtype=np.int8)

# Hand the converted arrays to the NNgen variables, layer by layer.
for variable, value in ((wn1, wn1_value), (bn1, bn1_value), (sn1, sn1_value),
                        (wn2, wn2_value), (bn2, bn2_value), (sn2, sn2_value)):
    variable.set_value(value)
```
13
10
1
ハードウェア属性の割当
```python
# Assign hardware attributes to the operators.
# conv2d, matmul
# par_ich: parallelism in input-channel
# par_och: parallelism in output-channel
# par_col: parallelism in pixel column
# par_row: parallelism in pixel row
# cshamt_out: right shift amount after applying bias/scale
par_ich = 2
par_och = 2
# NOTE(review): this `cshamt_out` variable is not passed to any attribute()
# call below; the calls use the literals 0 and weight_dtype.width + 7 instead.
cshamt_out = weight_dtype.width - 1
# layer 0 and layer 1: no right shift on the output
a0.attribute(par_ich=par_ich, par_och=par_och,
cshamt_out=0)
a1.attribute(par_ich=par_ich, par_och=par_och,
cshamt_out=0)
# output layer: right shift by weight_dtype.width + 7
# (presumably 15 bits for ng.int8 -- confirm the value of weight_dtype.width)
output_layer.attribute(par_ich=par_ich, par_och=par_och,
cshamt_out=weight_dtype.width +7)
# max_pool
# par: parallelism in in/out channel
par = par_och
a0p.attribute(par=par)
```
NNgenデータフローをソフトウェアとして実行して、DNNモデルの動作を確認
```python
# Labels of the first five test samples, one per line, followed by the shape
# of a single test image.
for sample_index in range(5):
    print(t_test[sample_index])
print(x_test[0].shape)
```
7
2
1
0
4
(1, 28, 28)
```python
# Run the NNgen dataflow in software on test sample index 1.
# Move the leading axis of the sample to the end, then reshape to the
# placeholder shape.
input_layer_value = x_test[1].transpose(1,2,0)
input_layer_value = input_layer_value.reshape(input_layer.shape)
print(input_layer_value.shape)
# Scale by the same 127.9 factor used for the weights, then convert to int8
# (astype drops the fractional part).
# NOTE(review): the placeholder was declared with dtype ng.int32 while this
# array is int8 -- confirm ng.eval accepts it as intended.
input_layer_value = input_layer_value * 127.9
input_layer_value = input_layer_value.astype(np.int8)
# Evaluate the graph; eval_outs holds one result per requested output.
eval_outs = ng.eval([output_layer], input_layer=input_layer_value)
output_layer_value = eval_outs[0]
print(output_layer_value)
```
(1, 28, 28, 1)
[[-112 -155 -19 7 -57 29 -42 -275 37 -118]]
```python
# Reference: labels of the first five test samples, then the output of the
# network's predict_int for the same five samples.
print(t_test[0:5])
print(network.predict_int(x_test[0:5]))
```
[7 2 1 0 4]
[[ -8.59375 -12.0625 -5.34375 -2.25 -29.25 -12.09375 -30.8125
16.65625 -9.90625 -1.1875 ]
[ -8.59375 3.25 18.0625 -4.59375 -32. -12.375 -7.40625
-23.46875 -12.3125 -23.75 ]
[-14.15625 4.71875 -5.96875 -8.21875 -3.84375 -9.8125 -11.34375
-3.90625 -0.375 -5.53125]
[ 15.25 -12.4375 -6.40625 -6.8125 -16.84375 -5.03125 -0.3125
0.625 -10.71875 -5.84375]
[-20.28125 -13.0625 -9.3125 -10.84375 12.5 -6.03125 -9.46875
-6.5625 -5.375 2.28125]]
「ゼロから作るDeep Learning」の畳み込みニューラルネットワークで、NNgen用に変換した重みとバイアスを使って正しい推論ができるかどうかを検証する
```python
# Replace every parameter with its scaled int32 version: weights are multiplied
# by 127.9, biases by 16383.9.
# The parameters are overwritten in place, so running this cell a second time
# scales the already-scaled values again.
for key, factor in (('W1', 127.9), ('b1', 16383.9),
                    ('W2', 127.9), ('b2', 16383.9),
                    ('W3', 127.9), ('b3', 16383.9)):
    network.params[key] = np.int32(network.params[key] * factor)
```
```python
# For each parameter, print its maximum and then its minimum.
for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
    parameter = network.params[key]
    print(np.max(parameter))
    print(np.min(parameter))
```
79
-124
17
-7140
114
-121
2870
-2120
41
-79
1594
-1278
```python
# Expected labels for the first five test samples.
print(t_test[0:5])
# Scale the whole test set by 127.9 and convert it to int32.
x_test_rshft7 = np.int32(x_test * 127.9)
# Predict the first five samples, divide the scores by 512 and convert the
# quotient to int32.
predict_result = np.int32(network.predict(x_test_rshft7[0:5]) / 512)
print(predict_result)
```
[7 2 1 0 4]
[[-2 -3 -1 -1 -8 -4 -9 5 -3 0]
[-2 0 4 -2 -9 -3 -1 -8 -2 -6]
[-3 1 -1 -2 -1 -3 -3 -1 0 -2]
[ 4 -3 -2 -1 -4 -1 0 0 -2 -1]
[-4 -3 -2 -3 4 -2 -3 -1 -1 0]]
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | - | - | 1 | 2 | 3 |
4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 | 27 | 28 | 29 | 30 | - |