// affine_layer1.h
// 2018/02/25 by marsee
//
#ifndef __AFFINE_LAYER1_H__
#define __AFFINE_LAYER1_H__
#include <ap_fixed.h>
// AXI4-Stream style element carrying two fixed-point values plus side-channel fields.
//   W, I      : total bit width and integer bit width of each ap_fixed value
//   U, TI, TD : bit widths of the user, id and dest fields
template<int W, int I, int U, int TI, int TD>
struct ap_fixed2_axis{
// Payload: two values with the same fixed-point format.
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
ap_fixed<W,I,AP_TRN,AP_WRAP> data1;
} data;
// NOTE(review): keep/strb are sized for a single W-bit value although the payload holds two - confirm this is intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of the two-value stream element: two floats plus
// the same set of side-channel fields.
//   U, TI, TD : bit widths of the user, id and dest fields
template<int U, int TI, int TD>
struct float2_axis{
// Payload: two float values.
struct data {
float data0;
float data1;
} data;
ap_uint<1> keep;
ap_uint<1> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Length of the vector produced by the first affine layer.
#define NUMBER_OF_MIDDLE_LAYER 100
// Fixed-point output vector of the affine layer.
typedef struct {
ap_fixed<19,7,AP_TRN,AP_WRAP> data [NUMBER_OF_MIDDLE_LAYER];
} mdata_type;
// Float vector of the same length (presumably for a floating-point reference model - not used in the visible code).
typedef struct {
float data [NUMBER_OF_MIDDLE_LAYER];
} fmdata_type;
// Fixed-point type of one affine-layer value: 19 bits total, 7 integer bits.
typedef ap_fixed<19,7,AP_TRN,AP_WRAP> affine_type;
// Number of rows and columns of stream elements consumed per frame by the affine layer.
#define V_PRE_LAYER_HIGHT 3
#define H_PRE_LAYER_WIDTH 26
#endif
// affine_layer1.cpp
// 2018/02/26 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "affine_layer1.h"
#include "af1_weight.h"
#include "af1_bias.h"
// First affine (fully connected) layer followed by ReLU.
// Discards stream elements until one with a non-zero user field arrives, then
// consumes V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH elements in total. For each
// element, data0 and data1 are multiplied by their rows of af1_weight and
// accumulated into 100 sums. After the last element af1_bias is added and
// negative sums are replaced by 0; the sums are then copied to outd.data.
//   ins  : input stream of two-value fixed-point elements
//   outd : output vector, written after the whole frame has been consumed
// Returns 0.
int affine_layer1(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
mdata_type & outd){
//#pragma HLS ARRAY_PARTITION variable=af1_weight complete dim=1
#pragma HLS INTERFACE ap_hs register port=outd
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=ins
#pragma HLS DATA_PACK variable=outd
ap_fixed2_axis<16,6,1,1,1> stdata;
// One accumulator per output; the literal 100 equals NUMBER_OF_MIDDLE_LAYER, the length of outd.data.
affine_type dot[100];
//#pragma HLS ARRAY_PARTITION variable=dot complete dim=1
Loop1: do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// The frame starts when user becomes 1
ins >> stdata;
} while(stdata.user == 0);
Loop2: for (int y=0; y<V_PRE_LAYER_HIGHT; y++){
Loop3: for (int x=0; x<H_PRE_LAYER_WIDTH; x++){
//#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the first element has already been read by Loop1
ins >> stdata; // read from the AXI4-Stream input
Loop4: for (int col=0; col<100; col++){
//#pragma HLS PIPELINE II=1
if (x==0 && y==0) // clear the accumulator on the first element
dot[col] = 0;
// data0 uses weight rows [0, V*H); data1 uses the following V*H rows.
dot[col] += stdata.data.data0 * af1_weight[y*H_PRE_LAYER_WIDTH+x][col];
dot[col] += stdata.data.data1 * af1_weight[V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH+y*H_PRE_LAYER_WIDTH+x][col];
if (y==V_PRE_LAYER_HIGHT-1 && x==H_PRE_LAYER_WIDTH-1){ // on the last element, add the bias
dot[col] += af1_bias[col];
if(dot[col] < 0) // ReLU
dot[col] = 0;
}
}
}
}
Loop5: for (int col=0; col<100; col++){ // copy the sums to the output
#pragma HLS UNROLL
outd.data[col] = dot[col];
}
return(0);
}
そして、その IO レベルのインターフェースは ap_hs にしている。
#pragma HLS DATA_PACK variable=outd
#pragma HLS INTERFACE ap_hs register port=outd
Failed checking during preprocessing.
while executing
"source C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO_Z7-20/test/affine_layer1/solution1/csynth.tcl"
invoked from within
"hls::main C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO_Z7-20/test/affine_layer1/solution1/csynth.tcl"
("uplevel" body line 1)
invoked from within
"uplevel 1 hls::main {*}$args"
(procedure "hls_proc" line 5)
invoked from within
"hls_proc $argv"
Finished C synthesis.
affine_layer1/affine_layer1.cpp:38:24: error: no viable overloaded '='
dot[col] = 0;
affine_layer1/affine_layer1.cpp:40:23: error: no viable overloaded '+='
dot[col] += stdata.data.data0 * af1_weight[y*26 +x][col];
// Output vector type of the affine layer: NUMBER_OF_MIDDLE_LAYER fixed-point values.
typedef struct {
ap_fixed<19,7,AP_TRN,AP_WRAP> data [NUMBER_OF_MIDDLE_LAYER];
} mdata_type;
// Fixed-point type of one affine-layer value: 19 bits total, 7 integer bits.
typedef ap_fixed<19,7,AP_TRN,AP_WRAP> affine_type;
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
// max_pooling.h
// 2018/02/19 by marsee
//
#ifndef __MAX_POOLING_H__
#define __MAX_POOLING_H__
#include <ap_fixed.h>
// AXI4-Stream style element carrying two fixed-point values plus side-channel fields.
//   W, I      : total bit width and integer bit width of each ap_fixed value
//   U, TI, TD : bit widths of the user, id and dest fields
template<int W, int I, int U, int TI, int TD>
struct ap_fixed2_axis{
// Payload: two values with the same fixed-point format.
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
ap_fixed<W,I,AP_TRN,AP_WRAP> data1;
} data;
// NOTE(review): keep/strb are sized for a single W-bit value although the payload holds two - confirm this is intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of the two-value stream element: two floats plus
// the same set of side-channel fields.
//   U, TI, TD : bit widths of the user, id and dest fields
template<int U, int TI, int TD>
struct float2_axis{
// Payload: two float values.
struct data {
float data0;
float data1;
} data;
ap_uint<1> keep;
ap_uint<1> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Size of the input frame: elements per row and number of rows.
#define H_PIXEL_WIDTH_IN 52
#define V_PIXEL_WIDTH_IN 6
// Size of the pooled output frame.
#define H_PIXEL_WIDTH_OUT 26
#define V_PIXEL_WIDTH_OUT 3
// Side length of the square pooling window.
#define ARRAY_SIZE 2
// Number of values carried per stream element (data0 and data1).
#define NUMBER_OF_KERNEL 2
// Horizontal and vertical pooling strides.
#define X_STRIDE 2
#define Y_STRIDE 2
// Fixed-point type of the pooled values: 16 bits total, 6 integer bits.
typedef ap_fixed<16, 6, AP_TRN, AP_WRAP> conv_type;
#endif
// max_pooling.cpp
// 2018/02/20 by marsee
// 2018/04/20 : bug fix
// 2018/04/25 : Loop10 bug fix
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "max_pooling.h"
// Max pooling over an ARRAY_SIZE x ARRAY_SIZE window for both values of each
// stream element.
// Discards input elements until one with a non-zero user field arrives, then
// consumes V_PIXEL_WIDTH_IN*H_PIXEL_WIDTH_IN elements. A per-channel line
// buffer and window register hold the previous row and column; the window
// maximum is emitted only at positions selected by X_STRIDE/Y_STRIDE.
//   ins  : input stream of two-value fixed-point elements
//   outs : output stream of pooled elements (user set on the first output,
//          last set on the final column of a row)
// Returns 0.
int max_pooling(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs){
#pragma HLS INTERFACE axis port=ins
#pragma HLS INTERFACE axis port=outs
#pragma HLS INTERFACE s_axilite port=return
ap_fixed2_axis<16,6,1,1,1> pix;
ap_fixed2_axis<16,6,1,1,1> mp_out;
conv_type line_buf[NUMBER_OF_KERNEL][ARRAY_SIZE-1][H_PIXEL_WIDTH_IN];
#pragma HLS ARRAY_PARTITION variable=line_buf block factor=2 dim=1
#pragma HLS ARRAY_PARTITION variable=line_buf block factor=1 dim=2
conv_type pix_mat[NUMBER_OF_KERNEL][ARRAY_SIZE][ARRAY_SIZE];
#pragma HLS array_partition variable=pix_mat complete
conv_type val[NUMBER_OF_KERNEL], conv_data;
Loop1: do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// The frame starts when user becomes 1
ins >> pix;
} while(pix.user == 0);
Loop2: for (int y=0; y<V_PIXEL_WIDTH_IN; y++){
Loop3: for (int x=0; x<H_PIXEL_WIDTH_IN; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the first element has already been read by Loop1
ins >> pix; // read from the AXI4-Stream input
Loop4: for (int n=0; n<NUMBER_OF_KERNEL; n++){
#pragma HLS UNROLL
// Select the value belonging to channel n.
if (n == 0)
conv_data = pix.data.data0;
else
conv_data = pix.data.data1;
// Shift the window one column to the left
Loop5 : for (int k=0; k<ARRAY_SIZE; k++){
#pragma HLS UNROLL
Loop6 : for (int m=0; m<ARRAY_SIZE-1; m++){
pix_mat[n][k][m] = pix_mat[n][k][m+1];
}
}
Loop7: for (int i=0; i<ARRAY_SIZE-1; i++){ // load the previous rows for this column from line_buf
pix_mat[n][i][ARRAY_SIZE-1] = line_buf[n][i][x];
}
pix_mat[n][ARRAY_SIZE-1][ARRAY_SIZE-1] = conv_data; // put the new value in the last cell of pix_mat
Loop8: for (int i=0; i<ARRAY_SIZE-2; i++){ // move line buffer rows up (no iterations when ARRAY_SIZE is 2)
line_buf[n][i][x] = line_buf[n][i+1][x];
}
line_buf[n][ARRAY_SIZE-2][x] = conv_data;
// Find the maximum of the window
Loop9 : for (int k=0; k<ARRAY_SIZE; k++){
#pragma HLS UNROLL
Loop10 : for (int m=0; m<ARRAY_SIZE; m++){
if (k==0 && m==0){
val[n] = pix_mat[n][k][m];
} else if (val[n] < pix_mat[n][k][m]){
val[n] = pix_mat[n][k][m];
}
}
}
if (n == 0)
mp_out.data.data0 = val[0];
else
mp_out.data.data1 = val[1];
if (x==X_STRIDE-1 && y==Y_STRIDE-1){ // assert TUSER on the first output element
mp_out.user = 1;
} else {
mp_out.user = 0;
}
if (x == H_PIXEL_WIDTH_IN-1){ // assert TLAST at the end of a row
mp_out.last = 1;
} else {
mp_out.last = 0;
}
}
if (x%X_STRIDE==X_STRIDE-1 && y%Y_STRIDE==Y_STRIDE-1){ // emit only at stride positions
outs << mp_out;
}
}
}
return(0);
}
// max_pooling_tb.cpp
// 2018/02/23 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "max_pooling.h"
#include "relu_output.h"
// Fixed-point max pooling function under test.
int max_pooling(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs);
// Floating-point reference implementation used for comparison.
int max_pooling_soft(hls::stream<float2_axis<1,1,1> >& ins,
hls::stream<float2_axis<1,1,1> >& outs);
// Test bench for max_pooling().
// Feeds the same frame (relu_out / relu_fout) to the fixed-point function and
// to the floating-point reference max_pooling_soft(), compares both outputs
// element by element, and writes them to max_pooling_output.h.
// Returns 0 when the run completes (mismatches are reported but deliberately
// do not abort, so the output header is still generated), or 1 when the
// output header cannot be created.
int main(){
    using namespace std;

    hls::stream<ap_fixed2_axis<16,6,1,1,1> > ins;
    hls::stream<float2_axis<1,1,1> > ins_soft;
    hls::stream<ap_fixed2_axis<16,6,1,1,1> > outs;
    hls::stream<float2_axis<1,1,1> > outs_soft;

    float mp_fout[H_PIXEL_WIDTH_OUT*V_PIXEL_WIDTH_OUT][2];
    conv_type mp_out[H_PIXEL_WIDTH_OUT*V_PIXEL_WIDTH_OUT][2];

    ap_fixed2_axis<16,6,1,1,1> pix;
    float2_axis<1,1,1> fpix;
    int mismatch_count = 0;

    // Dummy elements with user == 0; both implementations must skip them.
    for(int i=0; i<5; i++){
        pix.user = 0;
        pix.data.data0 = (conv_type)i;
        pix.data.data1 = (conv_type)i;
        ins << pix;

        fpix.user = 0;
        fpix.data.data0 = (float)i;
        fpix.data.data1 = (float)i;
        ins_soft << fpix;
    }

    // Push one frame into ins and ins_soft.
    for(int j=0; j < V_PIXEL_WIDTH_IN; j++){
        for(int i=0; i < H_PIXEL_WIDTH_IN; i++){
            pix.data.data0 = relu_out[j*H_PIXEL_WIDTH_IN+i][0];
            pix.data.data1 = relu_out[j*H_PIXEL_WIDTH_IN+i][1];
            fpix.data.data0 = relu_fout[j*H_PIXEL_WIDTH_IN+i][0];
            fpix.data.data1 = relu_fout[j*H_PIXEL_WIDTH_IN+i][1];

            if (j==0 && i==0){ // TUSER is 1 only on the first element of the frame
                pix.user = 1;
                fpix.user = 1;
            } else {
                pix.user = 0;
                fpix.user = 0;
            }

            if (i == H_PIXEL_WIDTH_IN-1){ // TLAST is 1 on the last element of each row
                pix.last = 1;
                fpix.last = 1;
            } else {
                pix.last = 0;
                fpix.last = 0;
            }

            ins << pix;
            ins_soft << fpix;
        }
    }

    max_pooling(ins, outs);
    max_pooling_soft(ins_soft, outs_soft);

    // Drain outs / outs_soft into mp_out[][] / mp_fout[][] and compare them.
    for(int j=0; j < V_PIXEL_WIDTH_OUT; j++){
        for(int i=0; i < H_PIXEL_WIDTH_OUT; i++){
            outs >> pix;
            outs_soft >> fpix;

            mp_out[j*H_PIXEL_WIDTH_OUT+i][0] = pix.data.data0;
            mp_out[j*H_PIXEL_WIDTH_OUT+i][1] = pix.data.data1;
            mp_fout[j*H_PIXEL_WIDTH_OUT+i][0] = fpix.data.data0;
            mp_fout[j*H_PIXEL_WIDTH_OUT+i][1] = fpix.data.data1;

            printf("%d, %d, data0 = %f, data1 = %f, fdata0 = %f, fdata1 = %f\n", j, i, (float)pix.data.data0, (float)pix.data.data1, fpix.data.data0, fpix.data.data1);

            // A squared error above 4 counts as a mismatch.
            const double diff0 = (double)pix.data.data0 - (double)fpix.data.data0;
            const double diff1 = (double)pix.data.data1 - (double)fpix.data.data1;
            if (diff0*diff0 > 4 || diff1*diff1 > 4){
                // i and j are int, so the conversion must be %d (was %ld).
                printf("ERROR HW and SW results mismatch i = %d, j = %d, HW = %f, %f, SW = %f, %f\n", i, j, (float)pix.data.data0, (float)pix.data.data1, fpix.data.data0, fpix.data.data1);
                mismatch_count++;
            }
        }
    }

    // Only claim success when no element exceeded the tolerance.
    if (mismatch_count == 0)
        cout << "Success HW and SW results match" << endl;
    else
        cout << "ERROR " << mismatch_count << " HW and SW results mismatch" << endl;
    cout << endl;

    // Write the pooling results to a header file.
    ofstream OH("max_pooling_output.h");
    if (!OH){
        fprintf(stderr, "Can't open max_pooling_output.h for writing\n");
        return(1);
    }
    OH << "// max_pooling_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
    // setw() applies to the next field only, so it is repeated for every time field.
    OH << " " << setfill('0') << setw(2) << localNow->tm_hour << ":" << setw(2) << localNow->tm_min << ":" << setw(2) << localNow->tm_sec << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __MAX_POOLING_OUTPUT_H__" << endl;
    OH << "#define __MAX_POOLING_OUTPUT_H__" << endl;
    OH << endl;

    OH << "const float mp_fout[" << V_PIXEL_WIDTH_OUT*H_PIXEL_WIDTH_OUT << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<V_PIXEL_WIDTH_OUT ; y++){
        for (int x=0; x<H_PIXEL_WIDTH_OUT ; x++){
            OH << " {" << fixed << setprecision(12) << mp_fout[H_PIXEL_WIDTH_OUT*y+x][0] << ", "
               << mp_fout[H_PIXEL_WIDTH_OUT*y+x][1] << "}";
            if (y==V_PIXEL_WIDTH_OUT-1 && x==H_PIXEL_WIDTH_OUT-1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;

    OH << "const ap_fixed<16, 6, AP_TRN, AP_WRAP> mp_out[" << V_PIXEL_WIDTH_OUT*H_PIXEL_WIDTH_OUT << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<V_PIXEL_WIDTH_OUT ; y++){
        for (int x=0; x<H_PIXEL_WIDTH_OUT ; x++){
            OH << " {" << fixed << setprecision(12) << (float)mp_out[H_PIXEL_WIDTH_OUT*y+x][0] << ", "
               << (float)mp_out[H_PIXEL_WIDTH_OUT*y+x][1] << "}";
            if (y==V_PIXEL_WIDTH_OUT -1 && x==H_PIXEL_WIDTH_OUT -1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;

    return(0);
}
// Floating-point reference model of max_pooling().
// Discards input elements until one with a non-zero user field arrives, reads
// a whole V_PIXEL_WIDTH_IN x H_PIXEL_WIDTH_IN frame into a local array, then
// emits the maximum of every 2x2 window (stride 2) for both channels.
//   ins  : input stream of two-value float elements
//   outs : output stream of pooled elements; user is 1 on the first output,
//          last is 1 on the final output of each row
// Returns 0.
int max_pooling_soft(hls::stream<float2_axis<1,1,1> >& ins,
        hls::stream<float2_axis<1,1,1> >& outs){
    float2_axis<1,1,1> fpix;
    float fpixd_ary[NUMBER_OF_KERNEL][V_PIXEL_WIDTH_IN][H_PIXEL_WIDTH_IN];
    float fval[NUMBER_OF_KERNEL];

    do {
        // The frame starts when user becomes 1
        ins >> fpix;
    } while(fpix.user == 0);

    // Buffer the whole frame; the first element was already read above.
    for (int y=0; y<V_PIXEL_WIDTH_IN; y++){
        for (int x=0; x<H_PIXEL_WIDTH_IN; x++){
            if (!(x==0 && y==0))
                ins >> fpix;

            fpixd_ary[0][y][x] = fpix.data.data0;
            fpixd_ary[1][y][x] = fpix.data.data1;
        }
    }

    for (int y=0; y<V_PIXEL_WIDTH_IN-1; y+=2){
        for (int x=0; x<H_PIXEL_WIDTH_IN-1; x+=2){
            // Maximum of the 2x2 window at (y, x) for each channel p.
            for(int p=0; p<2; p++){
                for(int m=0; m<2; m++){
                    for(int n=0; n<2; n++){
                        if(m==0 && n==0){
                            fval[p] = fpixd_ary[p][y][x];
                        } else if(fval[p] < fpixd_ary[p][y+m][x+n]){
                            fval[p] = fpixd_ary[p][y+m][x+n];
                        }
                    }
                }
            }
            fpix.data.data0 = fval[0];
            fpix.data.data1 = fval[1];

            if(x==0 && y==0)
                fpix.user = 1;
            else
                fpix.user = 0;

            // The last window of a row is the one after which the x loop ends.
            // The previous test (x == V_PIXEL_WIDTH_OUT-2, i.e. x == 1) could
            // never be true because x only takes even values.
            if(x+2 >= H_PIXEL_WIDTH_IN-1)
                fpix.last = 1;
            else
                fpix.last = 0;

            outs << fpix;
        }
    }
    return(0);
}
F1起動前準備&EC2インスタンスの起動
FPGAアクセラレーション体験
SDAccelによるF1アプリケーション開発
後片付けとまとめ
// relu.h
// 2018/02/20 by marsee
//
#ifndef __RELU_H__
#define __RELU_H__
#include <ap_fixed.h>
// AXI4-Stream style element carrying two fixed-point values plus side-channel fields.
//   W, I      : total bit width and integer bit width of each ap_fixed value
//   U, TI, TD : bit widths of the user, id and dest fields
template<int W, int I, int U, int TI, int TD>
struct ap_fixed2_axis{
// Payload: two values with the same fixed-point format.
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
ap_fixed<W,I,AP_TRN,AP_WRAP> data1;
} data;
// NOTE(review): keep/strb are sized for a single W-bit value although the payload holds two - confirm this is intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of the two-value stream element: two floats plus
// the same set of side-channel fields.
//   U, TI, TD : bit widths of the user, id and dest fields
template<int U, int TI, int TD>
struct float2_axis{
// Payload: two float values.
struct data {
float data0;
float data1;
} data;
ap_uint<1> keep;
ap_uint<1> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Size of the frame processed by relu(): elements per row and number of rows.
#define HORIZONTAL_PIXEL_WIDTH 52
#define VERTICAL_PIXEL_WIDTH 6
// NOTE(review): ARRAY_SIZE is not used by the relu code visible here - confirm whether it is still needed.
#define ARRAY_SIZE 2
// Number of values carried per stream element (data0 and data1).
#define NUMBER_OF_KERNEL 2
// Fixed-point type of the values: 16 bits total, 6 integer bits.
typedef ap_fixed<16, 6, AP_TRN, AP_WRAP> conv_type;
#endif
// relu.cpp
// 2018/02/20 by marsee
// 2018/02/23 : 0 を conv_type(0.0) に変更
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "relu.h"
// ReLU over a stream of two-value fixed-point elements.
// Discards input elements until one with a non-zero user field arrives, then
// processes VERTICAL_PIXEL_WIDTH*HORIZONTAL_PIXEL_WIDTH elements: negative
// data0/data1 values are replaced by 0 and the element, with its other fields
// unchanged, is written to outs.
//   ins  : input stream
//   outs : output stream, one element per processed input element
// Returns 0.
int relu(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs){
#pragma HLS INTERFACE axis port=ins
#pragma HLS INTERFACE axis port=outs
#pragma HLS INTERFACE s_axilite port=return
ap_fixed2_axis<16,6,1,1,1> pix;
do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// The frame starts when user becomes 1
ins >> pix;
} while(pix.user == 0);
Loop1: for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
Loop2: for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the first element has already been read above
ins >> pix; // read from the AXI4-Stream input
if (pix.data.data0 < conv_type(0.0)) // replace a negative value with 0
pix.data.data0 = conv_type(0.0);
if (pix.data.data1 < conv_type(0.0)) // replace a negative value with 0
pix.data.data1 = conv_type(0.0);
outs << pix;
}
}
return(0);
}
// relu_tb.cpp
// 2018/02/20 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "relu.h"
#include "conv_layer_output.h"
// Fixed-point ReLU function under test.
int relu(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs);
// Floating-point reference implementation used for comparison.
int relu_soft(hls::stream<float2_axis<1,1,1> >& ins,
hls::stream<float2_axis<1,1,1> >& outs);
int main(){
using namespace std;
hls::stream<ap_fixed2_axis<16,6,1,1,1> > ins;
hls::stream<float2_axis<1,1,1> > ins_soft;
hls::stream<ap_fixed2_axis<16,6,1,1,1> > outs;
hls::stream<float2_axis<1,1,1> > outs_soft;
float relu_fout[312][2];
conv_type relu_out[312][2];
ap_fixed2_axis<16,6,1,1,1> pix;
float2_axis<1,1,1> fpix;
// ins に入力データを用意する
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pix.data.data0 = (conv_type)i;
pix.data.data1 = (conv_type)i;
ins << pix;
fpix.user = 0;
fpix.data.data0 = (float)i;
fpix.data.data1 = (float)i;
ins_soft << fpix;
}
// 1 画面分のデータを ins、ins_soft に入力する
for(int j=0; j < VERTICAL_PIXEL_WIDTH; j++){
for(int i=0; i < HORIZONTAL_PIXEL_WIDTH; i++){
pix.data.data0 = conv_layer_out[j*HORIZONTAL_PIXEL_WIDTH+i][0];
pix.data.data1 = conv_layer_out[j*HORIZONTAL_PIXEL_WIDTH+i][1];
fpix.data.data0 = conv_layer_fout[j*HORIZONTAL_PIXEL_WIDTH+i][0];
fpix.data.data1 = conv_layer_fout[j*HORIZONTAL_PIXEL_WIDTH+i][1];
if (j==0 && i==0){ // 最初のデータの時に TUSER を 1 にする
pix.user = 1;
fpix.user = 1;
} else {
pix.user = 0;
fpix.user = 0;
}
if (i == HORIZONTAL_PIXEL_WIDTH-1){ // 行の最後でTLASTをアサートする
pix.last = 1;
fpix.last = 1;
} else {
pix.last = 0;
fpix.last = 0;
}
ins << pix;
ins_soft << fpix;
}
}
relu(ins, outs);
relu_soft(ins_soft, outs_soft);
// outs, outs_soft を relu_out[][], relu_fout[][] に出力する
for(int j=0; j < VERTICAL_PIXEL_WIDTH; j++){
for(int i=0; i < HORIZONTAL_PIXEL_WIDTH; i++){
outs >> pix;
outs_soft >> fpix;
relu_out[j*HORIZONTAL_PIXEL_WIDTH+i][0] = pix.data.data0;
relu_out[j*HORIZONTAL_PIXEL_WIDTH+i][1] = pix.data.data1;
relu_fout[j*HORIZONTAL_PIXEL_WIDTH+i][0] = fpix.data.data0;
relu_fout[j*HORIZONTAL_PIXEL_WIDTH+i][1] = fpix.data.data1;
if ((double)pow((double)pix.data.data0-(double)fpix.data.data0,(double)2) > 4 ||
(double)pow((double)pix.data.data1-(double)fpix.data.data1,(double)2) > 4){ // 2乗誤差が4よりも大きい
printf("ERROR HW and SW results mismatch i = %ld, j = %ld, HW = %f, %f, SW = %f, %f\n", i, j, (float)pix.data.data0, (float)pix.data.data1, fpix.data.data0, fpix.data.data1);
return(1);
}
}
}
cout << "Success HW and SW results match" << endl;
cout << endl;
// ReLU の結果をヘッダファイルに出力
ofstream OH("relu_output.h");
OH << "// relu_output.h" << endl;
time_t now = time(0);
struct tm* localNow = localtime(&now);
OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
OH << " " << setw(2) << setfill('0') << localNow->tm_hour << ":" << localNow->tm_min << ":" << localNow->tm_sec << " by marsee" << endl;
OH << "//" << endl;
OH << endl;
OH << "#ifndef __RELU_OUTPUT_H__" << endl;
OH << "#define __RELU_OUTPUT_H__" << endl;
OH << endl;
OH << "const float relu_fout[" << VERTICAL_PIXEL_WIDTH*HORIZONTAL_PIXEL_WIDTH << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
for (int y=0; y<VERTICAL_PIXEL_WIDTH ; y++){
for (int x=0; x<HORIZONTAL_PIXEL_WIDTH ; x++){
OH << " {" << fixed << setprecision(12) << relu_fout[HORIZONTAL_PIXEL_WIDTH*y+x][0] << ", "
<< relu_fout[HORIZONTAL_PIXEL_WIDTH*y+x][1] << "}";
if (y==VERTICAL_PIXEL_WIDTH-1 && x==HORIZONTAL_PIXEL_WIDTH-1)
OH << endl;
else
OH << "," << endl;
}
}
OH << "};" << endl << endl;
OH << "const ap_fixed<16, 6, AP_TRN, AP_WRAP> relu_out[" << VERTICAL_PIXEL_WIDTH*HORIZONTAL_PIXEL_WIDTH << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
for (int y=0; y<VERTICAL_PIXEL_WIDTH ; y++){
for (int x=0; x<HORIZONTAL_PIXEL_WIDTH ; x++){
OH << " {" << fixed << setprecision(12) << (float)relu_out[HORIZONTAL_PIXEL_WIDTH*y+x][0] << ", "
<< (float)relu_out[HORIZONTAL_PIXEL_WIDTH*y+x][1] << "}";
if (y==VERTICAL_PIXEL_WIDTH -1 && x==HORIZONTAL_PIXEL_WIDTH -1)
OH << endl;
else
OH << "," << endl;
}
}
OH << "};" << endl << endl;
OH << "#endif" << endl;
return(0);
}
// Floating-point reference model of relu().
// Skips input elements until one with a non-zero user field arrives, then
// handles VERTICAL_PIXEL_WIDTH*HORIZONTAL_PIXEL_WIDTH elements, replacing
// negative data0/data1 values with 0 before forwarding each element to outs.
// Returns 0.
int relu_soft(hls::stream<float2_axis<1,1,1> >& ins,
        hls::stream<float2_axis<1,1,1> >& outs){
    float2_axis<1,1,1> elem;

    // Wait for the start of frame (user becomes 1).
    do {
        ins >> elem;
    } while(elem.user == 0);

    const int frame_elems = VERTICAL_PIXEL_WIDTH * HORIZONTAL_PIXEL_WIDTH;
    for (int idx = 0; idx < frame_elems; idx++){
        // Element 0 is the start-of-frame element read above.
        if (idx != 0)
            ins >> elem;

        if (elem.data.data0 < 0.0)
            elem.data.data0 = 0.0;
        if (elem.data.data1 < 0.0)
            elem.data.data1 = 0.0;

        outs << elem;
    }
    return(0);
}
// conv_layer_tb.cpp
// 2018/02/13 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "conv_layer.h"
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "bmp_header.h"
// Fixed-point convolution layer under test.
int conv_layer(hls::stream<ap_axiu<32,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs);
// Floating-point reference implementation used for comparison.
int conv_layer_soft(hls::stream<ap_axiu<32,1,1,1> >& ins,
hls::stream<float2_axis<1,1,1> >& outs);
// Input image read by the test bench.
#define BMP_FILE_NAME "straight_RED_rect0_00_rgb.bmp"
int main(){
using namespace std;
hls::stream<ap_axiu<32,1,1,1> > ins;
hls::stream<ap_axiu<32,1,1,1> > ins_soft;
hls::stream<ap_fixed2_axis<16,6,1,1,1> > outs;
hls::stream<float2_axis<1,1,1> > outs_soft;
ap_axiu<32,1,1,1> pix;
ap_fixed2_axis<16,6,1,1,1> vals;
float2_axis<1,1,1> vals_soft;
BITMAPFILEHEADER bmpfhr; // BMPファイルのファイルヘッダ(for Read)
BITMAPINFOHEADER bmpihr; // BMPファイルのINFOヘッダ(for Read)
FILE *fbmpr, *fbmpw, *fbmpwf;
int *rd_bmp, *hw_conv, *sw_conv;
float *hw_convf;
float *sw_convf;
int blue, green, red;
ap_uint<2> r_l;
char fhname[100];
char fsname[100];
if ((fbmpr = fopen(BMP_FILE_NAME, "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open straight_RED_rect0_00.bmp by binary read mode\n");
exit(1);
}
// bmpヘッダの読み出し
fread(&bmpfhr.bfType, sizeof(char), 2, fbmpr);
fread(&bmpfhr.bfSize, sizeof(long), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// ピクセルを入れるメモリをアロケートする
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
if ((hw_conv =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight * NUMBER_OF_KERNEL))) == NULL){
fprintf(stderr, "Can't allocate hw_conv0 memory\n");
exit(1);
}
if ((sw_conv =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight * NUMBER_OF_KERNEL))) == NULL){
fprintf(stderr, "Can't allocate sw_conv0 memory\n");
exit(1);
}
if ((hw_convf =(float *)malloc(sizeof(float) * (bmpihr.biWidth * bmpihr.biHeight * NUMBER_OF_KERNEL))) == NULL){
fprintf(stderr, "Can't allocate hw_conv0 memory\n");
exit(1);
}
if ((sw_convf =(float *)malloc(sizeof(float) * (bmpihr.biWidth * bmpihr.biHeight * NUMBER_OF_KERNEL))) == NULL){
fprintf(stderr, "Can't allocate sw_conv0 memory\n");
exit(1);
}
// rd_bmp にBMPのピクセルを代入。その際に、行を逆転する必要がある
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
// ins に入力データを用意する
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pix.data = i;
ins << pix;
}
// 1 画面分のデータを ins、ins_soft に入力する
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
pix.data = (ap_uint<32>)rd_bmp[(j*bmpihr.biWidth)+i];
if (j==0 && i==0) // 最初のデータの時に TUSER を 1 にする
pix.user = 1;
else
pix.user = 0;
if (i == bmpihr.biWidth-1) // 行の最後でTLASTをアサートする
pix.last = 1;
else
pix.last = 0;
ins << pix;
ins_soft << pix;
}
}
conv_layer(ins, outs);
conv_layer_soft(ins_soft, outs_soft);
// 画像サイズの縮小(畳み込みをすると行、列共に -4
bmpfhr.bfSize = (HORIZONTAL_PIXEL_WIDTH-4) * (VERTICAL_PIXEL_WIDTH-4) * 3 + 54;
bmpihr.biHeight = VERTICAL_PIXEL_WIDTH - 4;
bmpihr.biWidth = HORIZONTAL_PIXEL_WIDTH - 4;
// ハードウェアとソフトウェアのラプラシアン・フィルタの値のチェック
cout << endl;
cout << "outs" << endl;
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
outs >> vals;
outs_soft >> vals_soft;
out_type val0 = vals.data.data0;
out_type val1 = vals.data.data1;
float val_soft0 = vals_soft.data.data0;
float val_soft1 = vals_soft.data.data1;
hw_conv[(j*bmpihr.biWidth)+i] = ((int)val0+32)*4; // 32を足して負の符号を排除し、整数部6ビットなので、2ビット分補正する
hw_conv[(bmpihr.biWidth * bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = ((int)val1+32)*4;
sw_conv[(j*bmpihr.biWidth)+i] = ((int)val_soft0+32)*4;
sw_conv[(bmpihr.biWidth * bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = ((int)val_soft1+32)*4;
hw_convf[(j*bmpihr.biWidth)+i] = (float)val0; // 32を足して負の符号を排除し、整数部6ビットなので、2ビット分補正する
hw_convf[(bmpihr.biWidth * bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = (float)val1;
sw_convf[(j*bmpihr.biWidth)+i] = val_soft0;
sw_convf[(bmpihr.biWidth * bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = val_soft1;
if ((double)pow((double)val0-(double)val_soft0,(double)2) > 4 || (double)pow((double)val1-(double)val_soft1,(double)2) > 4){ // 2乗誤差が4よりも大きい
printf("ERROR HW and SW results mismatch i = %ld, j = %ld, HW = %f, %f, SW = %f, %f\n", i, j, (float)val0, (float)val1, val_soft0, val_soft1);
//return(1);
}
printf("HW and SW results i = %ld, j = %ld, HW = %f, %f, SW = %f, %f\n", i, j, (float)val0, (float)val1, val_soft0, val_soft1);
//if (vals.last)
//cout << "AXI-Stream is end" << endl;
}
}
cout << "Success HW and SW results match" << endl;
cout << endl;
// ハードウェアの畳み込み演算の結果を temp_conv0.bmp, temp_conv1.bmp に出力する
for (int k=0; k<2; k++){
if (k==0){
if ((fbmpw=fopen("temp_conv0.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv0.bmp by binary write mode\n");
exit(1);
}
} else {
if ((fbmpw=fopen("temp_conv1.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv1.bmp by binary write mode\n");
exit(1);
}
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpw);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpw);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpw);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpw);
// RGB データの書き込み、逆順にする
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
if (k == 0){
blue = hw_conv[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
} else {
blue = hw_conv[(bmpihr.biWidth * bmpihr.biHeight)+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
}
fputc(blue, fbmpw);
fputc(green, fbmpw);
fputc(red, fbmpw);
}
}
fclose(fbmpw);
}
// ソフトウェアの畳み込み演算の結果を temp_conv_float0.bmp, temp_conv_float1.bmp に出力する
for(int k=0; k<2; k++){
if (k == 0){
if ((fbmpwf=fopen("temp_conv_float0.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv_float0.bmp by binary write mode\n");
exit(1);
}
} else {
if ((fbmpwf=fopen("temp_conv_float1.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv_float1.bmp by binary write mode\n");
exit(1);
}
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpwf);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpwf);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpwf);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpwf);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpwf);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpwf);
// RGB データの書き込み、逆順にする
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
if (k == 0){
blue = sw_conv[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
} else {
blue = sw_conv[(bmpihr.biWidth * bmpihr.biHeight)+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
}
fputc(blue, fbmpwf);
fputc(green, fbmpwf);
fputc(red, fbmpwf);
}
}
fclose(fbmpwf);
}
// ヘッダ出力
ofstream OH("conv_layer_output.h");
OH << "// conv_layer_output.h" << endl;
time_t now = time(0);
struct tm* localNow = localtime(&now);
OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
OH << " " << setw(2) << setfill('0') << localNow->tm_hour << ":" << localNow->tm_min << ":" << localNow->tm_sec << " by marsee" << endl;
OH << "//" << endl;
OH << endl;
OH << "#ifndef __CONV_LAYER_OUTPUT_H__" << endl;
OH << "#define __CONV_LAYER_OUTPUT_H__" << endl;
OH << endl;
OH << "const float conv_layer_fout[" << bmpihr.biHeight*bmpihr.biWidth << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
OH << " {" << fixed << setprecision(12) << sw_convf[bmpihr.biWidth*y+x] << ", "
<< sw_convf[bmpihr.biHeight*bmpihr.biWidth+bmpihr.biWidth*y+x] << "}";
if (y==bmpihr.biHeight-1 && x==bmpihr.biWidth-1)
OH << endl;
else
OH << "," << endl;
}
}
OH << "};" << endl << endl;
OH << "const ap_fixed<16, 6, AP_TRN, AP_WRAP> conv_layer_out[" << bmpihr.biHeight*bmpihr.biWidth << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
OH << " {" << hw_convf[bmpihr.biWidth*y+x] << ", "
<< hw_convf[bmpihr.biHeight*bmpihr.biWidth+bmpihr.biWidth*y+x] << "}";
if (y==bmpihr.biHeight-1 && x==bmpihr.biWidth-1)
OH << endl;
else
OH << "," << endl;
}
}
OH << "};" << endl << endl;
OH << "#endif" << endl;
free(rd_bmp);
free(hw_conv);
free(sw_conv);
free(hw_convf);
free(sw_convf);
return(0);
}
// Floating-point reference model of the convolution layer.
// Skips input elements until one with a non-zero user field arrives, then
// walks a VERTICAL_PIXEL_WIDTH x HORIZONTAL_PIXEL_WIDTH frame. The low byte of
// each input word, divided by 256, is pushed into a line buffer and an
// ARRAY_SIZE x ARRAY_SIZE window; for every kernel the window is multiplied by
// conv1_fweight, summed, and conv1_fbias is added. Results are written to outs
// only where the window lies completely inside the frame.
// Returns 0.
int conv_layer_soft(hls::stream<ap_axiu<32,1,1,1> >& ins,
        hls::stream<float2_axis<1,1,1> >& outs){
    ap_axiu<32,1,1,1> in_elem;
    float2_axis<1,1,1> out_elem;
    hls::LineBuffer<ARRAY_SIZE-1, HORIZONTAL_PIXEL_WIDTH, float> prev_rows;
    hls::Window<ARRAY_SIZE, ARRAY_SIZE, float> window;
    float sample;
    float acc;
    const int edge = ARRAY_SIZE-1; // index of the last window row/column

    // Wait for the start of frame (user becomes 1).
    do {
        ins >> in_elem;
    } while(in_elem.user == 0);

    for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
        for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
            // The very first element was consumed while waiting for the frame start.
            if (x!=0 || y!=0)
                ins >> in_elem;

            sample = (float)(in_elem.data & 0xff) / 256.0;

            // Slide the window one column left and fill its last column from
            // the line buffer (older rows) and the new sample (bottom row).
            window.shift_pixels_left();
            for(int r=0; r<edge; r++){
                window.insert_pixel(prev_rows.getval(r,x), r, edge);
            }
            window.insert_pixel(sample, edge, edge);

            // Update the line buffer for this column.
            prev_rows.shift_pixels_up(x);
            prev_rows.insert_bottom_row(sample, x);

            // Weighted sum plus bias for each kernel.
            for (int k=0; k<NUMBER_OF_KERNEL; k++){
                acc = 0.0;
                for (int row=0; row<ARRAY_SIZE; row++){
                    for (int col=0; col<ARRAY_SIZE; col++){
                        acc += window.getval(row,col) * conv1_fweight[k][0][row][col];
                    }
                }
                acc += conv1_fbias[k];

                if (k != 0)
                    out_elem.data.data1 = acc;
                else
                    out_elem.data.data0 = acc;
            }

            // The first ARRAY_SIZE-1 rows and columns hold incomplete windows; emit nothing there.
            const bool window_complete = (x >= edge) && (y >= edge);
            if (window_complete){
                // TUSER on the first valid output, TLAST at the end of each row.
                out_elem.user = (x==edge && y==edge) ? 1 : 0;
                out_elem.last = (x == (HORIZONTAL_PIXEL_WIDTH-1)) ? 1 : 0;
                outs << out_elem;
            }
        }
    }
    return(0);
}
// conv1_weight.h
// 2017/12/06 10:54:11 by marsee
// Floating-point convolution weights: two 5x5 kernels.
// Indexed as [kernel][0][row][column]; the second dimension has size 1
// (presumably the input channel - confirm).
const float conv1_fweight[2][1][5][5] =
{
{
{
{0.764403421227,0.658424746889,0.595604201652,0.554044871161,0.367767232883},
{0.582414155838,0.413274869036,0.31659268154,0.3508390519,0.331194144626},
{0.589182274309,0.462105790282,-0.241299390378,-0.10093021104,0.233291757594},
{0.792411286764,0.315893121865,0.0397628864727,0.356726636694,0.426826537165},
{0.634481192118,0.651475977113,0.688949928547,0.707285991358,0.681420943406}
}
}
,
{
{
{0.00564732125401,-0.012955272371,-0.0231571581103,-0.00289983746176,0.0281080593816},
{-0.0115360072012,0.00253310449813,-0.00860163957467,0.00112793810127,-0.01455040341},
{-0.00881717612899,-0.00902248113722,0.0004194288468,0.00110240651437,-0.0140454059394},
{0.00271556513713,-0.00307791921855,0.000117170379207,-0.00891721414879,0.0173026634286},
{0.000808453898046,0.000116327205532,-0.00275343050716,-0.00683461392689,-0.0169130858704}
}
}
};
// Fixed-point convolution weights (9 bits total, 1 integer bit) with the same
// [2][1][5][5] shape as the floating-point table.
// The values appear to be the floating-point weights quantized to multiples of 1/256 - confirm against the generator.
const ap_fixed<9, 1, AP_TRN, AP_WRAP> conv1_weight[2][1][5][5] =
{
{
{
{0.765625,0.66015625,0.59375,0.5546875,0.3671875},
{0.58203125,0.4140625,0.31640625,0.3515625,0.33203125},
{0.58984375,0.4609375,-0.23828125,-0.09765625,0.234375},
{0.79296875,0.31640625,0.0390625,0.35546875,0.42578125},
{0.6328125,0.65234375,0.6875,0.70703125,0.6796875}
}
}
,
{
{
{0.00390625,-0.0078125,-0.01953125,0.0,0.02734375},
{-0.0078125,0.00390625,-0.00390625,0.0,-0.01171875},
{-0.00390625,-0.00390625,0.0,0.0,-0.01171875},
{0.00390625,0.0,0.0,-0.00390625,0.015625},
{0.0,0.0,0.0,-0.00390625,-0.01171875}
}
}
};
// conv1_bias.h
// 2017/12/06 10:54:20 by marsee
// Floating-point bias, one value per kernel.
const float conv1_fbias[2] = {
-2.37814890843, -0.00283377712987
};
// Fixed-point bias (9 bits total, 1 integer bit), one value per kernel.
// NOTE(review): the first entry is -1.0 while the floating-point bias for the
// same kernel is about -2.378; a type with 1 integer bit cannot go below -1,
// so confirm that this clipping is acceptable.
const ap_fixed<9, 1, AP_TRN, AP_WRAP> conv1_bias[2] = {
-1.0, 0.0
};
// conv_layer_tb.cpp
// 2018/02/13 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "conv_layer.h"
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "bmp_header.h"
int conv_layer(hls::stream<ap_axiu<32,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs);
int conv_layer_soft(hls::stream<ap_axiu<32,1,1,1> >& ins,
hls::stream<float2_axis<1,1,1> >& outs);
#define BMP_FILE_NAME "straight_RED_rect0_00_rgb.bmp"
int main(){
using namespace std;
hls::stream<ap_axiu<32,1,1,1> > ins;
hls::stream<ap_axiu<32,1,1,1> > ins_soft;
hls::stream<ap_fixed2_axis<16,6,1,1,1> > outs;
hls::stream<float2_axis<1,1,1> > outs_soft;
ap_axiu<32,1,1,1> pix;
ap_fixed2_axis<16,6,1,1,1> vals;
float2_axis<1,1,1> vals_soft;
BITMAPFILEHEADER bmpfhr; // BMPファイルのファイルヘッダ(for Read)
BITMAPINFOHEADER bmpihr; // BMPファイルのINFOヘッダ(for Read)
FILE *fbmpr, *fbmpw, *fbmpwf;
int *rd_bmp, *hw_conv, *sw_conv;
float *hw_convf;
float *sw_convf;
int blue, green, red;
ap_uint<2> r_l;
char fhname[100];
char fsname[100];
if ((fbmpr = fopen(BMP_FILE_NAME, "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open straight_RED_rect0_00.bmp by binary read mode\n");
exit(1);
}
// bmpヘッダの読み出し
fread(&bmpfhr.bfType, sizeof(char), 2, fbmpr);
fread(&bmpfhr.bfSize, sizeof(long), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// ピクセルを入れるメモリをアロケートする
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
if ((hw_conv =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight * NUMBER_OF_KERNEL))) == NULL){
fprintf(stderr, "Can't allocate hw_conv0 memory\n");
exit(1);
}
if ((sw_conv =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight * NUMBER_OF_KERNEL))) == NULL){
fprintf(stderr, "Can't allocate sw_conv0 memory\n");
exit(1);
}
if ((hw_convf =(float *)malloc(sizeof(float) * (bmpihr.biWidth * bmpihr.biHeight * NUMBER_OF_KERNEL))) == NULL){
fprintf(stderr, "Can't allocate hw_conv0 memory\n");
exit(1);
}
if ((sw_convf =(float *)malloc(sizeof(float) * (bmpihr.biWidth * bmpihr.biHeight * NUMBER_OF_KERNEL))) == NULL){
fprintf(stderr, "Can't allocate sw_conv0 memory\n");
exit(1);
}
// rd_bmp にBMPのピクセルを代入。その際に、行を逆転する必要がある
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
// ins に入力データを用意する
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pix.data = i;
ins << pix;
}
// 1 画面分のデータを ins、ins_soft に入力する
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
pix.data = (ap_uint<32>)rd_bmp[(j*bmpihr.biWidth)+i];
if (j==0 && i==0) // 最初のデータの時に TUSER を 1 にする
pix.user = 1;
else
pix.user = 0;
if (i == bmpihr.biWidth-1) // 行の最後でTLASTをアサートする
pix.last = 1;
else
pix.last = 0;
ins << pix;
ins_soft << pix;
}
}
conv_layer(ins, outs);
conv_layer_soft(ins_soft, outs_soft);
// 画像サイズの縮小(畳み込みをすると行、列共に -4
bmpfhr.bfSize = (HORIZONTAL_PIXEL_WIDTH-4) * (VERTICAL_PIXEL_WIDTH-4) * 3 + 54;
bmpihr.biHeight = VERTICAL_PIXEL_WIDTH - 4;
bmpihr.biWidth = HORIZONTAL_PIXEL_WIDTH - 4;
// ハードウェアとソフトウェアのラプラシアン・フィルタの値のチェック
cout << endl;
cout << "outs" << endl;
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
outs >> vals;
outs_soft >> vals_soft;
out_type val0 = vals.data.data0;
out_type val1 = vals.data.data1;
float val_soft0 = vals_soft.data.data0;
float val_soft1 = vals_soft.data.data1;
hw_conv[(j*bmpihr.biWidth)+i] = ((int)val0+32)*4; // 32を足して負の符号を排除し、整数部6ビットなので、2ビット分補正する
hw_conv[(bmpihr.biWidth * bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = ((int)val1+32)*4;
sw_conv[(j*bmpihr.biWidth)+i] = ((int)val_soft0+32)*4;
sw_conv[(bmpihr.biWidth * bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = ((int)val_soft1+32)*4;
hw_convf[(j*bmpihr.biWidth)+i] = (float)val0; // 32を足して負の符号を排除し、整数部6ビットなので、2ビット分補正する
hw_convf[(bmpihr.biWidth * bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = (float)val1;
sw_convf[(j*bmpihr.biWidth)+i] = val_soft0;
sw_convf[(bmpihr.biWidth * bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = val_soft1;
if ((double)pow((double)val0-(double)val_soft0,(double)2) > 4 || (double)pow((double)val1-(double)val_soft1,(double)2) > 4){ // 2乗誤差が4よりも大きい
printf("ERROR HW and SW results mismatch i = %ld, j = %ld, HW = %f, %f, SW = %f, %f\n", i, j, (float)val0, (float)val1, val_soft0, val_soft1);
//return(1);
}
printf("HW and SW results i = %ld, j = %ld, HW = %f, %f, SW = %f, %f\n", i, j, (float)val0, (float)val1, val_soft0, val_soft1);
//if (vals.last)
//cout << "AXI-Stream is end" << endl;
}
}
cout << "Success HW and SW results match" << endl;
cout << endl;
// ハードウェアの畳み込み演算の結果を temp_conv0.bmp, temp_conv1.bmp に出力する
for (int k=0; k<2; k++){
if (k==0){
if ((fbmpw=fopen("temp_conv0.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv0.bmp by binary write mode\n");
exit(1);
}
} else {
if ((fbmpw=fopen("temp_conv1.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv1.bmp by binary write mode\n");
exit(1);
}
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpw);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpw);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpw);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpw);
// RGB データの書き込み、逆順にする
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
if (k == 0){
blue = hw_conv[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
} else {
blue = hw_conv[(bmpihr.biWidth * bmpihr.biHeight)+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
}
fputc(blue, fbmpw);
fputc(green, fbmpw);
fputc(red, fbmpw);
}
}
fclose(fbmpw);
}
// ソフトウェアの畳み込み演算の結果を temp_conv_float0.bmp, temp_conv_float1.bmp に出力する
for(int k=0; k<2; k++){
if (k == 0){
if ((fbmpwf=fopen("temp_conv_float0.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv_float0.bmp by binary write mode\n");
exit(1);
}
} else {
if ((fbmpwf=fopen("temp_conv_float1.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv_float1.bmp by binary write mode\n");
exit(1);
}
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpwf);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpwf);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpwf);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpwf);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpwf);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpwf);
// RGB データの書き込み、逆順にする
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
if (k == 0){
blue = sw_conv[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
} else {
blue = sw_conv[(bmpihr.biWidth * bmpihr.biHeight)+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
}
fputc(blue, fbmpwf);
fputc(green, fbmpwf);
fputc(red, fbmpwf);
}
}
fclose(fbmpwf);
}
// ヘッダ出力
ofstream OH("conv_layer_output.h");
OH << "// conv_layer_output.h" << endl;
time_t now = time(0);
struct tm* localNow = localtime(&now);
OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
OH << " " << setw(2) << setfill('0') << localNow->tm_hour << ":" << localNow->tm_min << ":" << localNow->tm_sec << " by marsee" << endl;
OH << "//" << endl;
OH << endl;
OH << "#ifndef __CONV_LAYER_OUTPUT_H__" << endl;
OH << "#define __CONV_LAYER_OUTPUT_H__" << endl;
OH << endl;
OH << "const float conv_layer_fout[" << bmpihr.biHeight*bmpihr.biWidth << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
OH << " {" << fixed << setprecision(12) << sw_convf[bmpihr.biWidth*y+x] << ", "
<< sw_convf[bmpihr.biHeight*bmpihr.biWidth+bmpihr.biWidth*y+x] << "}";
if (y==bmpihr.biHeight-1 && x==bmpihr.biWidth-1)
OH << endl;
else
OH << "," << endl;
}
}
OH << "};" << endl << endl;
OH << "const ap_fixed<16, 6, AP_TRN, AP_WRAP> conv_layer_out[" << bmpihr.biHeight*bmpihr.biWidth << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
OH << " {" << hw_convf[bmpihr.biWidth*y+x] << ", "
<< hw_convf[bmpihr.biHeight*bmpihr.biWidth+bmpihr.biWidth*y+x] << "}";
if (y==bmpihr.biHeight-1 && x==bmpihr.biWidth-1)
OH << endl;
else
OH << "," << endl;
}
}
OH << "};" << endl << endl;
OH << "#endif" << endl;
free(rd_bmp);
free(hw_conv);
free(sw_conv);
free(hw_convf);
free(sw_convf);
return(0);
}
// Floating-point reference model of the convolution layer.
// Skips input samples until one with user == 1 arrives, then consumes
// VERTICAL_PIXEL_WIDTH x HORIZONTAL_PIXEL_WIDTH samples. The low 8 bits of
// each sample are scaled by 1/256 and pushed through an ARRAY_SIZE x ARRAY_SIZE
// window; for every kernel the window is multiplied with conv1_fweight and
// conv1_fbias is added. Results are emitted only once both coordinates have
// reached ARRAY_SIZE-1. Always returns 0.
int conv_layer_soft(hls::stream<ap_axiu<32,1,1,1> >& ins,
        hls::stream<float2_axis<1,1,1> >& outs){
    ap_axiu<32,1,1,1> pix;
    float2_axis<1,1,1> conv_out;
    hls::LineBuffer<ARRAY_SIZE-1, HORIZONTAL_PIXEL_WIDTH, float> linebuf;
    hls::Window<ARRAY_SIZE, ARRAY_SIZE, float> mbuf;

    // The frame starts with the first sample whose user flag is set.
    do {
        ins >> pix;
    } while(pix.user == 0);

    const int border = ARRAY_SIZE-1; // rows/columns that do not yet have a full window

    Loop1: for (int row=0; row<VERTICAL_PIXEL_WIDTH; row++){
        Loop2: for (int col=0; col<HORIZONTAL_PIXEL_WIDTH; col++){
            // The very first sample was already consumed by the loop above.
            const bool first_sample = (col==0 && row==0);
            if (!first_sample)
                ins >> pix;

            const float sample = (float)(pix.data & 0xff) / 256.0;

            // Slide the window one column to the left and fill its right-most
            // column from the line buffer plus the new sample.
            mbuf.shift_pixels_left();
            for(int r=0; r<ARRAY_SIZE-1; r++){
                mbuf.insert_pixel(linebuf.getval(r,col), r, ARRAY_SIZE-1);
            }
            mbuf.insert_pixel(sample, ARRAY_SIZE-1, ARRAY_SIZE-1);

            // Update the line buffer for this column.
            linebuf.shift_pixels_up(col);
            linebuf.insert_bottom_row(sample, col);

            // Multiply-accumulate for every kernel.
            for (int k=0; k<NUMBER_OF_KERNEL; k++){
                float acc = 0.0;
                for (int j=0; j<ARRAY_SIZE; j++){
                    for (int i=0; i<ARRAY_SIZE; i++){
                        acc += mbuf.getval(j,i) * conv1_fweight[k][0][j][i];
                    }
                }
                acc += conv1_fbias[k];
                if (k != 0)
                    conv_out.data.data1 = acc;
                else
                    conv_out.data.data0 = acc;
            }

            // Emit only positions past the first ARRAY_SIZE-1 rows and columns.
            if (col >= border && row >= border){
                conv_out.user = (col==border && row==border) ? 1 : 0;       // first valid sample
                conv_out.last = (col == (HORIZONTAL_PIXEL_WIDTH-1)) ? 1 : 0; // end of line
                outs << conv_out;
            }
        }
    }
    return(0);
}
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
Compiling ../../../conv_layer_tb.cpp in debug mode
Compiling ../../../conv_layer.cpp in debug mode
Generating csim.exe
outs
HW and SW results i = 0, j = 0, HW = 4.335938, -0.017578, SW = 2.956438, -0.043737
HW and SW results i = 1, j = 0, HW = 4.266602, -0.016602, SW = 2.887825, -0.041697
HW and SW results i = 2, j = 0, HW = 4.193359, -0.016602, SW = 2.813908, -0.042108
HW and SW results i = 3, j = 0, HW = 4.140625, -0.016602, SW = 2.762030, -0.041928
HW and SW results i = 4, j = 0, HW = 4.064453, -0.015625, SW = 2.685595, -0.039677
HW and SW results i = 5, j = 0, HW = 4.011719, -0.014648, SW = 2.633010, -0.038917
HW and SW results i = 6, j = 0, HW = 3.991211, -0.014648, SW = 2.612745, -0.038658
HW and SW results i = 7, j = 0, HW = 4.005859, -0.013672, SW = 2.628183, -0.038179
HW and SW results i = 8, j = 0, HW = 4.014648, -0.013672, SW = 2.636920, -0.037909
HW and SW results i = 9, j = 0, HW = 4.036133, -0.013672, SW = 2.658426, -0.037599
HW and SW results i = 10, j = 0, HW = 4.024414, -0.013672, SW = 2.647083, -0.037274
HW and SW results i = 11, j = 0, HW = 4.015625, -0.013672, SW = 2.637882, -0.037213
HW and SW results i = 12, j = 0, HW = 4.033203, -0.012695, SW = 2.655350, -0.036282
HW and SW results i = 13, j = 0, HW = 4.222656, -0.016602, SW = 2.845468, -0.041951
HW and SW results i = 14, j = 0, HW = 4.448242, -0.017578, SW = 3.070912, -0.042820
HW and SW results i = 15, j = 0, HW = 4.807617, -0.013672, SW = 3.429559, -0.040354
HW and SW results i = 16, j = 0, HW = 5.202148, -0.019531, SW = 3.824572, -0.048534
HW and SW results i = 17, j = 0, HW = 5.367188, -0.020508, SW = 3.988451, -0.048833
HW and SW results i = 18, j = 0, HW = 5.325195, -0.026367, SW = 3.945544, -0.056148
HW and SW results i = 19, j = 0, HW = 5.275391, -0.022461, SW = 3.894094, -0.050489
HW and SW results i = 20, j = 0, HW = 5.178711, -0.021484, SW = 3.798736, -0.050703
HW and SW results i = 21, j = 0, HW = 5.010742, -0.020508, SW = 3.631046, -0.050160
HW and SW results i = 22, j = 0, HW = 4.715820, -0.019531, SW = 3.336445, -0.048055
HW and SW results i = 23, j = 0, HW = 4.457031, -0.019531, SW = 3.077446, -0.046769
HW and SW results i = 24, j = 0, HW = 4.337891, -0.018555, SW = 2.958363, -0.044824
HW and SW results i = 25, j = 0, HW = 4.344727, -0.019531, SW = 2.965589, -0.045536
HW and SW results i = 26, j = 0, HW = 4.338867, -0.018555, SW = 2.959619, -0.044858
HW and SW results i = 27, j = 0, HW = 4.303711, -0.018555, SW = 2.924359, -0.044479
HW and SW results i = 28, j = 0, HW = 4.276367, -0.018555, SW = 2.897588, -0.044321
HW and SW results i = 29, j = 0, HW = 4.258789, -0.017578, SW = 2.879802, -0.043693
HW and SW results i = 30, j = 0, HW = 4.239258, -0.018555, SW = 2.860393, -0.044097
HW and SW results i = 31, j = 0, HW = 4.186523, -0.018555, SW = 2.806980, -0.043727
HW and SW results i = 32, j = 0, HW = 4.125000, -0.018555, SW = 2.745974, -0.043355
HW and SW results i = 33, j = 0, HW = 4.058594, -0.017578, SW = 2.679494, -0.042717
HW and SW results i = 34, j = 0, HW = 3.978516, -0.018555, SW = 2.599331, -0.042859
HW and SW results i = 35, j = 0, HW = 3.731445, -0.018555, SW = 2.351973, -0.041944
HW and SW results i = 36, j = 0, HW = 3.646484, -0.018555, SW = 2.267337, -0.041979
HW and SW results i = 37, j = 0, HW = 3.750977, -0.018555, SW = 2.371555, -0.041249
HW and SW results i = 38, j = 0, HW = 3.694336, -0.015625, SW = 2.314963, -0.038042
HW and SW results i = 39, j = 0, HW = 3.620117, -0.015625, SW = 2.241181, -0.040116
HW and SW results i = 40, j = 0, HW = 4.099609, -0.017578, SW = 2.721145, -0.044066
HW and SW results i = 41, j = 0, HW = 4.369141, -0.022461, SW = 2.991379, -0.051047
HW and SW results i = 42, j = 0, HW = 4.442383, -0.018555, SW = 3.064429, -0.045425
HW and SW results i = 43, j = 0, HW = 4.265625, -0.014648, SW = 2.887242, -0.039155
HW and SW results i = 44, j = 0, HW = 3.979492, -0.012695, SW = 2.601302, -0.034550
HW and SW results i = 45, j = 0, HW = 3.593750, -0.013672, SW = 2.215396, -0.034609
HW and SW results i = 46, j = 0, HW = 3.423828, -0.013672, SW = 2.046252, -0.034175
HW and SW results i = 47, j = 0, HW = 3.249023, -0.013672, SW = 1.870888, -0.033923
HW and SW results i = 48, j = 0, HW = 3.207031, -0.013672, SW = 1.828757, -0.033767
HW and SW results i = 49, j = 0, HW = 3.158203, -0.012695, SW = 1.780369, -0.033276
HW and SW results i = 50, j = 0, HW = 3.140625, -0.012695, SW = 1.762675, -0.032938
HW and SW results i = 51, j = 0, HW = 3.123047, -0.012695, SW = 1.745412, -0.033239
HW and SW results i = 0, j = 1, HW = 4.738281, -0.021484, SW = 3.359004, -0.048814
HW and SW results i = 1, j = 1, HW = 4.651367, -0.021484, SW = 3.271967, -0.049351
HW and SW results i = 2, j = 1, HW = 4.590820, -0.021484, SW = 3.210930, -0.049440
HW and SW results i = 3, j = 1, HW = 4.591797, -0.021484, SW = 3.212094, -0.048609
中略
Success HW and SW results match
INFO: [SIM 1] CSim done with 0 errors.
INFO: [SIM 3] *************** CSIM finish ***************
// conv_layer.h
// 2018/02/06 by marsee
//
#ifndef __CONV_LAYER_H__
#define __CONV_LAYER_H__
#include <ap_fixed.h>
// Stream element carrying one signed fixed-point value plus side-channel fields.
//   W, I : total and integer bit widths of the ap_fixed payload
//   U    : width of the user field
//   TI   : width of the id field
//   TD   : width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed1_axis{
// Payload wrapper; the member has the same name as the nested type.
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
} data;
ap_uint<(W+7)/8> keep; // one bit per payload byte (W rounded up to bytes)
ap_uint<(W+7)/8> strb; // same width as keep
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Stream element carrying two signed fixed-point values plus side-channel fields.
//   W, I : total and integer bit widths of each ap_fixed payload value
//   U    : width of the user field
//   TI   : width of the id field
//   TD   : width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed2_axis{
// Payload wrapper; the member has the same name as the nested type.
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
ap_fixed<W,I,AP_TRN,AP_WRAP> data1;
} data;
// NOTE(review): keep/strb are sized from W alone although the payload holds
// two W-bit values - confirm whether (2*W+7)/8 was intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Stream element carrying two float values plus side-channel fields; used by
// the floating-point reference model in the testbench.
//   U  : width of the user field
//   TI : width of the id field
//   TD : width of the dest field
template<int U, int TI, int TD>
struct float2_axis{
// Payload wrapper; the member has the same name as the nested type.
struct data {
float data0;
float data1;
} data;
ap_uint<1> keep;
ap_uint<1> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Input frame geometry consumed by conv_layer(): samples per line and number of lines.
#define HORIZONTAL_PIXEL_WIDTH 56
#define VERTICAL_PIXEL_WIDTH 10
// Side length of the square convolution window.
#define ARRAY_SIZE 5
// Number of convolution kernels evaluated for every window position.
#define NUMBER_OF_KERNEL 2
// Input sample: unsigned, 8 bits, no integer bits.
typedef ap_ufixed<8, 0, AP_TRN, AP_WRAP> in_type;
// Accumulator for the multiply-accumulate: 22 bits, 6 of them integer.
typedef ap_fixed<22, 6, AP_TRN, AP_WRAP> val_type;
// Output sample: 16 bits, 6 of them integer.
typedef ap_fixed<16, 6, AP_TRN, AP_WRAP> out_type;
#endif
// conv_layer.cpp (line_buf, pix_mat)
// 2018/02/06 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "conv_layer.h"
#include "conv1_weight.h"
#include "conv1_bias.h"
// Fixed-point convolution layer built on a hand-written line buffer (line_buf)
// and window register array (pix_mat).
// Skips input samples until one with user == 1 arrives, then consumes
// VERTICAL_PIXEL_WIDTH x HORIZONTAL_PIXEL_WIDTH samples. For every sample the
// low 8 bits are scaled by 1/256, shifted into the window, and the window is
// multiplied with conv1_weight for each kernel; conv1_bias is then added.
// Results are written to outs only once both coordinates have reached
// ARRAY_SIZE-1. Always returns 0.
int conv_layer(hls::stream<ap_axiu<32,1,1,1> >& ins,
        hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs){
#pragma HLS INTERFACE axis port=ins
#pragma HLS INTERFACE axis port=outs
#pragma HLS INTERFACE s_axilite port=return
    ap_axiu<32,1,1,1> pix;
    ap_fixed2_axis<16,6,1,1,1> conv_out;

    in_type line_buf[ARRAY_SIZE-1][HORIZONTAL_PIXEL_WIDTH];
#pragma HLS ARRAY_PARTITION variable=line_buf block factor=4 dim=1
#pragma HLS resource variable=line_buf core=RAM_2P

    in_type pix_mat[ARRAY_SIZE][ARRAY_SIZE];
#pragma HLS array_partition variable=pix_mat complete

    in_type sample;
    val_type acc;

    // The frame starts with the first sample whose user flag is set.
    Loop1: do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
        ins >> pix;
    } while(pix.user == 0);

    Loop2: for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
        Loop3: for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
            // The very first sample was already consumed by Loop1.
            const bool first_sample = (x==0 && y==0);
            if (!first_sample)
                ins >> pix;

            sample = (in_type)((ap_ufixed<16, 8, AP_TRN, AP_WRAP>)(pix.data & 0xff) / 256);

            // Shift every window row one position to the left.
            Loop4 : for (int k=0; k<ARRAY_SIZE; k++){
                Loop5 : for (int m=1; m<ARRAY_SIZE; m++){
#pragma HLS UNROLL
                    pix_mat[k][m-1] = pix_mat[k][m];
                }
            }

            // Right-most window column: earlier lines from line_buf, newest sample last.
            Loop6: for (int i=0; i<ARRAY_SIZE-1; i++){
                pix_mat[i][ARRAY_SIZE-1] = line_buf[i][x];
            }
            pix_mat[ARRAY_SIZE-1][ARRAY_SIZE-1] = sample;

            // Move the stored lines up by one at column x and append the new sample.
            Loop7: for (int i=1; i<ARRAY_SIZE-1; i++){
                line_buf[i-1][x] = line_buf[i][x];
            }
            line_buf[ARRAY_SIZE-2][x] = sample;

            // Multiply-accumulate for every kernel.
            for (int k=0; k<NUMBER_OF_KERNEL; k++){
                acc = 0.0;
                for (int j=0; j<ARRAY_SIZE; j++){
                    for (int i=0; i<ARRAY_SIZE; i++){
                        acc += (val_type)pix_mat[j][i] * (val_type)conv1_weight[k][0][j][i];
                    }
                }
                acc += (val_type)conv1_bias[k];
                if (k != 0)
                    conv_out.data.data1 = acc;
                else
                    conv_out.data.data0 = acc;
            }

            // Emit only positions past the first ARRAY_SIZE-1 rows and columns.
            if (x >= (ARRAY_SIZE-1) && y >= (ARRAY_SIZE-1)){
                conv_out.user = (x==(ARRAY_SIZE-1) && y==(ARRAY_SIZE-1)) ? 1 : 0; // first valid sample
                conv_out.last = (x == (HORIZONTAL_PIXEL_WIDTH-1)) ? 1 : 0;        // end of line
                outs << conv_out;
            }
        }
    }
    return(0);
}
// conv_layer.cpp (hls::LineBuffer, hls::Window)
// 2018/02/06 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "conv_layer.h"
#include "conv1_weight.h"
#include "conv1_bias.h"
// Fixed-point convolution layer built on hls::LineBuffer and hls::Window.
// Skips input samples until one with user == 1 arrives, then consumes
// VERTICAL_PIXEL_WIDTH x HORIZONTAL_PIXEL_WIDTH samples. For every sample the
// low 8 bits are scaled by 1/256, shifted into the window, and the window is
// multiplied with conv1_weight for each kernel; conv1_bias is then added.
// Results are written to outs only once both coordinates have reached
// ARRAY_SIZE-1. Always returns 0.
int conv_layer(hls::stream<ap_axiu<32,1,1,1> >& ins,
        hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs){
#pragma HLS INTERFACE axis port=ins
#pragma HLS INTERFACE axis port=outs
#pragma HLS INTERFACE s_axilite port=return
    ap_axiu<32,1,1,1> pix;
    ap_fixed2_axis<16,6,1,1,1> conv_out;
    hls::LineBuffer<ARRAY_SIZE-1, HORIZONTAL_PIXEL_WIDTH, in_type> linebuf;
    hls::Window<ARRAY_SIZE, ARRAY_SIZE, in_type> mbuf;
    in_type sample;
    val_type acc;

    // The frame starts with the first sample whose user flag is set.
    do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
        ins >> pix;
    } while(pix.user == 0);

    Loop1: for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
        Loop2: for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
            // The very first sample was already consumed by the loop above.
            const bool first_sample = (x==0 && y==0);
            if (!first_sample)
                ins >> pix;

            sample = (in_type)((ap_ufixed<16, 8, AP_TRN, AP_WRAP>)(pix.data & 0xff) / 256);

            // Slide the window one column to the left and fill its right-most
            // column from the line buffer plus the new sample.
            mbuf.shift_pixels_left();
            for(int r=0; r<ARRAY_SIZE-1; r++){
                mbuf.insert_pixel(linebuf.getval(r,x), r, ARRAY_SIZE-1);
            }
            mbuf.insert_pixel(sample, ARRAY_SIZE-1, ARRAY_SIZE-1);

            // Update the line buffer for this column.
            linebuf.shift_pixels_up(x);
            linebuf.insert_bottom_row(sample, x);

            // Multiply-accumulate for every kernel.
            for (int k=0; k<NUMBER_OF_KERNEL; k++){
                acc = 0.0;
                for (int j=0; j<ARRAY_SIZE; j++){
                    for (int i=0; i<ARRAY_SIZE; i++){
                        acc += (val_type)mbuf.getval(j,i) * (val_type)conv1_weight[k][0][j][i];
                    }
                }
                acc += (val_type)conv1_bias[k];
                if (k != 0)
                    conv_out.data.data1 = acc;
                else
                    conv_out.data.data0 = acc;
            }

            // Emit only positions past the first ARRAY_SIZE-1 rows and columns.
            if (x >= (ARRAY_SIZE-1) && y >= (ARRAY_SIZE-1)){
                conv_out.user = (x==(ARRAY_SIZE-1) && y==(ARRAY_SIZE-1)) ? 1 : 0; // first valid sample
                conv_out.last = (x == (HORIZONTAL_PIXEL_WIDTH-1)) ? 1 : 0;        // end of line
                outs << conv_out;
            }
        }
    }
    return(0);
}
// stream_test.h
// 2018/02/11 by marsee
//
#ifndef __STREAM_TEST_H__
#define __STREAM_TEST_H__
#include <ap_fixed.h>
// Stream element carrying one signed fixed-point value plus side-channel fields.
//   W, I : total and integer bit widths of the ap_fixed payload
//   U    : width of the user field
//   TI   : width of the id field
//   TD   : width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed1_axis{
// Payload wrapper; the member has the same name as the nested type.
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
} data;
ap_uint<(W+7)/8> keep; // one bit per payload byte (W rounded up to bytes)
ap_uint<(W+7)/8> strb; // same width as keep
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Stream element carrying two signed fixed-point values plus side-channel fields.
//   W, I : total and integer bit widths of each ap_fixed payload value
//   U    : width of the user field
//   TI   : width of the id field
//   TD   : width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed2_axis{
// Payload wrapper; the member has the same name as the nested type.
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
ap_fixed<W,I,AP_TRN,AP_WRAP> data1;
} data;
// NOTE(review): keep/strb are sized from W alone although the payload holds
// two W-bit values - confirm whether (2*W+7)/8 was intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Stream element carrying four signed fixed-point values plus side-channel fields.
//   W, I : total and integer bit widths of each ap_fixed payload value
//   U    : width of the user field
//   TI   : width of the id field
//   TD   : width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed4_axis{
// Payload wrapper; the member has the same name as the nested type.
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
ap_fixed<W,I,AP_TRN,AP_WRAP> data1;
ap_fixed<W,I,AP_TRN,AP_WRAP> data2;
ap_fixed<W,I,AP_TRN,AP_WRAP> data3;
} data;
// NOTE(review): keep/strb are sized from W alone although the payload holds
// four W-bit values - confirm whether (4*W+7)/8 was intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
#endif
// stream_test.cpp
// 2018/02/11 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "stream_test.h"
// Stream experiment: reads 10 x 56 single-value elements and, for each one,
// emits a two-value element holding the input multiplied by 2.0 (data0) and by
// -3.0 (data1). user is set to 1 and last to 0 on every output element.
// Always returns 0.
int stream_test(hls::stream<ap_fixed1_axis<16,6,1,1,1> >& ins,
        hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs){
#pragma HLS INTERFACE axis register both port=outs
#pragma HLS INTERFACE axis register both port=ins
#pragma HLS INTERFACE s_axilite port=return
    ap_fixed1_axis<16,6,1,1,1> ins_t;
    ap_fixed2_axis<16,6,1,1,1> outs_t;

    for(int row=0; row<10; row++){
        for(int col=0; col<56; col++){
            ins >> ins_t;

            // Side-channel flags are constant for every element.
            outs_t.user = 1;
            outs_t.last = 0;

            outs_t.data.data0 = ins_t.data.data0 * (ap_fixed<16, 6, AP_TRN, AP_WRAP>)2.0;
            outs_t.data.data1 = ins_t.data.data0 * (ap_fixed<16, 6, AP_TRN, AP_WRAP>)(-3.0);

            outs << outs_t;
        }
    }
    return(0);
}
// stream_test_tb.cpp
// 2018/02/11 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "stream_test.h"
// Number of stream elements pushed through stream_test() (10 x 56).
#define DATASIZE 560

int stream_test(hls::stream<ap_fixed1_axis<16,6,1,1,1> >& ins,
        hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs);

// Testbench for stream_test(): feeds DATASIZE elements whose value cycles
// through 0..10, runs the function once and prints both outputs of every
// result element. Always returns 0.
int main(){
    using namespace std;

    hls::stream<ap_fixed1_axis<16,6,1,1,1> > instream;
    hls::stream<ap_fixed2_axis<16,6,1,1,1> > outstream;
    ap_fixed1_axis<16,6,1,1,1> in_ts;
    ap_fixed2_axis<16,6,1,1,1> out_ts;

    // Stimulus
    int produced = 0;
    while (produced < DATASIZE){
        in_ts.data.data0 = produced % 11;
        instream << in_ts;
        produced++;
    }

    stream_test(instream, outstream);

    // Dump the results
    int consumed = 0;
    while (consumed < DATASIZE){
        outstream >> out_ts;
        printf("i = %d, data1 = %f, data0 = %f\n", consumed, (float)out_ts.data.data1, (float)out_ts.data.data0);
        consumed++;
    }
    return(0);
}
.F:/Xilinx/Vivado/2017.4/include/ap_stream.h:70:2: warning: #warning AP_STREAM macros are deprecated. Please use hls::stream<> from "hls_stream.h" instead. [-Wcpp]
apatb_stream_test.cpp: In function 'int AESL_WRAP_stream_test(hls::stream<ap_fixed1_axis<16, 6, 1, 1, 1> >&, hls::stream<ap_fixed2_axis<16, 6, 1, 1, 1> >&)':
apatb_stream_test.cpp:425:36: error: 'data' has no member named 'data1'
apatb_stream_test.cpp:478:36: error: 'data' has no member named 'data1'
apatb_stream_test.cpp:480:34: error: 'data' has no member named 'data1'
apatb_stream_test.cpp:2330:34: error: 'data' has no member named 'data1'
apatb_stream_test.cpp:2333:63: error: 'data' has no member named 'data1'
make: *** [obj/apatb_stream_test.o] Error 1
ERROR: [COSIM 212-317] C++ compile error.
ERROR: [COSIM 212-321] EXE file generate failed.
ERROR: [COSIM 212-321] EXE file generate failed.
ERROR: [COSIM 212-331] Aborting co-simulation: C simulation failed, compilation errors.
ERROR: [COSIM 212-4] *** C/RTL co-simulation finished: FAIL ***
command 'ap_source' returned error code
while executing
"source C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO_Z7-20/test/stream_test/solution1/cosim.tcl"
invoked from within
"hls::main C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO_Z7-20/test/stream_test/solution1/cosim.tcl"
("uplevel" body line 1)
invoked from within
"uplevel 1 hls::main {*}$args"
(procedure "hls_proc" line 5)
invoked from within
"hls_proc $argv"
Finished C/RTL cosimulation
// stream_test.h
// 2018/02/11 by marsee
//
#ifndef __STREAM_TEST_H__
#define __STREAM_TEST_H__
#include <ap_fixed.h>
// Stream element carrying an array of N signed fixed-point values plus
// side-channel fields.
//   W, I : total and integer bit widths of each ap_fixed payload value
//   N    : number of payload values
//   U    : width of the user field
//   TI   : width of the id field
//   TD   : width of the dest field
template<int W, int I, int N, int U, int TI, int TD>
struct ap_fixed_axis{
ap_fixed<W, I, AP_TRN, AP_WRAP> data[N];
// NOTE(review): keep/strb are sized from W alone although the payload holds
// N values of W bits - confirm whether (N*W+7)/8 was intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Stream element carrying an array of N unsigned fixed-point values plus
// side-channel fields.
//   W, I : total and integer bit widths of each ap_ufixed payload value
//   N    : number of payload values
//   U    : width of the user field
//   TI   : width of the id field
//   TD   : width of the dest field
template<int W, int I, int N, int U, int TI, int TD>
struct ap_ufixed_axis{
ap_ufixed<W, I, AP_TRN, AP_WRAP> data[N];
// NOTE(review): keep/strb are sized from W alone although the payload holds
// N values of W bits - confirm whether (N*W+7)/8 was intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
#endif
// stream_test.cpp
// 2018/02/11 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "stream_test.h"
// Stream experiment using the array-payload element type: reads 10 x 56
// two-value elements and emits, for each one, data[0] multiplied by 2.0 and
// data[1] multiplied by 3.0. user is set to 1 and last to 0 on every output
// element. Always returns 0.
int stream_test(hls::stream<ap_fixed_axis<16,6,2,1,1,1> >& ins,
        hls::stream<ap_fixed_axis<16,6,2,1,1,1> >& outs){
#pragma HLS INTERFACE axis register both port=outs
#pragma HLS INTERFACE axis register both port=ins
#pragma HLS INTERFACE s_axilite port=return
    ap_fixed_axis<16,6,2,1,1,1> ins_t;
    ap_fixed_axis<16,6,2,1,1,1> outs_t;

    for(int row=0; row<10; row++){
        for(int col=0; col<56; col++){
            ins >> ins_t;

            // Side-channel flags are constant for every element.
            outs_t.user = 1;
            outs_t.last = 0;

            outs_t.data[0] = ins_t.data[0] * (ap_fixed<16, 6, AP_TRN, AP_WRAP>)2.0;
            outs_t.data[1] = ins_t.data[1] * (ap_fixed<16, 6, AP_TRN, AP_WRAP>)3.0;

            outs << outs_t;
        }
    }
    return(0);
}
ERROR: [XFORM 203-103] Cannot partition array 'ins.V.data.V' (stream_test/stream_test.cpp:12): different array partition directive on the same group of AXI-Stream ports.
ERROR: [HLS 200-70] Pre-synthesis failed.
command 'ap_source' returned error code
while executing
"source C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO_Z7-20/test/stream_test/solution1/csynth.tcl"
invoked from within
"hls::main C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO_Z7-20/test/stream_test/solution1/csynth.tcl"
("uplevel" body line 1)
invoked from within
"uplevel 1 hls::main {*}$args"
(procedure "hls_proc" line 5)
invoked from within
"hls_proc $argv"
Finished C synthesis.
// bram_test2.cpp
// 2018/02/06 by marsee
//
// Small memory wrapper: 1024 int words with one write and one read accessor.
// Neither accessor range-checks index.
class bram {
private:
    int array[1024];

public:
    // Stores data into word number index.
    void bram_write(int &index, int &data){
        array[index] = data;
    }

    // Loads word number index into data.
    void bram_read(int &index, int &data){
        data = array[index];
    }
};
// Top-level function exposing a function-static bram instance through
// AXI4-Lite ports.
//   index : word to access
//   wr    : 0 selects a read, any other value a write
//   data  : receives the word on a read, supplies it on a write
// Always returns 0.
int bram_test(int &index, int &wr, int &data){
#pragma HLS INTERFACE s_axilite port=data
#pragma HLS INTERFACE s_axilite port=wr
#pragma HLS INTERFACE s_axilite port=index
#pragma HLS INTERFACE s_axilite port=return
    static bram bram_inst; // static, so its contents persist across calls

    if (wr != 0){
        // Write
        bram_inst.bram_write(index, data);
    } else {
        // Read
        bram_inst.bram_read(index, data);
    }
    return(0);
}
// bram_test1.cpp
// 2018/02/06 by marsee
//
// Single-word memory access through AXI4-Lite ports.
//   index : word to access (not range-checked; must be 0..1023)
//   wr    : 0 selects a read, any other value a write
//   data  : receives the word on a read, supplies it on a write
// Always returns 0.
int bram_test(int &index, int &wr, int &data){
#pragma HLS INTERFACE s_axilite port=data
#pragma HLS INTERFACE s_axilite port=wr
#pragma HLS INTERFACE s_axilite port=index
#pragma HLS INTERFACE s_axilite port=return
    // The storage must be static: as an automatic local it would be
    // re-created on every call, so reads would return indeterminate values
    // and written words would be lost before they could be read back.
    static int array[1024];

    if(wr == 0){ // Read
        data = array[index];
    }else{ // Write
        array[index] = data;
    }
    return(0);
}
// bram_test1_tb.cpp
// 2018/02/06 by marsee
//
#include <stdio.h>
int bram_test(int &index, int &wr, int &data);

// Reads one word through bram_test() (wr = 0) and prints it in hex.
static void read_and_print(int index){
    int wr = 0;
    int data;
    bram_test(index, wr, data);
    printf("data = %x\n", data);
}

// Writes one word through bram_test() (wr = 1).
static void write_word(int index, int value){
    int wr = 1;
    bram_test(index, wr, value);
}

// Testbench for bram_test(): for words 0 and 1, prints the word before and
// after writing a known value (0x1 and 0x2 respectively). Always returns 0.
int main(){
    read_and_print(0);
    write_word(0, 0x1);
    read_and_print(0);

    read_and_print(1);
    write_word(1, 0x2);
    read_and_print(1);

    return(0);
}
// bram_test1.cpp
// 2018/02/06 by marsee
//
// Single-word memory access through AXI4-Lite ports, backed by a
// function-static array so that contents persist across calls.
//   index : word to access (not range-checked)
//   wr    : 0 selects a read, any other value a write
//   data  : receives the word on a read, supplies it on a write
// Always returns 0.
int bram_test(int &index, int &wr, int &data){
#pragma HLS INTERFACE s_axilite port=data
#pragma HLS INTERFACE s_axilite port=wr
#pragma HLS INTERFACE s_axilite port=index
#pragma HLS INTERFACE s_axilite port=return
    static int array[1024];

    if (wr != 0){
        // Write
        array[index] = data;
    } else {
        // Read
        data = array[index];
    }
    return(0);
}
#pragma HLS RESOURCE variable=out_temp core=AddSub をコメントアウトした。
// multi_test4.cpp
// 2018/02/04 by marsee
//
#include <ap_fixed.h>
#include "multi_test4.h"
#include "conv1_weight.h"
// Evaluates both conv1 kernels on one 5x5 window.
//   in   : the 25 window samples in row-major order (in[m*5+n])
//   out0 : sum of in * conv1_weight[0]
//   out1 : sum of in * conv1_weight[1]
// No bias is added. Always returns 0.
int multi_test4(ap_ufixed_in in[25], ap_fixed_add &out0, ap_fixed_add &out1){
#pragma HLS ARRAY_PARTITION variable=in complete dim=1
    ap_fixed_madd out_temp = 0.0;
#pragma HLS RESOURCE variable=out_temp core=AddSub
#pragma HLS PIPELINE II=1
    conv0: for(int k=0; k<2; k++){
        // Start every kernel from a cleared accumulator.
        out_temp = 0.0;

        conv1: for(int m=0; m<5; m++){
            conv2: for(int n=0; n<5; n++){
                out_temp += in[m*5+n] * conv1_weight[k][0][m][n];
            }
        }

        if (k != 0)
            out1 = out_temp;
        else
            out0 = out_temp;
    }
    return(0);
}
// multi_test4.h
// 2018/02/04 by marsee
//
#ifndef __multi_test_H__
#define __multi_test_H__
#include <ap_fixed.h>
// Input sample: unsigned, 8 bits, no integer bits.
typedef ap_ufixed<8, 0, AP_TRN, AP_WRAP> ap_ufixed_in;
// Kernel coefficient: signed, 9 bits, 1 integer bit.
typedef ap_fixed<9, 1, AP_TRN, AP_WRAP> ap_fixed_weight;
// Multiply-accumulate accumulator: signed, 22 bits, 6 integer bits.
typedef ap_fixed<22, 6, AP_TRN, AP_WRAP> ap_fixed_madd;
// Result type: signed, 16 bits, 6 integer bits; unlike the types above it
// truncates toward zero (AP_TRN_ZERO) and saturates (AP_SAT).
typedef ap_fixed<16, 6, AP_TRN_ZERO, AP_SAT> ap_fixed_add;
#endif
// multi_test4_tb.h
// 2018/02/04 by marsee
//
#include "multi_test4.h"
int multi_test4(ap_ufixed_in in[25], ap_fixed_add &out0, ap_fixed_add &out1);

// Testbench for multi_test4(): fills the 25 inputs with a ramp starting at 0.5
// in steps of 1/256, prints each input, runs the function once and prints both
// results. Always returns 0.
int main(void){
    ap_ufixed_in in[25];
    ap_fixed_add out0, out1;
    ap_ufixed_in v = 0.5;

    // Plain i++ here: the former "i=i++" never advances i under C++17
    // sequencing rules and is undefined behaviour in earlier standards.
    for(int i=0; i<25; i++){
        in[i] = (ap_ufixed_in)v;
        // Print the value that was actually stored in in[i], before stepping v.
        printf("in[%d] = %f\n", i, (float)in[i]);
        v += (ap_ufixed_in)0.00390625;
    }

    multi_test4(in, out0, out1);
    printf("out0 = %f\n", (float)out0);
    printf("out1 = %f\n", (float)out1);
    return(0);
}
conv1_weight[0][0][m][n] を conv1_weight[1][0][m][n]
に変更して、2 個目の畳み込み演算の重みの配列を使用する。
// multi_test3.cpp
// 2018/01/30 by marsee
//
#include <ap_fixed.h>
#include "multi_test3.h"
#include "conv1_weight.h"
// Evaluates the first conv1 kernel on one 5x5 window.
//   in  : the 25 window samples in row-major order (in[row*5+col])
//   out : sum of in * conv1_weight[0]
// No bias is added. Always returns 0.
int multi_test3(ap_ufixed_in in[25], ap_fixed_add &out){
#pragma HLS ARRAY_PARTITION variable=in complete dim=1
#pragma HLS PIPELINE II=1
    ap_fixed_madd out_temp = 0.0;
#pragma HLS RESOURCE variable=out_temp core=AddSub
    conv1: for(int row=0; row<5; row++){
#pragma HLS UNROLL
        conv2: for(int col=0; col<5; col++){
            out_temp += in[row*5+col] * conv1_weight[0][0][row][col];
        }
    }

    // Convert the accumulator to the result type.
    out = out_temp;
    return(0);
}
// multi_test3.h
// 2018/01/30 by marsee
//
#ifndef __multi_test_H__
#define __multi_test_H__
#include <ap_fixed.h>
// Input sample: unsigned, 8 bits, no integer bits.
typedef ap_ufixed<8, 0, AP_TRN, AP_WRAP> ap_ufixed_in;
// Kernel coefficient: signed, 9 bits, 1 integer bit.
typedef ap_fixed<9, 1, AP_TRN, AP_WRAP> ap_fixed_weight;
// Multiply-accumulate accumulator: signed, 22 bits, 6 integer bits.
typedef ap_fixed<22, 6, AP_TRN, AP_WRAP> ap_fixed_madd;
// Result type: signed, 16 bits, 6 integer bits; unlike the types above it
// truncates toward zero (AP_TRN_ZERO) and saturates (AP_SAT).
typedef ap_fixed<16, 6, AP_TRN_ZERO, AP_SAT> ap_fixed_add;
#endif
// conv1_weight.h
// 2017/12/06 10:54:11 by marsee
// Floating-point conv1 kernel coefficients: 2 kernels, each a 5x5 grid of
// taps (the second index has a single entry).
const float conv1_fweight[2][1][5][5] =
{
{
{
{0.764403421227,0.658424746889,0.595604201652,0.554044871161,0.367767232883},
{0.582414155838,0.413274869036,0.31659268154,0.3508390519,0.331194144626},
{0.589182274309,0.462105790282,-0.241299390378,-0.10093021104,0.233291757594},
{0.792411286764,0.315893121865,0.0397628864727,0.356726636694,0.426826537165},
{0.634481192118,0.651475977113,0.688949928547,0.707285991358,0.681420943406}
}
}
,
{
{
{0.00564732125401,-0.012955272371,-0.0231571581103,-0.00289983746176,0.0281080593816},
{-0.0115360072012,0.00253310449813,-0.00860163957467,0.00112793810127,-0.01455040341},
{-0.00881717612899,-0.00902248113722,0.0004194288468,0.00110240651437,-0.0140454059394},
{0.00271556513713,-0.00307791921855,0.000117170379207,-0.00891721414879,0.0173026634286},
{0.000808453898046,0.000116327205532,-0.00275343050716,-0.00683461392689,-0.0169130858704}
}
}
};
// Fixed-point version of the conv1 kernel coefficients: 2 kernels, each a 5x5
// grid of taps (the second index has a single entry).
// Every value is a multiple of 1/256 (0.00390625), matching the 8 fractional
// bits of ap_fixed<9,1>.
// NOTE(review): compared with conv1_fweight, negative entries look rounded
// toward zero instead of to nearest (e.g. -0.012955272371 is stored as
// -0.0078125 although -0.01171875 is closer) - confirm against the script
// that generated this table.
const ap_fixed<9, 1, AP_TRN, AP_WRAP> conv1_weight[2][1][5][5] =
{
{
{
{0.765625,0.66015625,0.59375,0.5546875,0.3671875},
{0.58203125,0.4140625,0.31640625,0.3515625,0.33203125},
{0.58984375,0.4609375,-0.23828125,-0.09765625,0.234375},
{0.79296875,0.31640625,0.0390625,0.35546875,0.42578125},
{0.6328125,0.65234375,0.6875,0.70703125,0.6796875}
}
}
,
{
{
{0.00390625,-0.0078125,-0.01953125,0.0,0.02734375},
{-0.0078125,0.00390625,-0.00390625,0.0,-0.01171875},
{-0.00390625,-0.00390625,0.0,0.0,-0.01171875},
{0.00390625,0.0,0.0,-0.00390625,0.015625},
{0.0,0.0,0.0,-0.00390625,-0.01171875}
}
}
};
// multi_test3_tb.h
// 2018/01/30 by marsee
//
#include "multi_test3.h"
int multi_test3(ap_ufixed_in in[25], ap_fixed_add &out);

// Testbench for multi_test3(): fills the 25 inputs with a ramp starting at 0.5
// in steps of 1/256, prints each input, runs the function once and prints the
// result. Always returns 0.
int main(void){
    ap_ufixed_in in[25];
    ap_fixed_add out;
    ap_ufixed_in v = 0.5;

    // Plain i++ here: the former "i=i++" never advances i under C++17
    // sequencing rules and is undefined behaviour in earlier standards.
    for(int i=0; i<25; i++){
        in[i] = (ap_ufixed_in)v;
        // Print the value that was actually stored in in[i], before stepping v.
        printf("in[%d] = %f\n", i, (float)in[i]);
        v += (ap_ufixed_in)0.00390625;
    }

    multi_test3(in, out);
    printf("out = %f\n", (float)out);
    return(0);
}
// Signed 9-bit fixed-point type with 1 integer bit (same parameters as the
// kernel coefficient tables).
typedef ap_fixed<9, 1, AP_TRN, AP_WRAP> ap_fixed_multi;
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | - | - | 1 | 2 | 3 |
4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 | 27 | 28 | - | - | - |