// max_pooling_template.h
// 2018/05/10 by marsee
// テンプレートを使用して汎用化した max_pooling
//
#ifndef __MAX_POOLING_TEMPLATE__
#define __MAX_POOLING_TEMPLATE__
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include <ap_fixed.h>
#include "layer_general.h"
#define TO_LITERAL(x) #x
#define PRAGMA_HLS(tok) _Pragma(TO_LITERAL(HLS tok)) // @hiyuhさんから
// Streaming max-pooling layer, generalized through template parameters.
//
// Template parameters:
//   W, I                - width parameters forwarded to ap_fixed<W, I, AP_TRN, AP_WRAP>
//                         (conv_type) and to ap_fixed_axis
//   NUMBER_OF_KERNEL    - number of data[] entries processed per stream element
//   ARRAY_SIZE          - the pooling window is ARRAY_SIZE x ARRAY_SIZE
//   X_STRIDE, Y_STRIDE  - an output element is written only when
//                         x%X_STRIDE==X_STRIDE-1 and y%Y_STRIDE==Y_STRIDE-1
//   VERTICAL_HIGHT_IN   - number of input rows iterated by Loop2
//   HORIZONTAL_WIDTH_IN - number of input columns iterated by Loop3
//
// Arguments:
//   ins  - input stream; elements are read and dropped until one with user != 0
//          arrives, and that element is used as pixel (0,0) of the frame
//   outs - output stream receiving the pooled elements
//
// Returns: always 0.
template<
const size_t W,
const size_t I,
const size_t NUMBER_OF_KERNEL,
const size_t ARRAY_SIZE, // pooling is done over an ARRAY_SIZE x ARRAY_SIZE region
const size_t X_STRIDE,
const size_t Y_STRIDE,
const size_t VERTICAL_HIGHT_IN,
const size_t HORIZONTAL_WIDTH_IN
>int max_pooling_template(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs){
typedef ap_fixed<W, I, AP_TRN, AP_WRAP> conv_type;
ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix;
ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> mp_out;
// line_buf keeps the previous ARRAY_SIZE-1 input rows for every kernel.
// NOTE(review): the partition factor below is hard-coded to 2 although
// NUMBER_OF_KERNEL is a template parameter; the PRAGMA_HLS macro defined in this
// header is not used here - confirm whether the factor should follow NUMBER_OF_KERNEL.
conv_type line_buf[NUMBER_OF_KERNEL][ARRAY_SIZE-1][HORIZONTAL_WIDTH_IN];
#pragma HLS ARRAY_PARTITION variable=line_buf block factor=2 dim=1
#pragma HLS ARRAY_PARTITION variable=line_buf block factor=1 dim=2
// pix_mat is the ARRAY_SIZE x ARRAY_SIZE window of each kernel.
conv_type pix_mat[NUMBER_OF_KERNEL][ARRAY_SIZE][ARRAY_SIZE];
#pragma HLS array_partition variable=pix_mat complete
conv_type val[NUMBER_OF_KERNEL], conv_data;
Loop1: do {
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// the frame starts when user becomes 1 (the loop exits on the first element with user != 0)
ins >> pix;
} while(pix.user == 0);
Loop2: for (int y=0; y<VERTICAL_HIGHT_IN; y++){
Loop3: for (int x=0; x<HORIZONTAL_WIDTH_IN; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the first input has already been read by Loop1
ins >> pix; // input from the AXI4-Stream
Loop4: for (int n=0; n<NUMBER_OF_KERNEL; n++){
#pragma HLS UNROLL
conv_data = pix.data[n];
// shift the data of the 2-D window one column to the left
Loop5 : for (int k=0; k<ARRAY_SIZE; k++){
#pragma HLS UNROLL
Loop6 : for (int m=0; m<ARRAY_SIZE-1; m++){
pix_mat[n][k][m] = pix_mat[n][k][m+1];
}
}
Loop7: for (int i=0; i<ARRAY_SIZE-1; i++){ // load the data of the previous rows from line_buf
pix_mat[n][i][ARRAY_SIZE-1] = line_buf[n][i][x];
}
pix_mat[n][ARRAY_SIZE-1][ARRAY_SIZE-1] = conv_data; // store the new data in the last element of pix_mat
Loop8: for (int i=0; i<ARRAY_SIZE-2; i++){ // move the buffered rows up by one
line_buf[n][i][x] = line_buf[n][i+1][x];
}
line_buf[n][ARRAY_SIZE-2][x] = conv_data;
// search for the maximum value inside the window
Loop9 : for (int k=0; k<ARRAY_SIZE; k++){
#pragma HLS UNROLL
Loop10 : for (int m=0; m<ARRAY_SIZE; m++){
if (k==0 && m==0){
val[n] = pix_mat[n][k][m];
} else if (val[n] < pix_mat[n][k][m]){
val[n] = pix_mat[n][k][m];
}
}
}
mp_out.data[n] = val[n];
// user and last do not depend on n; they are rewritten with the same value for every kernel
if (x==X_STRIDE-1 && y==Y_STRIDE-1){ // assert TUSER on the first output data
mp_out.user = 1;
} else {
mp_out.user = 0;
}
// NOTE(review): x == HORIZONTAL_WIDTH_IN-1 coincides with an output position only
// when HORIZONTAL_WIDTH_IN is a multiple of X_STRIDE; otherwise last is never sent as 1.
if (x == HORIZONTAL_WIDTH_IN-1){ // assert TLAST at the end of the row
mp_out.last = 1;
} else {
mp_out.last = 0;
}
}
if (x%X_STRIDE==X_STRIDE-1 && y%Y_STRIDE==Y_STRIDE-1){ // stride: write only at the last position of each stride step
outs << mp_out;
}
}
}
return(0);
}
#endif
// max_pooling.cpp
// 2018/05/10 by marsee
//
#include "max_pooling_template.h"
// Top-level max-pooling function.
// Instantiates max_pooling_template with W=16, I=6, NUMBER_OF_KERNEL=2,
// ARRAY_SIZE=2, X_STRIDE=2, Y_STRIDE=2, VERTICAL_HIGHT_IN=6, HORIZONTAL_WIDTH_IN=52
// and forwards both streams to it.
//   ins  - input stream
//   outs - output stream
// Returns the value returned by max_pooling_template.
int max_pooling(hls::stream<ap_fixed_axis<16,6,2,1> >& ins,
hls::stream<ap_fixed_axis<16,6,2,1> >& outs){
// DATA_PACK is applied to both stream arguments
#pragma HLS DATA_PACK variable=outs
#pragma HLS DATA_PACK variable=ins
return(max_pooling_template<16,6,2,2,2,2,6,52>(ins, outs));
}
// max_pooling.h
// 2018/04/19 by marsee
//
#ifndef __MAX_POOLING_H__
#define __MAX_POOLING_H__
#include <ap_fixed.h>
// Frame geometry used by the test bench and its reference models.
static const size_t H_PIXEL_WIDTH_IN = 52; // number of columns of the input frame
static const size_t V_PIXEL_WIDTH_IN = 6; // number of rows of the input frame
static const size_t H_PIXEL_WIDTH_OUT = 26; // number of columns of the pooled output
static const size_t V_PIXEL_WIDTH_OUT = 3; // number of rows of the pooled output
static const size_t NUMBER_OF_KERNEL = 2; // number of data[] entries per stream element
static const size_t ARRAY_SIZE = 2;
// W and I are the template arguments of the ap_fixed type below
static const size_t W = 16;
static const size_t I = 6;
// step between pooling windows; the reference models also use these as the window size
static const size_t X_STRIDE = 2;
static const size_t Y_STRIDE = 2;
// fixed-point data type of the values carried in the streams
typedef ap_fixed<W, I, AP_TRN, AP_WRAP> conv_type;
#endif
// max_pooling_tb.cpp
// 2018/04/19 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "max_pooling.h"
#include "relu_output.h"
// Function under test (fixed-point, stream in / stream out).
int max_pooling(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs);
// Fixed-point reference model; its output is compared against max_pooling in main().
int max_pooling2(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs);
// Floating-point reference model; its output is printed and written to the generated header.
int max_pooling_soft(hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& ins,
hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs);
// Test bench for max_pooling.
//
// 1. Pushes 5 dummy elements (user = 0) followed by one frame of
//    V_PIXEL_WIDTH_IN x H_PIXEL_WIDTH_IN elements taken from relu_out / relu_fout
//    (declared outside this file, presumably in relu_output.h) into the input
//    streams, and dumps channel 0 of the frame to relu_output_X0.csv /
//    relu_output_F0.csv.
// 2. Runs max_pooling, max_pooling_soft and max_pooling2.
// 3. Compares the max_pooling and max_pooling2 outputs element by element and
//    counts the mismatches.
// 4. Writes the float and fixed-point results to max_pooling_output.h.
//
// Returns 0 when no mismatch was found and 1 otherwise, so that a failing
// comparison is visible to whatever runs the test bench.
int main(){
    using namespace std;

    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > ins;
    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > ins2;
    hls::stream<float_axis<NUMBER_OF_KERNEL,1> > ins_soft;
    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > outs;
    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > outs2;
    hls::stream<float_axis<NUMBER_OF_KERNEL,1> > outs_soft;

    float mp_fout[H_PIXEL_WIDTH_OUT*V_PIXEL_WIDTH_OUT][NUMBER_OF_KERNEL];
    conv_type mp_out[H_PIXEL_WIDTH_OUT*V_PIXEL_WIDTH_OUT][NUMBER_OF_KERNEL];

    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix;
    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix2;
    float_axis<NUMBER_OF_KERNEL,1> fpix;

    // Prepare the input data: dummy elements first (user = 0, skipped by the models)
    for(int i=0; i<5; i++){
        pix.user = 0;
        for(int k=0; k<NUMBER_OF_KERNEL; k++){
            pix.data[k] = (conv_type)i;
        }
        ins << pix;
        ins2 << pix;

        fpix.user = 0;
        for(int k=0; k<NUMBER_OF_KERNEL; k++){
            fpix.data[k] = (float)i;
        }
        ins_soft << fpix;
    }

    // Feed one frame of data into ins, ins2 and ins_soft
    ofstream OHX("relu_output_X0.csv");
    ofstream OHF("relu_output_F0.csv");
    for(int j=0; j < V_PIXEL_WIDTH_IN; j++){
        for(int i=0; i < H_PIXEL_WIDTH_IN; i++){
            for(int k=0; k<NUMBER_OF_KERNEL; k++){
                pix.data[k] = relu_out[j*H_PIXEL_WIDTH_IN+i][k];
                fpix.data[k] = relu_fout[j*H_PIXEL_WIDTH_IN+i][k];
            }

            // Channel 0 is dumped as CSV, one frame row per line
            OHX << pix.data[0];
            if(i != H_PIXEL_WIDTH_IN-1)
                OHX << ",";
            else
                OHX << endl;
            OHF << fpix.data[0];
            if(i != H_PIXEL_WIDTH_IN-1)
                OHF << ",";
            else
                OHF << endl;

            if (j==0 && i==0){ // set TUSER to 1 on the first data of the frame
                pix.user = 1;
                fpix.user = 1;
            } else {
                pix.user = 0;
                fpix.user = 0;
            }

            if (i == H_PIXEL_WIDTH_IN-1){ // assert TLAST at the end of the row
                pix.last = 1;
                fpix.last = 1;
            } else {
                pix.last = 0;
                fpix.last = 0;
            }

            ins << pix;
            ins2 << pix;
            ins_soft << fpix;
        }
    }

    max_pooling(ins, outs);
    max_pooling_soft(ins_soft, outs_soft);
    max_pooling2(ins2, outs2);

    // Read outs and outs_soft into mp_out[][] and mp_fout[][], comparing outs with outs2
    int errcnt = 0;
    for(int j=0; j < V_PIXEL_WIDTH_OUT; j++){
        for(int i=0; i < H_PIXEL_WIDTH_OUT; i++){
            outs >> pix;
            outs2 >> pix2;
            outs_soft >> fpix;

            for(int k=0; k<NUMBER_OF_KERNEL; k++){
                mp_out[j*H_PIXEL_WIDTH_OUT+i][k] = pix.data[k];
                mp_fout[j*H_PIXEL_WIDTH_OUT+i][k] = fpix.data[k];

                printf("%d, %d, data[%d] = %f, data2[%d] = %f, fdata[%d] = %f\n", j, i, k, (float)pix.data[k], k, (float)pix2.data[k], k, fpix.data[k]);
                if (pix.data[k] != pix2.data[k]){
                    // i and j are int, so they are printed with %d (the former %ld did not match the argument type)
                    printf("ERROR HW and SW results mismatch i = %d, j = %d, HW[%d] = %f, HW2[%d] = %f, SW[%d] = %f\n", i, j, k, (float)pix.data[k], k, (float)pix2.data[k], k,fpix.data[k]);
                    errcnt++;
                    // keep going so that every mismatch is counted
                }
            }
        }
    }
    cout << "Error Count = " << errcnt << endl;
    // Report success only when the comparison really passed
    if (errcnt == 0)
        cout << "Success HW and SW results match" << endl;
    else
        cout << "ERROR HW and SW results mismatch" << endl;
    cout << endl;

    // Write the max_pooling results to a header file
    ofstream OH("max_pooling_output.h");
    OH << "// max_pooling_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
    // setw() only affects the next field, so it is repeated for hour, minute and second
    OH << " " << setfill('0') << setw(2) << localNow->tm_hour << ":" << setw(2) << localNow->tm_min << ":" << setw(2) << localNow->tm_sec << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __MAX_POOLING_OUTPUT_H__" << endl;
    OH << "#define __MAX_POOLING_OUTPUT_H__" << endl;
    OH << endl;

    // Floating-point results
    OH << "const float mp_fout[" << V_PIXEL_WIDTH_OUT*H_PIXEL_WIDTH_OUT << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<V_PIXEL_WIDTH_OUT ; y++){
        for (int x=0; x<H_PIXEL_WIDTH_OUT ; x++){
            OH << " {" << fixed << setprecision(12) << mp_fout[H_PIXEL_WIDTH_OUT*y+x][0];
            for (int i=1; i<NUMBER_OF_KERNEL; ++i)
            {
                OH << ", " << mp_fout[H_PIXEL_WIDTH_OUT*y+x][i];
            }
            OH << "}";
            // no trailing comma after the very last initializer
            if (y==V_PIXEL_WIDTH_OUT-1 && x==H_PIXEL_WIDTH_OUT-1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;

    // Fixed-point results
    OH << "const ap_fixed<16, 6, AP_TRN, AP_WRAP> mp_out[" << V_PIXEL_WIDTH_OUT*H_PIXEL_WIDTH_OUT << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<V_PIXEL_WIDTH_OUT ; y++){
        for (int x=0; x<H_PIXEL_WIDTH_OUT ; x++){
            OH << " {" << fixed << setprecision(12) << (float)mp_out[H_PIXEL_WIDTH_OUT*y+x][0];
            for(int i=1; i<NUMBER_OF_KERNEL; i++){
                OH << ", " << (float)mp_out[H_PIXEL_WIDTH_OUT*y+x][i];
            }
            OH << "}";
            if (y==V_PIXEL_WIDTH_OUT -1 && x==H_PIXEL_WIDTH_OUT -1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;

    // A non-zero return value marks the run as failed
    return (errcnt == 0) ? 0 : 1;
}
// Floating-point software reference model of the max-pooling layer.
//
// Reads and drops elements from ins until one with user != 0 arrives, then
// stores a frame of V_PIXEL_WIDTH_IN x H_PIXEL_WIDTH_IN elements (the element
// with user != 0 being pixel (0,0)). For every Y_STRIDE x X_STRIDE window,
// stepped by Y_STRIDE / X_STRIDE, the per-kernel maximum is written to outs.
//
//   ins  - input stream of float elements
//   outs - output stream; user is 1 on the first output element, last is 1 on
//          the last output element of each row
//
// Returns: always 0.
int max_pooling_soft(hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& ins,
hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs){
    float_axis<NUMBER_OF_KERNEL,1> fpix;
    float fpixd_ary[NUMBER_OF_KERNEL][V_PIXEL_WIDTH_IN][H_PIXEL_WIDTH_IN];
    float fval[NUMBER_OF_KERNEL];

    do {
        // the frame starts when user becomes 1
        ins >> fpix;
    } while(fpix.user == 0);

    // Store the whole frame
    for (int y=0; y<V_PIXEL_WIDTH_IN; y++){
        for (int x=0; x<H_PIXEL_WIDTH_IN; x++){
            if (!(x==0 && y==0)) // the first input has already been read
                ins >> fpix;

            for(int i=0; i<NUMBER_OF_KERNEL; i++){
                fpixd_ary[i][y][x] = fpix.data[i];
            }
        }
    }

    // Pool every window and send the result
    for (int y=0; y<V_PIXEL_WIDTH_IN-1; y+=Y_STRIDE){
        for (int x=0; x<H_PIXEL_WIDTH_IN-1; x+=X_STRIDE){
            for(int p=0; p<NUMBER_OF_KERNEL; p++){
                for(int m=0; m<Y_STRIDE; m++){
                    for(int n=0; n<X_STRIDE; n++){
                        if(m==0 && n==0){
                            // the first element of the window seeds the maximum
                            fval[p] = fpixd_ary[p][y][x];
                        } else if(fval[p] < fpixd_ary[p][y+m][x+n]){
                            fval[p] = fpixd_ary[p][y+m][x+n];
                        }
                    }
                }
            }

            for(int i=0; i<NUMBER_OF_KERNEL; i++){
                fpix.data[i] = fval[i];
            }

            if(x==0 && y==0)
                fpix.user = 1;
            else
                fpix.user = 0;

            // x/X_STRIDE is the output column. TLAST belongs to the last output
            // column, the same rule max_pooling2 applies. (The former test against
            // V_PIXEL_WIDTH_OUT - X_STRIDE could never match an x value.)
            if(x/X_STRIDE == H_PIXEL_WIDTH_OUT-1)
                fpix.last = 1;
            else
                fpix.last = 0;

            outs << fpix;
        }
    }

    return(0);
}
// Fixed-point software reference model of the max-pooling layer.
//
// Elements of ins are dropped until one with user != 0 is seen; that element is
// pixel (0,0) of a V_PIXEL_WIDTH_IN x H_PIXEL_WIDTH_IN frame which is buffered
// completely. The maximum of each Y_STRIDE x X_STRIDE window is then computed
// per kernel and the V_PIXEL_WIDTH_OUT x H_PIXEL_WIDTH_OUT result is streamed
// to outs (user = 1 on the first element, last = 1 at the end of each row).
//
// Returns: always 0.
int max_pooling2(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs){
    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> in_px;
    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> out_px;
    conv_type frame[NUMBER_OF_KERNEL][V_PIXEL_WIDTH_IN][H_PIXEL_WIDTH_IN];
    conv_type pooled[NUMBER_OF_KERNEL][V_PIXEL_WIDTH_OUT][H_PIXEL_WIDTH_OUT];

    // Wait for the start of the frame (first element whose user is not 0)
    ins >> in_px;
    while (in_px.user == 0){
        ins >> in_px;
    }

    // Buffer the frame; pixel (0,0) is the element already held in in_px
    for (int row=0; row<V_PIXEL_WIDTH_IN; row++){
        for (int col=0; col<H_PIXEL_WIDTH_IN; col++){
            const bool first_pixel = (row==0 && col==0);
            if (!first_pixel){
                ins >> in_px; // next element from the AXI4-Stream
            }
            for (int ch=0; ch<NUMBER_OF_KERNEL; ch++){
                frame[ch][row][col] = in_px.data[ch];
            }
        }
    }

    // Pooling: window = Y_STRIDE x X_STRIDE, stepped by the same amounts
    for (int ch=0; ch<NUMBER_OF_KERNEL; ch++){
        for (int top=0; top<V_PIXEL_WIDTH_IN; top += Y_STRIDE){
            for (int left=0; left<H_PIXEL_WIDTH_IN; left += X_STRIDE){
                // start from the top-left element and keep the largest one
                conv_type best = frame[ch][top][left];
                for (int dy=0; dy<Y_STRIDE; dy++){
                    for (int dx=0; dx<X_STRIDE; dx++){
                        if (best < frame[ch][top+dy][left+dx]){
                            best = frame[ch][top+dy][left+dx];
                        }
                    }
                }
                pooled[ch][top/Y_STRIDE][left/X_STRIDE] = best;
            }
        }
    }

    // Stream out the pooled frame
    for (int oy=0; oy<V_PIXEL_WIDTH_OUT; oy++){
        for (int ox=0; ox<H_PIXEL_WIDTH_OUT; ox++){
            for (int ch=0; ch<NUMBER_OF_KERNEL; ch++){
                out_px.data[ch] = pooled[ch][oy][ox];
            }
            out_px.user = (ox==0 && oy==0) ? 1 : 0; // TUSER marks the first element
            out_px.last = (ox == (H_PIXEL_WIDTH_OUT-1)) ? 1 : 0; // TLAST marks the end of a row
            outs << out_px;
        }
    }

    return(0);
}
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | - | - | - | 1 | 2 |
3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 26 | 27 | 28 | 29 | 30 |
31 | - | - | - | - | - | - |