// square_frame_gen.h
// 2017/06/28 by marsee
//
#ifndef __SQUARE_FRAME_GEN_H__
#define __SQUARE_FRAME_GEN_H__
#define HORIZONTAL_PIXEL_WIDTH 800
#define VERTICAL_PIXEL_WIDTH 600
#define ALL_PIXEL_VALUE (HORIZONTAL_PIXEL_WIDTH*VERTICAL_PIXEL_WIDTH)
#endif
// square_frame_gen.cpp
// 2017/06/28 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "square_frame_gen.h"
// Draws a 1-pixel-wide square frame on an image streamed in over AXI4-Stream
// x_pos : x position of the top-left corner inside the frame
// y_pos : y position of the top-left corner inside the frame
// width : width in pixels of the inside of the frame
// height : height in pixels of the inside of the frame
// off_on : square frame OFF - 0, ON - 1
int square_frame_gen(hls::stream<ap_axis<32,1,1,1> >& ins,
hls::stream<ap_axis<32,1,1,1> >& outs,
int x_pos, int y_pos, int width, int height, int off_on){
#pragma HLS INTERFACE s_axilite port=off_on
#pragma HLS INTERFACE s_axilite port=width
#pragma HLS INTERFACE s_axilite port=x_pos
#pragma HLS INTERFACE s_axilite port=height
#pragma HLS INTERFACE s_axilite port=y_pos
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=outs
#pragma HLS INTERFACE axis register both port=ins
ap_axis<32,1,1,1> pix;
int gray_pix, val, i, j, x, y;
do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// the frame starts when user becomes 1
ins >> pix;
} while(pix.user == 0);
for (y=0; y<VERTICAL_PIXEL_WIDTH; y++){
for (x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the first pixel has already been read above
ins >> pix; // input from the AXI4-Stream
if (off_on){
if (y==y_pos-1 && x>=x_pos-1 && x<=(x_pos+width+1)) // top frame line
pix.data = (0xff << 16)+0xff; // R = 0xff, B = 0xff (magenta)
else if (y>=y_pos-1 && y<=y_pos+height+1 && (x==x_pos-1 || x==x_pos+width+1)) // vertical frame lines
pix.data = (0xff << 16)+0xff; // R = 0xff, B = 0xff (magenta)
else if (y==y_pos+height+1 && x>=x_pos-1 && x<=(x_pos+width+1)) // bottom frame line
pix.data = (0xff << 16)+0xff; // R = 0xff, B = 0xff (magenta)
}
outs << pix;
}
}
return(0);
}
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// 0x18 : Data signal of x_pos
// bit 31~0 - x_pos[31:0] (Read/Write)
// 0x1c : reserved
// 0x20 : Data signal of y_pos
// bit 31~0 - y_pos[31:0] (Read/Write)
// 0x24 : reserved
// 0x28 : Data signal of width
// bit 31~0 - width[31:0] (Read/Write)
// 0x2c : reserved
// 0x30 : Data signal of height
// bit 31~0 - height[31:0] (Read/Write)
// 0x34 : reserved
// 0x38 : Data signal of off_on
// bit 31~0 - off_on[31:0] (Read/Write)
// 0x3c : reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
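Driving this register map from the PS side only takes plain 32-bit writes at the offsets listed above. Below is a minimal bare-metal sketch, assuming a hypothetical base address (the real one comes from the Vivado Address Editor), using the same parameter values as the testbench below:

// Hypothetical bare-metal register writes for the map above (not from the
// original sources). The base address is an assumption.
#include <cstdint>

const uintptr_t SQUARE_FRAME_GEN_BASE = 0x43C00000; // assumed base address

static void reg_write(uintptr_t offset, uint32_t value){
    *(volatile uint32_t *)(SQUARE_FRAME_GEN_BASE + offset) = value;
}

void start_square_frame_gen(){
    reg_write(0x18, 400);  // x_pos (same values as the testbench below)
    reg_write(0x20, 300);  // y_pos
    reg_write(0x28, 28);   // width
    reg_write(0x30, 28);   // height
    reg_write(0x38, 1);    // off_on = ON
    reg_write(0x00, 0x81); // ap_start (bit 0) + auto_restart (bit 7)
}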
// square_frame_gen_tb.cpp
// 2017/06/28 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "square_frame_gen.h"
#include "bmp_header.h"
int square_frame_gen(hls::stream<ap_axis<32,1,1,1> >& ins,
hls::stream<ap_axis<32,1,1,1> >& outs,
int x_pos, int y_pos, int width, int height, int off_on);
#define READ_BMP_FILE_NAME "bmp_file0.bmp"
#define WRITE_BMP_FILE_NAME "square_frame_gen.bmp"
#define X_POS 400
#define Y_POS 300
#define WIDTH 28
#define HEIGHT 28
#define ON 1
int main(){
using namespace std;
hls::stream<ap_axis<32,1,1,1> > ins;
hls::stream<ap_axis<32,1,1,1> > outs;
ap_axis<32,1,1,1> pix;
ap_axis<32,1,1,1> vals;
BITMAPFILEHEADER bmpfhr; // BMP file header (for Read)
BITMAPINFOHEADER bmpihr; // BMP INFO header (for Read)
FILE *fbmpr, *fbmpw;
int *rd_bmp, *seq_frm;
int blue, green, red;
if ((fbmpr = fopen(READ_BMP_FILE_NAME, "rb")) == NULL){ // open the input BMP file
fprintf(stderr, "Can't open ");
fprintf(stderr, READ_BMP_FILE_NAME);
fprintf(stderr, " by binary read mode\n");
exit(1);
}
// read the BMP header
fread(&bmpfhr.bfType, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfSize, sizeof(uint32_t), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(uint32_t), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// allocate memory to hold the pixels
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
if ((seq_frm =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate seq_frm memory\n");
exit(1);
}
// copy the BMP pixels into rd_bmp; the rows have to be reversed (BMP stores them bottom-up)
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
// prepare the input data for ins
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pix.data = i;
ins << pix;
}
// feed the BMP image into ins
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
pix.data = (ap_int<32>)rd_bmp[(j*bmpihr.biWidth)+i];
if (j==0 && i==0) // assert TUSER on the first data beat
pix.user = 1;
else
pix.user = 0;
if (i == bmpihr.biWidth-1) // assert TLAST at the end of each row
pix.last = 1;
else
pix.last = 0;
ins << pix;
}
}
square_frame_gen(ins, outs, X_POS, Y_POS, WIDTH, HEIGHT, ON);
// open the output file
if ((fbmpw=fopen(WRITE_BMP_FILE_NAME, "wb")) == NULL){
fprintf(stderr, "Can't open ");
fprintf(stderr, WRITE_BMP_FILE_NAME);
fprintf(stderr, " by binary write mode\n");
exit(1);
}
// write the BMP file header
fwrite(&bmpfhr.bfType, sizeof(uint16_t), 1, fbmpw);
fwrite(&bmpfhr.bfSize, sizeof(uint32_t), 1, fbmpw);
fwrite(&bmpfhr.bfReserved1, sizeof(uint16_t), 1, fbmpw);
fwrite(&bmpfhr.bfReserved2, sizeof(uint16_t), 1, fbmpw);
fwrite(&bmpfhr.bfOffBits, sizeof(uint32_t), 1, fbmpw);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpw);
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
outs >> vals;
ap_int<32> val = vals.data;
seq_frm[(j*bmpihr.biWidth)+i] = (int)val;
}
}
// write the RGB data, reversing the row order again
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = seq_frm[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = (seq_frm[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] >> 8) & 0xff;
red = (seq_frm[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x]>>16) & 0xff;
fputc(blue, fbmpw);
fputc(green, fbmpw);
fputc(red, fbmpw);
}
}
fclose(fbmpw);
return(0);
}
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
Compiling ../../../mnist_conv_nn_tb_bmp.cpp in debug mode
Compiling ../../../mnist_conv_nn10.cpp in debug mode
Generating csim.exe
test0.bmp
id = 0, max_id_hw = 0
id = 0, max_id_sw = 0
test1.bmp
id = 1, max_id_hw = 1
id = 1, max_id_sw = 1
test2.bmp
id = 2, max_id_hw = 3
id = 2, max_id_sw = 2
test3.bmp
id = 3, max_id_hw = 3
id = 3, max_id_sw = 3
test4.bmp
id = 4, max_id_hw = 4
id = 4, max_id_sw = 4
test5.bmp
id = 5, max_id_hw = 5
id = 5, max_id_sw = 5
test6.bmp
id = 6, max_id_hw = 6
id = 6, max_id_sw = 6
test7.bmp
id = 7, max_id_hw = 7
id = 7, max_id_sw = 7
test8.bmp
id = 8, max_id_hw = 8
id = 8, max_id_sw = 8
test9.bmp
id = 9, max_id_hw = 4
id = 9, max_id_sw = 4
INFO: [SIM 1] CSim done with 0 errors.
INFO: [SIM 3] *************** CSIM finish ***************
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
make: 'csim.exe' is up to date.
test0.bmp
id = 0, max_id_hw = 0
id = 0, max_id_sw = 6
test1.bmp
id = 1, max_id_hw = 5
id = 1, max_id_sw = 5
test2.bmp
id = 2, max_id_hw = 2
id = 2, max_id_sw = 2
test3.bmp
id = 3, max_id_hw = 3
id = 3, max_id_sw = 3
test4.bmp
id = 4, max_id_hw = 4
id = 4, max_id_sw = 4
test5.bmp
id = 5, max_id_hw = 5
id = 5, max_id_sw = 5
test6.bmp
id = 6, max_id_hw = 5
id = 6, max_id_sw = 5
test7.bmp
id = 7, max_id_hw = 7
id = 7, max_id_sw = 7
test8.bmp
id = 8, max_id_hw = 8
id = 8, max_id_sw = 8
test9.bmp
id = 9, max_id_hw = 2
id = 9, max_id_sw = 2
INFO: [SIM 1] CSim done with 0 errors.
INFO: [SIM 3] *************** CSIM finish ***************
// mnist_conv_nn_tb_bmp.cpp
// 2017/06/14 by marsee
// 2017/06/21 : read the files test0.bmp - test9.bmp and print the results
//
#include <stdio.h>
#include <ap_fixed.h>
#include <string.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#include "bmp_header.h"
int mnist_conv_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[784], ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]);
int mnist_conv_nn_float(float in[784], float out[10]);
int max_ap_fixed(ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]);
int max_float(float out[10]);
int conv_rgb2y(int rgb);
#define NUM_ITERATIONS 10 // C Simulation
int main(){
ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> result_ap_fixed[NUM_ITERATIONS][10];
float result_float[NUM_ITERATIONS][10];
int max_id_hw, max_id_sw, max_id_ref;
BITMAPFILEHEADER bmpfhr; // BMP file header (for Read)
BITMAPINFOHEADER bmpihr; // BMP INFO header (for Read)
char namestr[100];
FILE *fbmpr;
int *rd_bmp;
float *rd_bmpf;
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> *rd_bmp_apf;
int blue, green, red;
for(int i=0; i<NUM_ITERATIONS; i++){
sprintf(namestr, "test%d.bmp", i);
printf("%s\n", namestr);
if ((fbmpr = fopen(namestr, "rb")) == NULL){ // open test??.bmp
fprintf(stderr, "Can't open test%d.bmp by binary read mode\n", i);
exit(1);
}
// read the BMP header
fread(&bmpfhr.bfType, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfSize, sizeof(uint32_t), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(uint32_t), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// allocate memory to hold the pixels
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
if ((rd_bmpf =(float *)malloc(sizeof(float) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmpf memory\n");
exit(1);
}
if ((rd_bmp_apf =(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> *)malloc(sizeof(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp_apf memory\n");
exit(1);
}
// copy the BMP pixels into rd_bmp; the rows have to be reversed (BMP stores them bottom-up)
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = conv_rgb2y(rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x]);
rd_bmpf[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = 1.0 - (float)rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x]/256.0; // convert black-on-white to white-on-black
//if (rd_bmpf[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] < 0.01)
//rd_bmpf[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = 0.0;
rd_bmp_apf[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>)rd_bmpf[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x];
}
}
fclose(fbmpr);
/*
for (int y=0; y<bmpihr.biHeight; y++){
    for (int x=0; x<bmpihr.biWidth; x++){
        printf("%f ", (float)rd_bmp_apf[y*bmpihr.biWidth+x]);
    }
    printf("\n");
}
printf("\n");
*/
mnist_conv_nn(rd_bmp_apf, &result_ap_fixed[i][0]);
mnist_conv_nn_float(rd_bmpf, &result_float[i][0]);
max_id_hw = max_ap_fixed(&result_ap_fixed[i][0]);
max_id_sw = max_float(&result_float[i][0]);
printf("id = %d, max_id_hw = %d\n", i, max_id_hw);
printf("id = %d, max_id_sw = %d\n", i, max_id_sw);
free(rd_bmp);
free(rd_bmpf);
free(rd_bmp_apf);
}
return(0);
}
int mnist_conv_nn_float(float in[784], float out[10]){
float buf[28][28];
float conv_out[10][24][24];
float pool_out[10][12][12];
float dot1[100];
float dot2[10];
buf_copy1: for(int i=0; i<28; i++)
buf_copy2: for(int j=0; j<28; j++)
buf[i][j] = in[i*28+j];
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<10; i++){ // number of kernels
CONV2: for(int j=0; j<24; j++){
CONV3: for(int k=0; k<24; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_fweight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_fbias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<10; i++){
POOL2: for(int j=0; j<24; j += 2){
POOL3: for(int k=0; k<24; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<10; i++){
af1_dot3: for(int j=0; j<12; j++){
af1_dot4: for(int k=0; k<12; k++){
dot1[col] += pool_out[i][j][k]*af1_fweight[i*12*12+j*12+k][col];
}
}
}
dot1[col] += af1_fbias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_fweight[row][col];
}
dot2[col] += af2_fbias[col];
out[col] = dot2[col];
}
return(0);
}
int max_ap_fixed(ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]){
int max_id;
ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> max;
for(int i=0; i<10; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
return(max_id);
}
int max_float(float out[10]){
int max_id;
float max;
for(int i=0; i<10; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
return(max_id);
}
// RGB to Y conversion
// RGB format: {8'd0, R(8bits), G(8bits), B(8bits)}, 1 pixel = 32 bits
// Converts to the luminance signal Y only; the formula is Y = 0.299R + 0.587G + 0.114B
// Based on "YUV format and YUV<->RGB conversion", http://vision.kuee.kyoto-u.ac.jp/~hiroaki/firewire/yuv.html
// 2013/09/27 : dropped float and switched everything to int
int conv_rgb2y(int rgb){
int r, g, b, y_f;
int y;
b = rgb & 0xff;
g = (rgb>>8) & 0xff;
r = (rgb>>16) & 0xff;
y_f = 77*r + 150*g + 29*b; // the coefficients of y_f = 0.299*r + 0.587*g + 0.114*b scaled by 256
y = y_f >> 8; // divide by 256
return(y);
}
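Since 77 + 150 + 29 = 256, the integer coefficients sum back to exactly one in fixed point, so a full-white pixel maps to y = 255 after the >> 8. A tiny hypothetical check (not part of the original sources):

// Hypothetical sanity check for conv_rgb2y (not in the original post).
#include <cassert>
int conv_rgb2y(int rgb);

void test_conv_rgb2y(){
    assert(conv_rgb2y(0xffffff) == 255); // white: (77+150+29)*255 >> 8 = 255
    assert(conv_rgb2y(0x000000) == 0);   // black
    assert(conv_rgb2y(0x00ff00) == 149); // pure green: 150*255 >> 8 = 149
}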
// mnist_conv_nn10.cpp
// 2017/06/12 by marsee
// number of convolution-layer kernels: 10
//
#include <ap_fixed.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
int mnist_conv_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[784], ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]){
#pragma HLS INTERFACE s_axilite register port=out
#pragma HLS INTERFACE axis register both port=in
#pragma HLS INTERFACE s_axilite port=return
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[28][28];
ap_fixed<10, 3, AP_TRN_ZERO, AP_SAT> conv_out[10][24][24];
#pragma HLS ARRAY_PARTITION variable=conv_out cyclic factor=2 dim=3
ap_fixed<10, 3, AP_TRN_ZERO, AP_SAT> pool_out[10][12][12];
#pragma HLS ARRAY_PARTITION variable=pool_out block factor=12 dim=3
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot2[10];
buf_copy1: for(int i=0; i<28; i++)
buf_copy2: for(int j=0; j<28; j++)
#pragma HLS PIPELINE II=1
buf[i][j] = in[i*28+j];
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<10; i++){ // number of kernels
CONV2: for(int j=0; j<24; j++){
CONV3: for(int k=0; k<24; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
#pragma HLS PIPELINE II=7
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<10; i++){
POOL2: for(int j=0; j<24; j += 2){
POOL3: for(int k=0; k<24; k += 2){
POOL4: for(int m=0; m<2; m++){
#pragma HLS PIPELINE II=4
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<10; i++){
af1_dot3: for(int j=0; j<12; j++){
af1_dot4: for(int k=0; k<12; k++){
#pragma HLS PIPELINE II=3
dot1[col] += pool_out[i][j][k]*af1_weight[i*12*12+j*12+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
#pragma HLS PIPELINE II=4
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
out[col] = dot2[col];
}
return(0);
}
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// 0x20 ~
// 0x3f : Memory 'out_V' (10 * 12b)
// Word n : bit [11: 0] - out_V[2n]
// bit [27:16] - out_V[2n+1]
// others - reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
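Note that the ten 12-bit out_V results come back packed two per 32-bit word, so software has to sign-extend bits [11:0] and [27:16] separately. A hedged sketch, assuming a hypothetical base address and the ap_fixed<12, 7> output format (5 fractional bits):

// Hypothetical unpacking of the packed out_V memory (not from the original
// sources). The base address is an assumption from the Vivado Address Editor.
#include <cstdint>

const uintptr_t MNIST_CONV_NN_BASE = 0x43C10000; // assumed base address

void read_mnist_results(float out[10]){
    for(int n = 0; n < 5; n++){ // 10 values, two 12-bit results per 32-bit word
        uint32_t word = *(volatile uint32_t *)(MNIST_CONV_NN_BASE + 0x20 + 4*n);
        int16_t lo = (int16_t)((word & 0x0fff) << 4) >> 4;         // sign-extend bits [11:0]
        int16_t hi = (int16_t)(((word >> 16) & 0x0fff) << 4) >> 4; // sign-extend bits [27:16]
        out[2*n]     = lo / 32.0f; // divide out the 2^5 fractional bits
        out[2*n + 1] = hi / 32.0f;
    }
}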
// mnist_conv_nn10.cpp
// 2017/06/12 by marsee
// number of convolution-layer kernels: 10
//
#include <ap_fixed.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
int mnist_conv_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[784], ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]){
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[28][28];
ap_fixed<10, 3, AP_TRN_ZERO, AP_SAT> conv_out[10][24][24];
ap_fixed<10, 3, AP_TRN_ZERO, AP_SAT> pool_out[10][12][12];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot2[10];
buf_copy1: for(int i=0; i<28; i++)
buf_copy2: for(int j=0; j<28; j++)
buf[i][j] = in[i*28+j];
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<10; i++){ // number of kernels
CONV2: for(int j=0; j<24; j++){
CONV3: for(int k=0; k<24; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<10; i++){
POOL2: for(int j=0; j<24; j += 2){
POOL3: for(int k=0; k<24; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<10; i++){
af1_dot3: for(int j=0; j<12; j++){
af1_dot4: for(int k=0; k<12; k++){
dot1[col] += pool_out[i][j][k]*af1_weight[i*12*12+j*12+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
out[col] = dot2[col];
}
return(0);
}
// mnist_conv_nn_tb.cpp
// 2017/06/14 by marsee
// number of convolution-layer kernels: 10
//
#include <stdio.h>
#include <ap_fixed.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#include "mnist_data.h"
int mnist_conv_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[784], ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]);
int mnist_conv_nn_float(float in[784], float out[10]);
int max_ap_fixed(ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]);
int max_float(float out[10]);
#define NUM_ITERATIONS 100 // C Simulation
//#define NUM_ITERATIONS 2 // C/RTL CoSimulation
int main(){
float t_tran_float[NUM_ITERATIONS][784];
ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> result_ap_fixed[NUM_ITERATIONS][10];
float result_float[NUM_ITERATIONS][10];
int max_id_hw, max_id_sw, max_id_ref;
for(int i=0; i<NUM_ITERATIONS; i++)
for(int j=0; j<784; j++)
t_tran_float[i][j] = (float)t_train[i][j];
for(int i=0; i<NUM_ITERATIONS; i++){
mnist_conv_nn(&t_train[i][0], &result_ap_fixed[i][0]);
mnist_conv_nn_float(&t_tran_float[i][0], &result_float[i][0]);
}
int errflag=0;
for(int i=0; i<NUM_ITERATIONS; i++){
max_id_hw = max_ap_fixed(&result_ap_fixed[i][0]);
max_id_sw = max_float(&result_float[i][0]);
max_id_ref = max_float(&t_test[i][0]);
if(max_id_ref != max_id_hw){
printf("id = %d, max_id_ref = %d, max_id_hw = %d\n", i, max_id_ref, max_id_hw);
errflag = 1;
}
if(max_id_ref != max_id_sw){
printf("id = %d, max_id_ref = %d, max_id_sw = %d\n", i, max_id_ref, max_id_sw);
errflag = 1;
}
}
if(errflag == 0)
printf("No Error\n");
return(0);
}
int mnist_conv_nn_float(float in[784], float out[10]){
float buf[28][28];
float conv_out[10][24][24];
float pool_out[10][12][12];
float dot1[100];
float dot2[10];
buf_copy1: for(int i=0; i<28; i++)
buf_copy2: for(int j=0; j<28; j++)
buf[i][j] = in[i*28+j];
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<10; i++){ // number of kernels
CONV2: for(int j=0; j<24; j++){
CONV3: for(int k=0; k<24; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_fweight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_fbias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<10; i++){
POOL2: for(int j=0; j<24; j += 2){
POOL3: for(int k=0; k<24; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<10; i++){
af1_dot3: for(int j=0; j<12; j++){
af1_dot4: for(int k=0; k<12; k++){
dot1[col] += pool_out[i][j][k]*af1_fweight[i*12*12+j*12+k][col];
}
}
}
dot1[col] += af1_fbias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_fweight[row][col];
}
dot2[col] += af2_fbias[col];
out[col] = dot2[col];
}
return(0);
}
int max_ap_fixed(ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]){
int max_id;
ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> max;
for(int i=0; i<10; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
return(max_id);
}
int max_float(float out[10]){
int max_id;
float max;
for(int i=0; i<10; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
return(max_id);
}
# train_convnet.py
# 2017/06/06 quantized with FPGA hardware implementation in mind, by marsee
# The original code is from https://github.com/oreilly-japan/deep-learning-from-scratch
# The modified code is also MIT licensed. 2017/06/19 by marsee
# coding: utf-8
import sys, os
sys.path.append(os.pardir) # setting to import files from the parent directory
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from trainer_int import Trainer
from simple_convnet_int import SimpleConvNet
# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(flatten=False)
# reduce the data if processing takes too long
#x_train, t_train = x_train[:5000], t_train[:5000]
#x_test, t_test = x_test[:1000], t_test[:1000]
#max_epochs = 5
max_epochs = 20
network = SimpleConvNet(input_dim=(1,28,28),
conv_param = {'filter_num': 10, 'filter_size': 5, 'pad': 0, 'stride': 1},
#conv_param = {'filter_num': 30, 'filter_size': 5, 'pad': 0, 'stride': 1},
hidden_size=100, output_size=10, weight_init_std=0.01)
trainer = Trainer(network, x_train, t_train, x_test, t_test,
epochs=max_epochs, mini_batch_size=100,
optimizer='Adam', optimizer_param={'lr': 0.001},
evaluate_sample_num_per_epoch=1000)
trainer.train()
'''x_testn, t_testn = x_test[:500], t_test[:500]
test_accn = network.accuracy_msg(x_testn, t_testn)
print(test_accn)'''
'''train_acc = network.accuracy(x_train, t_train)
test_acc = network.accuracy(x_test, t_test)
print(train_acc, test_acc)
train_acc_int = network.accuracy_int(x_train, t_train)'''
#test_acc_int = network.accuracy_int(x_test, t_test)
#print(test_acc_int)
# save the parameters
network.save_params("params.pkl")
print("Saved Network Parameters!")
# draw the graph
markers = {'train': 'o', 'test': 's'}
x = np.arange(max_epochs)
plt.plot(x, trainer.train_acc_list, marker='o', label='train', markevery=2)
plt.plot(x, trainer.test_acc_list, marker='s', label='test', markevery=2)
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.ylim(0, 1.0)
plt.legend(loc='lower right')
plt.show()
AF_OUT_MAG = 2 ** 5 # fractional part of the output
AF_OUT_INT = 2 ** 6 # integer part of the output (+1 sign bit)
AF_WB_MAG = 2 ** 8 # fractional part of the weights and biases
AF_WB_INT = 2 ** 1 # integer part of the weights and biases (+1 sign bit)
COV_OUT_MAG = 2 ** 7 # fractional part of the output
COV_OUT_INT = 2 ** 2 # integer part of the output (+1 sign bit)
COV_WB_MAG = 2 ** 8 # fractional part of the weights and biases
COV_WB_INT = 2 ** 1 # integer part of the weights and biases (+1 sign bit)
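These constants define the fixed-point formats that layers_int.py (quoted in full further below) uses for quantization: each value is scaled by the MAG constant, rounded, clamped to +-(MAG*INT/2), and scaled back. For the weight/bias format that means a 1/256 grid clamped to [-1.0, 255/256], which matches the range of the generated ap_fixed<9, 1> headers. A minimal C++ sketch of that rounding (a hypothetical helper, not part of the original sources):

// Hypothetical illustration of the clamp-and-scale quantization used in
// forward_int, shown here for the weight/bias format (AF_WB_MAG/AF_WB_INT).
#include <cstdio>

float quantize_af_wb(float w){
    const int MAG = 256; // AF_WB_MAG = 2**8 (fractional resolution)
    const int INT = 2;   // AF_WB_INT = 2**1 (integer range incl. sign)
    int w_int = (int)(w*MAG + 0.5f);
    if (w_int > MAG*INT/2 - 1) w_int = MAG*INT/2 - 1; // clamp to 255
    else if (w_int < -MAG*INT/2) w_int = -MAG*INT/2;  // clamp to -256
    return (float)w_int / MAG; // back to a float on a 1/256 grid
}

int main(){
    // 0.7367957 is np.max(self.W) from the log further below; it quantizes
    // to 0.73828125, matching the printed np.max(self.W_int).
    printf("%f %f %f\n", quantize_af_wb(0.7367957f), quantize_af_wb(1.5f), quantize_af_wb(-2.0f));
    return 0;
}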
// mnist_conv_nn.cpp
// 2017/06/12 by marsee
// number of convolution-layer kernels: 30
//
#include <ap_fixed.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
int mnist_conv_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[784], ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]){
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[28][28];
ap_fixed<10, 3, AP_TRN_ZERO, AP_SAT> conv_out[30][24][24];
ap_fixed<10, 3, AP_TRN_ZERO, AP_SAT> pool_out[30][12][12];
ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> dot2[10];
buf_copy1: for(int i=0; i<28; i++)
buf_copy2: for(int j=0; j<28; j++)
buf[i][j] = in[i*28+j];
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<30; i++){ // number of kernels
CONV2: for(int j=0; j<24; j++){
CONV3: for(int k=0; k<24; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<30; i++){
POOL2: for(int j=0; j<24; j += 2){
POOL3: for(int k=0; k<24; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<30; i++){
af1_dot3: for(int j=0; j<12; j++){
af1_dot4: for(int k=0; k<12; k++){
dot1[col] += pool_out[i][j][k]*af1_weight[i*12*12+j*12+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
out[col] = dot2[col];
}
return(0);
}
# write the convolution-layer weights out to a C header file
def fwrite_conv_weight(weight, wfile_name, float_wt_name, fixed_wt_name, MAGNIFICATION):
import datetime
import numpy as np
f = open(wfile_name, 'w')
todaytime = datetime.datetime.today()
f.write('// '+wfile_name+'\n')
strdtime = todaytime.strftime("%Y/%m/%d %H:%M:%S")
f.write('// {0} by marsee\n'.format(strdtime))
f.write("\n")
f.write('const float '+float_wt_name+'['+str(weight.shape[0])+']['+str(weight.shape[1])+']['+str(weight.shape[2])+']['+str(weight.shape[3])+'] = \n{\n')
for i in range(weight.shape[0]):
f.write("\t{\n")
for j in range(weight.shape[1]):
f.write("\t\t{\n")
for k in range(weight.shape[2]):
f.write("\t\t\t{")
for m in range(weight.shape[3]):
f.write(str(weight[i][j][k][m]))
if (m==weight.shape[3]-1):
f.write("}")
else:
f.write(",")
if (k==weight.shape[2]-1):
f.write("\n\t\t}\n")
else:
f.write(",\n")
if (j==weight.shape[1]-1):
f.write("\t}\n")
else:
f.write(",\n")
if (i==weight.shape[0]-1):
f.write("};\n")
else:
f.write("\t,\n")
f.write("\n")
f.write('const ap_fixed<'+str(int(np.log2(MAGNIFICATION))+1)+', 1, AP_TRN_ZERO, AP_SAT> '+fixed_wt_name+'['+str(weight.shape[0])+']['+str(weight.shape[1])+']['+str(weight.shape[2])+']['+str(weight.shape[3])+'] = \n{\n')
for i in range(weight.shape[0]):
f.write("\t{\n")
for j in range(weight.shape[1]):
f.write("\t\t{\n")
for k in range(weight.shape[2]):
f.write("\t\t\t{")
for m in range(weight.shape[3]):
w_int = int(weight[i][j][k][m]*MAGNIFICATION+0.5)
if (w_int > MAGNIFICATION-1):
w_int = MAGNIFICATION-1
elif (w_int < -MAGNIFICATION):
w_int = -MAGNIFICATION
f.write(str(w_int/MAGNIFICATION))
if (m==weight.shape[3]-1):
f.write("}")
else:
f.write(",")
if (k==weight.shape[2]-1):
f.write("\n\t\t}\n")
else:
f.write(",\n")
if (j==weight.shape[1]-1):
f.write("\t}\n")
else:
f.write(",\n")
if (i==weight.shape[0]-1):
f.write("};\n")
else:
f.write("\t,\n")
f.close()
MAGNIFICATION = 2 ** (9-1)
fwrite_conv_weight(network.params['W1'], 'conv1_weight.h', 'conv1_fweight', 'conv1_weight', MAGNIFICATION)
# write the convolution-layer and fully-connected-layer biases out to a C header file
def fwrite_bias(bias, wfile_name, float_b_name, fixed_wt_name, MAGNIFICATION):
import datetime
import numpy as np
f = open(wfile_name, 'w')
todaytime = datetime.datetime.today()
f.write('// '+wfile_name+'\n')
strdtime = todaytime.strftime("%Y/%m/%d %H:%M:%S")
f.write('// {0} by marsee\n'.format(strdtime))
f.write("\n")
f.write('const float '+float_b_name+'['+str(bias.shape[0])+'] = {\n\t')
for i in range(bias.shape[0]):
f.write(str(bias[i]))
if (i < bias.shape[0]-1):
f.write(", ")
f.write("\n};\n")
f.write("\n")
f.write('const ap_fixed<'+str(int(np.log2(MAGNIFICATION))+1)+', 1, AP_TRN_ZERO, AP_SAT> '+fixed_wt_name+'['+str(bias.shape[0])+'] = {\n\t')
for i in range(bias.shape[0]):
b_int = int(bias[i]*MAGNIFICATION+0.5)
if (b_int > MAGNIFICATION-1):
b_int = MAGNIFICATION-1
elif (b_int < -MAGNIFICATION):
b_int = -MAGNIFICATION
f.write(str(b_int/MAGNIFICATION))
if (i < bias.shape[0]-1):
f.write(", ")
f.write("\n};\n")
f.close()
fwrite_bias(network.params['b1'], 'conv1_bias.h', 'conv1_fbias', 'conv1_bias', MAGNIFICATION)
fwrite_bias(network.params['b2'], 'af1_bias.h', 'af1_fbias', 'af1_bias', MAGNIFICATION)
fwrite_bias(network.params['b3'], 'af2_bias.h', 'af2_fbias', 'af2_bias', MAGNIFICATION)
# write the fully-connected-layer weights out to a C header file
def fwrite_af_weight(weight, wfile_name, float_wt_name, fixed_wt_name, MAGNIFICATION):
import datetime
import numpy as np
f = open(wfile_name, 'w')
todaytime = datetime.datetime.today()
f.write('// '+wfile_name+'\n')
strdtime = todaytime.strftime("%Y/%m/%d %H:%M:%S")
f.write('// {0} by marsee\n'.format(strdtime))
f.write("\n")
f.write('const float '+float_wt_name+'['+str(weight.shape[0])+']['+str(weight.shape[1])+'] = {\n')
for i in range(weight.shape[0]):
f.write("\t{")
for j in range(weight.shape[1]):
f.write(str(weight[i][j]))
if (j==weight.shape[1]-1):
if (i==weight.shape[0]-1):
f.write("}\n")
else:
f.write("},\n")
else:
f.write(", ")
f.write("};\n")
f.write("\n")
f.write('const ap_fixed<'+str(int(np.log2(MAGNIFICATION))+1)+', 1, AP_TRN_ZERO, AP_SAT> '+fixed_wt_name+'['+str(weight.shape[0])+']['+str(weight.shape[1])+'] = {\n')
for i in range(weight.shape[0]):
f.write("\t{")
for j in range(weight.shape[1]):
w_int = int(weight[i][j]*MAGNIFICATION+0.5)
if (w_int > MAGNIFICATION-1):
w_int = MAGNIFICATION-1
elif (w_int < -MAGNIFICATION):
w_int = -MAGNIFICATION
f.write(str(w_int/MAGNIFICATION))
if (j==weight.shape[1]-1):
if(i==weight.shape[0]-1):
f.write("}\n")
else:
f.write("},\n")
else:
f.write(", ")
f.write("};\n")
f.close()
fwrite_af_weight(network.params['W2'], 'af1_weight.h', 'af1_fweight', 'af1_weight', MAGNIFICATION)
fwrite_af_weight(network.params['W3'], 'af2_weight.h', 'af2_fweight', 'af2_weight', MAGNIFICATION)
# layers_int.py
# The original code is from https://github.com/oreilly-japan/deep-learning-from-scratch
# The modified code is also MIT licensed. 2017/06/14 by marsee
# coding: utf-8
import numpy as np
from common.functions import *
from common.util import im2col, col2im
AF_OUT_MAG = 2 ** 5 # fractional part of the output
AF_OUT_INT = 2 ** 6 # integer part of the output (+1 sign bit)
AF_WB_MAG = 2 ** 8 # fractional part of the weights and biases
AF_WB_INT = 2 ** 1 # integer part of the weights and biases (+1 sign bit)
COV_OUT_MAG = 2 ** 7 # fractional part of the output
COV_OUT_INT = 2 ** 2 # integer part of the output (+1 sign bit)
COV_WB_MAG = 2 ** 8 # fractional part of the weights and biases
COV_WB_INT = 2 ** 1 # integer part of the weights and biases (+1 sign bit)
DEBUG = 1
class Relu:
def __init__(self):
self.mask = None
def forward_int(self, x):
self.mask = (x <= 0)
out = x.copy()
out[self.mask] = 0
return out
def forward(self, x):
self.mask = (x <= 0)
out = x.copy()
out[self.mask] = 0
return out
def forward_msg(self, x):
self.mask = (x <= 0)
out = x.copy()
out[self.mask] = 0
return out
def backward(self, dout):
dout[self.mask] = 0
dx = dout
return dx
class Sigmoid:
def __init__(self):
self.out = None
def forward_int(self, x):
out = sigmoid(x)
self.out = out
return out
def forward(self, x):
out = sigmoid(x)
self.out = out
return out
def forward_msg(self, x):
out = sigmoid(x)
self.out = out
return out
def backward(self, dout):
dx = dout * (1.0 - self.out) * self.out
return dx
class Affine:
def __init__(self, W, b):
self.W = W
self.b = b
self.W_int = W
self.b_int = b
self.x = None
self.original_x_shape = None
# derivatives of the weight and bias parameters
self.dW = None
self.db = None
def forward_int(self, x):
if (DEBUG == 1):
print("x shape ={0}".format(x.shape))
print("np.max(self.W) = {0}".format(np.max(self.W)))
# tensor support
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
# x should already be quantized
# clamp w and b to the INT8 range 2017/05/22 by marsee
self.W_int = np.array(self.W*AF_WB_MAG+0.5, dtype=int)
self.b_int = np.array(self.b*AF_WB_MAG+0.5, dtype=int)
for i in range(self.W_int.shape[0]):
for j in range(self.W_int.shape[1]):
if (self.W_int[i][j] > AF_WB_MAG*AF_WB_INT/2-1):
self.W_int[i][j] = AF_WB_MAG*AF_WB_INT/2-1
elif (self.W_int[i][j] < -AF_WB_MAG*AF_WB_INT/2):
self.W_int[i][j] = -AF_WB_MAG*AF_WB_INT/2;
for i in range(self.b_int.shape[0]):
if (self.b_int[i] > AF_WB_MAG*AF_WB_INT/2-1):
self.b_int[i] = AF_WB_MAG*AF_WB_INT/2-1
elif (self.b_int[i] < -AF_WB_MAG*AF_WB_INT/2):
self.b_int[i] = -AF_WB_MAG*AF_WB_INT/2
self.W_int = np.array(self.W_int, dtype=float)
self.b_int = np.array(self.b_int, dtype=float)
self.W_int = self.W_int/AF_WB_MAG
self.b_int = self.b_int/AF_WB_MAG
out = np.dot(self.x, self.W_int) + self.b_int
if (DEBUG == 1):
print("np.max(self.W) = {0}".format(np.max(self.W)))
print("np.max(self.b) = {0}".format(np.max(self.b)))
print("x reshape ={0}".format(x.shape))
print("np.max(x) = {0}".format(np.max(x)))
print("np.min(x) = {0}".format(np.min(x)))
#print("x = {0}".format(self.x))
print(self.W_int.shape)
print("np.max(self.W_int) = {0}".format(np.max(self.W_int)))
print("np.min(self.W_int) = {0}".format(np.min(self.W_int)))
print(self.b_int.shape)
print("np.max(self.b_int) = {0}".format(np.max(self.b_int)))
print("np.min(self.b_int) = {0}".format(np.min(self.b_int)))
print(out.shape)
print("np.max(out) = {0}".format(np.max(out)))
print("np.min(out) = {0}".format(np.min(out)))
#print("out = {0}".format(out))
out = np.array(out*AF_OUT_MAG+0.5, dtype=int)
for i in range(out.shape[0]):
for j in range(out.shape[1]):
if (out[i][j] > AF_OUT_MAG*AF_OUT_INT/2-1):
out[i][j] = AF_OUT_MAG*AF_OUT_INT/2-1
elif (out[i][j] < -AF_OUT_MAG*AF_OUT_INT/2):
out[i][j] = -AF_OUT_MAG*AF_OUT_INT/2
out = np.array(out, dtype=float)
out = out/AF_OUT_MAG
if (DEBUG == 1):
print("np.max(out2) = {0}".format(np.max(out)))
print("np.min(out2) = {0}".format(np.min(out)))
return out
def forward(self, x):
# tensor support
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
out = np.dot(self.x, self.W) + self.b
return out
def forward_msg(self, x):
print("x shape ={0}".format(x.shape))
# tensor support
self.original_x_shape = x.shape
x = x.reshape(x.shape[0], -1)
self.x = x
out = np.dot(self.x, self.W) + self.b
print("x reshape ={0}".format(x.shape))
print("np.max(x) = {0}".format(np.max(x)))
print("np.min(x) = {0}".format(np.min(x)))
#print("x = {0}".format(self.x))
print(self.W.shape)
print("np.max(self.W) = {0}".format(np.max(self.W)))
print("np.min(self.W) = {0}".format(np.min(self.W)))
print(self.b.shape)
print("np.max(self.b) = {0}".format(np.max(self.b)))
print("np.min(self.b) = {0}".format(np.min(self.b)))
print(out.shape)
print("np.max(out) = {0}".format(np.max(out)))
print("np.min(out) = {0}".format(np.min(out)))
#print("out = {0}".format(out))
return out
def backward(self, dout):
dx = np.dot(dout, self.W.T)
self.dW = np.dot(self.x.T, dout)
self.db = np.sum(dout, axis=0)
dx = dx.reshape(*self.original_x_shape) # restore the shape of the input data (tensor support)
return dx
class SoftmaxWithLoss:
def __init__(self):
self.loss = None
self.y = None # output of softmax
self.t = None # training labels
def forward(self, x, t):
self.t = t
self.y = softmax(x)
self.loss = cross_entropy_error(self.y, self.t)
return self.loss
def forward_msg(self, x, t):
self.t = t
self.y = softmax(x)
self.loss = cross_entropy_error(self.y, self.t)
return self.loss
def forward_int(self, x, t):
self.t = t
self.y = softmax(x)
self.loss = cross_entropy_error(self.y, self.t)
return self.loss
def backward(self, dout=1):
batch_size = self.t.shape[0]
if self.t.size == self.y.size: # when the training labels are one-hot vectors
dx = (self.y - self.t) / batch_size
else:
dx = self.y.copy()
dx[np.arange(batch_size), self.t] -= 1
dx = dx / batch_size
return dx
class Dropout:
"""
http://arxiv.org/abs/1207.0580
"""
def __init__(self, dropout_ratio=0.5):
self.dropout_ratio = dropout_ratio
self.mask = None
def forward_int(self, x, train_flg=True):
if train_flg:
self.mask = np.random.rand(*x.shape) > self.dropout_ratio
return x * self.mask
else:
return x * (1.0 - self.dropout_ratio)
def forward_msg(self, x, train_flg=True):
if train_flg:
self.mask = np.random.rand(*x.shape) > self.dropout_ratio
return x * self.mask
else:
return x * (1.0 - self.dropout_ratio)
def forward(self, x, train_flg=True):
if train_flg:
self.mask = np.random.rand(*x.shape) > self.dropout_ratio
return x * self.mask
else:
return x * (1.0 - self.dropout_ratio)
def backward(self, dout):
return dout * self.mask
class BatchNormalization:
"""
http://arxiv.org/abs/1502.03167
"""
def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
self.gamma = gamma
self.beta = beta
self.momentum = momentum
self.input_shape = None # 4D for conv layers, 2D for fully connected layers
# mean and variance used at test time
self.running_mean = running_mean
self.running_var = running_var
# intermediate data used during backward
self.batch_size = None
self.xc = None
self.std = None
self.dgamma = None
self.dbeta = None
def forward_int(self, x, train_flg=True):
self.input_shape = x.shape
if x.ndim != 2:
N, C, H, W = x.shape
x = x.reshape(N, -1)
out = self.__forward(x, train_flg)
return out.reshape(*self.input_shape)
def forward_msg(self, x, train_flg=True):
self.input_shape = x.shape
if x.ndim != 2:
N, C, H, W = x.shape
x = x.reshape(N, -1)
out = self.__forward(x, train_flg)
return out.reshape(*self.input_shape)
def forward(self, x, train_flg=True):
self.input_shape = x.shape
if x.ndim != 2:
N, C, H, W = x.shape
x = x.reshape(N, -1)
out = self.__forward(x, train_flg)
return out.reshape(*self.input_shape)
def __forward(self, x, train_flg):
if self.running_mean is None:
N, D = x.shape
self.running_mean = np.zeros(D)
self.running_var = np.zeros(D)
if train_flg:
mu = x.mean(axis=0)
xc = x - mu
var = np.mean(xc**2, axis=0)
std = np.sqrt(var + 10e-7)
xn = xc / std
self.batch_size = x.shape[0]
self.xc = xc
self.xn = xn
self.std = std
self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
self.running_var = self.momentum * self.running_var + (1-self.momentum) * var
else:
xc = x - self.running_mean
xn = xc / ((np.sqrt(self.running_var + 10e-7)))
out = self.gamma * xn + self.beta
return out
def backward(self, dout):
if dout.ndim != 2:
N, C, H, W = dout.shape
dout = dout.reshape(N, -1)
dx = self.__backward(dout)
dx = dx.reshape(*self.input_shape)
return dx
def __backward(self, dout):
dbeta = dout.sum(axis=0)
dgamma = np.sum(self.xn * dout, axis=0)
dxn = self.gamma * dout
dxc = dxn / self.std
dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
dvar = 0.5 * dstd / self.std
dxc += (2.0 / self.batch_size) * self.xc * dvar
dmu = np.sum(dxc, axis=0)
dx = dxc - dmu / self.batch_size
self.dgamma = dgamma
self.dbeta = dbeta
return dx
class Convolution:
def __init__(self, W, b, stride=1, pad=0):
self.W = W
self.b = b
self.stride = stride
self.pad = pad
self.W_int = W
self.b_int = b
# intermediate data (used during backward)
self.x = None
self.col = None
self.col_W = None
self.col_W_int = None
# gradients of the weight and bias parameters
self.dW = None
self.db = None
def forward_int(self, x):
# clamp w and b to the INT8 range 2017/06/06 by marsee
self.W_int = np.array(self.W*COV_WB_MAG+0.5, dtype=int)
self.b_int = np.array(self.b*COV_WB_MAG+0.5, dtype=int)
for i in range(self.W_int.shape[0]):
for j in range(self.W_int.shape[1]):
for k in range(self.W_int.shape[2]):
for m in range(self.W_int.shape[3]):
if (self.W_int[i][j][k][m] > COV_WB_MAG*COV_WB_INT/2-1):
self.W_int[i][j][k][m] = COV_WB_MAG*COV_WB_INT/2-1
elif (self.W_int[i][j][k][m] < -COV_WB_MAG*COV_WB_INT/2):
self.W_int[i][j][k][m] = -COV_WB_MAG*COV_WB_INT/2;
for i in range(self.b_int.shape[0]):
if (self.b_int[i] > COV_WB_MAG*COV_WB_INT/2-1):
self.b_int[i] = COV_WB_MAG*COV_WB_INT/2-1
elif (self.b_int[i] < -COV_WB_MAG*COV_WB_INT/2):
self.b_int[i] = -COV_WB_MAG*COV_WB_INT/2
self.W_int = np.array(self.W_int, dtype=float)
self.b_int = np.array(self.b_int, dtype=float)
self.W_int = self.W_int/COV_WB_MAG
self.b_int = self.b_int/COV_WB_MAG
FN, C, FH, FW = self.W_int.shape
N, C, H, W = x.shape
out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
col = im2col(x, FH, FW, self.stride, self.pad)
col_W_int = self.W_int.reshape(FN, -1).T
out = np.dot(col, col_W_int) + self.b_int
if (DEBUG == 1):
print(x.shape)
print("Conv col.shape = {0}".format(col.shape))
print("Conv col_W.shape = {0}".format(col_W_int.shape))
print("Conv np.max(x) = {0}".format(np.max(x)))
print("Conv np.min(x) = {0}".format(np.min(x)))
#print("Conv x = {0}".format(self.x))
print(self.W_int.shape)
print("Conv np.max(self.W_int) = {0}".format(np.max(self.W_int)))
print("Conv np.min(self.W_int) = {0}".format(np.min(self.W_int)))
print(self.b_int.shape)
print("Conv np.max(self.b_int) = {0}".format(np.max(self.b_int)))
print("Conv np.min(self.b_int) = {0}".format(np.min(self.b_int)))
print("Conv out.shape = {0}".format(out.shape))
print("Conv np.max(out) = {0}".format(np.max(out)))
print("Conv np.min(out) = {0}".format(np.min(out)))
#print("Conv out = {0}".format(out))
out = np.array(out*COV_OUT_MAG+0.5, dtype=int)
for i in range(out.shape[0]):
for j in range(out.shape[1]):
if (out[i][j] > COV_OUT_MAG*COV_OUT_INT/2-1):
out[i][j] = COV_OUT_MAG*COV_OUT_INT/2-1
elif (out[i][j] < -COV_OUT_MAG*COV_OUT_INT/2):
out[i][j] = -COV_OUT_MAG*COV_OUT_INT/2
out = np.array(out, dtype=float)
out = out/COV_OUT_MAG
if (DEBUG == 1):
print("Conv np.max(out2) = {0}".format(np.max(out)))
print("Conv np.min(out2) = {0}".format(np.min(out)))
out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
if (DEBUG == 1):
print("Conv out.reshape = {0}".format(out.shape))
self.x = x
self.col = col
self.col_W_int = col_W_int
return out
def forward(self, x):
FN, C, FH, FW = self.W.shape
N, C, H, W = x.shape
out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
col = im2col(x, FH, FW, self.stride, self.pad)
col_W = self.W.reshape(FN, -1).T
out = np.dot(col, col_W) + self.b
out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
self.x = x
self.col = col
self.col_W = col_W
return out
def forward_msg(self, x):
FN, C, FH, FW = self.W.shape
N, C, H, W = x.shape
out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
col = im2col(x, FH, FW, self.stride, self.pad)
col_W = self.W.reshape(FN, -1).T
out = np.dot(col, col_W) + self.b
print(x.shape)
print("Conv col.shape = {0}".format(col.shape))
print("Conv col_W.shape = {0}".format(col_W.shape))
print("Conv np.max(x) = {0}".format(np.max(x)))
print("Conv np.min(x) = {0}".format(np.min(x)))
#print("Conv x = {0}".format(self.x))
print(self.W.shape)
print("Conv np.max(self.W) = {0}".format(np.max(self.W)))
print("Conv np.min(self.W) = {0}".format(np.min(self.W)))
print(self.b.shape)
print("Conv np.max(self.b) = {0}".format(np.max(self.b)))
print("Conv np.min(self.b) = {0}".format(np.min(self.b)))
print("Conv out.shape = {0}".format(out.shape))
print("Conv np.max(out) = {0}".format(np.max(out)))
print("Conv np.min(out) = {0}".format(np.min(out)))
#print("Conv out = {0}".format(out))
print("Conv np.max(out2) = {0}".format(np.max(out)))
print("Conv np.min(out2) = {0}".format(np.min(out)))
out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
print("Conv out.reshape = {0}".format(out.shape))
self.x = x
self.col = col
self.col_W = col_W
return out
def backward(self, dout):
FN, C, FH, FW = self.W.shape
dout = dout.transpose(0,2,3,1).reshape(-1, FN)
self.db = np.sum(dout, axis=0)
self.dW = np.dot(self.col.T, dout)
self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
dcol = np.dot(dout, self.col_W.T)
dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
return dx
class Pooling:
def __init__(self, pool_h, pool_w, stride=1, pad=0):
self.pool_h = pool_h
self.pool_w = pool_w
self.stride = stride
self.pad = pad
self.x = None
self.arg_max = None
def forward_int(self, x):
if (DEBUG == 1):
print("Pooling x.shape = {0}".format(x.shape))
N, C, H, W = x.shape
out_h = int(1 + (H - self.pool_h) / self.stride)
out_w = int(1 + (W - self.pool_w) / self.stride)
col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
col = col.reshape(-1, self.pool_h*self.pool_w)
arg_max = np.argmax(col, axis=1)
out = np.max(col, axis=1)
out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
self.x = x
self.arg_max = arg_max
if (DEBUG == 1):
print("Pooling out.shape = {0}".format(out.shape))
return out
def forward_msg(self, x):
print("Pooling x.shape = {0}".format(x.shape))
N, C, H, W = x.shape
out_h = int(1 + (H - self.pool_h) / self.stride)
out_w = int(1 + (W - self.pool_w) / self.stride)
col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
col = col.reshape(-1, self.pool_h*self.pool_w)
arg_max = np.argmax(col, axis=1)
out = np.max(col, axis=1)
out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
self.x = x
self.arg_max = arg_max
print("Pooling out.shape = {0}".format(out.shape))
return out
def forward(self, x):
N, C, H, W = x.shape
out_h = int(1 + (H - self.pool_h) / self.stride)
out_w = int(1 + (W - self.pool_w) / self.stride)
col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
col = col.reshape(-1, self.pool_h*self.pool_w)
arg_max = np.argmax(col, axis=1)
out = np.max(col, axis=1)
out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
self.x = x
self.arg_max = arg_max
return out
def backward(self, dout):
dout = dout.transpose(0, 2, 3, 1)
pool_size = self.pool_h * self.pool_w
dmax = np.zeros((dout.size, pool_size))
dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
dmax = dmax.reshape(dout.shape + (pool_size,))
dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
return dx
# simple_convnet_int.py
# The original code is from https://github.com/oreilly-japan/deep-learning-from-scratch
# The modified code is also MIT licensed. 2017/06/14 by marsee
# coding: utf-8
import sys, os
sys.path.append(os.pardir) # setting to import files from the parent directory
import pickle
import numpy as np
from layers_int import *
#from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict
class SimpleConvNet:
"""単純なConvNet
conv - relu - pool - affine - relu - affine - softmax
Parameters
----------
input_size : 入力サイズ(MNISTの場合は784)
hidden_size_list : 隠れ層のニューロンの数のリスト(e.g. [100, 100, 100])
output_size : 出力サイズ(MNISTの場合は10)
activation : 'relu' or 'sigmoid'
weight_init_std : 重みの標準偏差を指定(e.g. 0.01)
'relu'または'he'を指定した場合は「Heの初期値」を設定
'sigmoid'または'xavier'を指定した場合は「Xavierの初期値」を設定
"""
def __init__(self, input_dim=(1, 28, 28),
conv_param={'filter_num':30, 'filter_size':5, 'pad':0, 'stride':1},
hidden_size=100, output_size=10, weight_init_std=0.01):
filter_num = conv_param['filter_num']
filter_size = conv_param['filter_size']
filter_pad = conv_param['pad']
filter_stride = conv_param['stride']
input_size = input_dim[1]
conv_output_size = (input_size - filter_size + 2*filter_pad) / filter_stride + 1
pool_output_size = int(filter_num * (conv_output_size/2) * (conv_output_size/2))
# initialize the weights
self.params = {}
self.params['W1'] = weight_init_std * \
np.random.randn(filter_num, input_dim[0], filter_size, filter_size)
self.params['b1'] = np.zeros(filter_num)
self.params['W2'] = weight_init_std * \
np.random.randn(pool_output_size, hidden_size)
self.params['b2'] = np.zeros(hidden_size)
self.params['W3'] = weight_init_std * \
np.random.randn(hidden_size, output_size)
self.params['b3'] = np.zeros(output_size)
# create the layers
self.layers = OrderedDict()
self.layers['Conv1'] = Convolution(self.params['W1'], self.params['b1'],
conv_param['stride'], conv_param['pad'])
self.layers['Relu1'] = Relu()
self.layers['Pool1'] = Pooling(pool_h=2, pool_w=2, stride=2)
self.layers['Affine1'] = Affine(self.params['W2'], self.params['b2'])
self.layers['Relu2'] = Relu()
self.layers['Affine2'] = Affine(self.params['W3'], self.params['b3'])
self.last_layer = SoftmaxWithLoss()
def predict(self, x):
for layer in self.layers.values():
x = layer.forward(x)
return x
def predict_msg(self, x):
for layer in self.layers.values():
x = layer.forward_msg(x)
return x
def predict_int(self, x):
for layer in self.layers.values():
x = layer.forward_int(x)
return x
def loss(self, x, t):
"""損失関数を求める
引数のxは入力データ、tは教師ラベル
"""
y = self.predict(x)
return self.last_layer.forward(y, t)
def loss_int(self, x, t):
"""損失関数を求める
引数のxは入力データ、tは教師ラベル
"""
y = self.predict_int(x)
return self.last_layer.forward_int(y, t)
def accuracy(self, x, t, batch_size=100):
if t.ndim != 1 : t = np.argmax(t, axis=1)
acc = 0.0
for i in range(int(x.shape[0] / batch_size)):
tx = x[i*batch_size:(i+1)*batch_size]
tt = t[i*batch_size:(i+1)*batch_size]
y = self.predict(tx)
y = np.argmax(y, axis=1)
acc += np.sum(y == tt)
return acc / x.shape[0]
def accuracy_msg(self, x, t, batch_size=100):
if t.ndim != 1 : t = np.argmax(t, axis=1)
acc = 0.0
for i in range(int(x.shape[0] / batch_size)):
tx = x[i*batch_size:(i+1)*batch_size]
tt = t[i*batch_size:(i+1)*batch_size]
y = self.predict_msg(tx)
y = np.argmax(y, axis=1)
acc += np.sum(y == tt)
return acc / x.shape[0]
def accuracy_int(self, x, t, batch_size=100):
if t.ndim != 1 : t = np.argmax(t, axis=1)
acc = 0.0
for i in range(int(x.shape[0] / batch_size)):
tx = x[i*batch_size:(i+1)*batch_size]
tt = t[i*batch_size:(i+1)*batch_size]
y = self.predict_int(tx)
y = np.argmax(y, axis=1)
acc += np.sum(y == tt)
return acc / x.shape[0]
def numerical_gradient(self, x, t):
"""勾配を求める(数値微分)
Parameters
----------
x : 入力データ
t : 教師ラベル
Returns
-------
各層の勾配を持ったディクショナリ変数
grads['W1']、grads['W2']、...は各層の重み
grads['b1']、grads['b2']、...は各層のバイアス
"""
loss_w = lambda w: self.loss(x, t)
grads = {}
for idx in (1, 2, 3):
grads['W' + str(idx)] = numerical_gradient(loss_w, self.params['W' + str(idx)])
grads['b' + str(idx)] = numerical_gradient(loss_w, self.params['b' + str(idx)])
return grads
def gradient(self, x, t):
"""勾配を求める(誤差逆伝搬法)
Parameters
----------
x : 入力データ
t : 教師ラベル
Returns
-------
各層の勾配を持ったディクショナリ変数
grads['W1']、grads['W2']、...は各層の重み
grads['b1']、grads['b2']、...は各層のバイアス
"""
# forward
self.loss(x, t)
# backward
dout = 1
dout = self.last_layer.backward(dout)
layers = list(self.layers.values())
layers.reverse()
for layer in layers:
dout = layer.backward(dout)
# collect the gradients
grads = {}
grads['W1'], grads['b1'] = self.layers['Conv1'].dW, self.layers['Conv1'].db
grads['W2'], grads['b2'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
grads['W3'], grads['b3'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
return grads
def save_params(self, file_name="params.pkl"):
params = {}
for key, val in self.params.items():
params[key] = val
with open(file_name, 'wb') as f:
pickle.dump(params, f)
def load_params(self, file_name="params.pkl"):
with open(file_name, 'rb') as f:
params = pickle.load(f)
for key, val in params.items():
self.params[key] = val
for i, key in enumerate(['Conv1', 'Affine1', 'Affine2']):
self.layers[key].W = self.params['W' + str(i+1)]
self.layers[key].b = self.params['b' + str(i+1)]
(100, 1, 28, 28)
Conv col.shape = (57600, 25)
Conv col_W.shape = (25, 30)
Conv np.max(x) = 1.0
Conv np.min(x) = 0.0
(30, 1, 5, 5)
Conv np.max(self.W_int) = 0.50390625
Conv np.min(self.W_int) = -0.8046875
(30,)
Conv np.max(self.b_int) = -0.01171875
Conv np.min(self.b_int) = -0.37890625
Conv out.shape = (57600, 30)
Conv np.max(out) = 3.2695772065781057
Conv np.min(out) = -4.958961396710947
Conv np.max(out2) = 1.9921875
Conv np.min(out2) = -2.0
Conv out.reshape = (100, 30, 24, 24)
Pooling x.shape = (100, 30, 24, 24)
Pooling out.shape = (100, 30, 12, 12)
x shape =(100, 30, 12, 12)
np.max(self.W) = 0.7367957391676244
np.max(self.W) = 0.7367957391676244
np.max(self.b) = 0.13286493647098715
x reshape =(100, 4320)
np.max(x) = 1.9921875
np.min(x) = 0.0
(4320, 100)
np.max(self.W_int) = 0.73828125
np.min(self.W_int) = -0.78125
(100,)
np.max(self.b_int) = 0.1328125
np.min(self.b_int) = -0.0859375
(100, 100)
np.max(out) = 38.628021240234375
np.min(out) = -45.4913330078125
np.max(out2) = 31.96875
np.min(out2) = -32.0
x shape =(100, 100)
np.max(self.W) = 0.34009935565012406
np.max(self.W) = 0.34009935565012406
np.max(self.b) = 0.06031450057979193
x reshape =(100, 100)
np.max(x) = 31.96875
np.min(x) = 0.0
(100, 10)
np.max(self.W_int) = 0.33984375
np.min(self.W_int) = -0.5234375
(10,)
np.max(self.b_int) = 0.05859375
np.min(self.b_int) = -0.0703125
(100, 10)
np.max(out) = 33.219970703125
np.min(out) = -61.742919921875
np.max(out2) = 31.96875
np.min(out2) = -32.0
// mnist_nn.cpp
// 2017/06/01 by marsee
//
#include <stdio.h>
#include <ap_fixed.h>
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
int mnist_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[784], ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[10]){
#pragma HLS INTERFACE s_axilite port=out
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=in
float buf[784];
float dot1[50];
float dot2[10];
buf_copy: for(int i=0; i<784; i++)
buf[i] = (float)in[i];
af1_dot1: for(int col=0; col<50; col++){
dot1[col] = 0;
af1_dot2: for(int row=0; row<784; row++){
dot1[col] += buf[row]*af1_fweight[row][col];
}
dot1[col] += af1_fbias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<50; row++){
dot2[col] += dot1[row]*af2_fweight[row][col];
}
dot2[col] += af2_fbias[col];
if(dot2[col] < 0) // ReLU
dot2[col] = 0;
out[col] = (ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT>)dot2[col];
}
return(0);
}
id = 15, max_id_ref = 4, max_id_sw = 9
id = 24, max_id_ref = 7, max_id_hw = 4
id = 49, max_id_ref = 2, max_id_hw = 4
id = 49, max_id_ref = 2, max_id_sw = 4
id = 33, max_id_ref = 8, max_id_sw = 7
id = 41, max_id_ref = 9, max_id_hw = 5
id = 47, max_id_ref = 4, max_id_hw = 2
id = 47, max_id_ref = 4, max_id_sw = 2
id = 59, max_id_ref = 6, max_id_hw = 0
id = 59, max_id_ref = 6, max_id_sw = 0
id = 90, max_id_ref = 8, max_id_hw = 4
id = 90, max_id_ref = 8, max_id_sw = 4
# dump the MNIST data as C arrays and write them to a file
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
import datetime
OUTPUT_DATA_NUM = 100 # number of MNIST test samples to output, up to 10000
OFFSET = 100 # offset into the MNIST data set; with 100, OUTPUT_DATA_NUM samples are output starting from the 100th
# load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
f = open("mnist_data.h", 'w')
todaytime = datetime.datetime.today()
f.write('// mnist_data.h\n')
strdtime = todaytime.strftime("%Y/%m/%d %H:%M:%S")
f.write('// {0} by marsee\n'.format(strdtime))
f.write("\n")
f.write('ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> t_train['+str(OUTPUT_DATA_NUM)+']['+str(x_test.shape[1])+'] = {\n')
for i in range(OFFSET, OFFSET+OUTPUT_DATA_NUM):
    f.write("\t{")
    for j in range(x_test.shape[1]):
        f.write(str(x_test[i][j]))
        if (j==x_test.shape[1]-1):
            if (i==OFFSET+OUTPUT_DATA_NUM-1): # i starts at OFFSET, so the last row is OFFSET+OUTPUT_DATA_NUM-1
                f.write("}\n")
            else:
                f.write("},\n")
        else:
            f.write(", ")
f.write("};\n")
f.write('int t_train_256['+str(OUTPUT_DATA_NUM)+']['+str(x_test.shape[1])+'] = {\n')
for i in range(OFFSET, OFFSET+OUTPUT_DATA_NUM):
    f.write("\t{")
    for j in range(x_test.shape[1]):
        f.write(str(int(x_test[i][j]*256)))
        if (j==x_test.shape[1]-1):
            if (i==OFFSET+OUTPUT_DATA_NUM-1): # i starts at OFFSET, so the last row is OFFSET+OUTPUT_DATA_NUM-1
                f.write("}\n")
            else:
                f.write("},\n")
        else:
            f.write(", ")
f.write("};\n")
f.write("\n")
f.write('float t_test['+str(OUTPUT_DATA_NUM)+']['+str(t_test.shape[1])+'] = {\n')
for i in range(OFFSET, OFFSET+OUTPUT_DATA_NUM):
    f.write("\t{")
    for j in range(t_test.shape[1]):
        f.write(str(t_test[i][j]))
        if (j==t_test.shape[1]-1):
            if (i==OFFSET+OUTPUT_DATA_NUM-1): # i starts at OFFSET, so the last row is OFFSET+OUTPUT_DATA_NUM-1
                f.write("}\n")
            else:
                f.write("},\n")
        else:
            f.write(", ")
f.write("};\n")
f.close()
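For reference, the mnist_data.h that this script emits has roughly the following shape (a sketch with the values abbreviated, not actual output):
// mnist_data.h (sketch; values abbreviated)
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> t_train[100][784] = {
	{0.0, 0.0, ... },
	...
};
int t_train_256[100][784] = {
	{0, 0, ... },
	...
};
float t_test[100][10] = {
	{0.0, 0.0, ... },
	...
};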
// mnist_nn.cpp
// 2017/06/01 by marsee
//
#include <stdio.h>
#include <ap_fixed.h>
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
int mnist_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[784], ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[10]){
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[784];
ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> dot1[50];
ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> dot2[10];
buf_copy: for(int i=0; i<784; i++)
buf[i] = in[i];
af1_dot1: for(int col=0; col<50; col++){
dot1[col] = 0;
af1_dot2: for(int row=0; row<784; row++){
dot1[col] += buf[row]*af1_weight[row][col];
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<50; row++){
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
if(dot2[col] < 0) // ReLU
dot2[col] = 0;
out[col] = dot2[col];
}
return(0);
}
// mnist_nn_tb.cpp
// 2017/06/02 by marsee
//
#include <stdio.h>
#include <ap_fixed.h>
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#include "mnist_data.h"
int mnist_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[784], ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[10]);
int mnist_nn_float(float in[784], float out[10]);
int max_ap_fixed(ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[10]);
int max_float(float out[10]);
#define NUM_ITERATIONS 100 // C Simulation
// #define NUM_ITERATIONS 2 // C/RTL CoSimulation
int main(){
float t_tran_float[NUM_ITERATIONS][784];
ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> result_ap_fixed[NUM_ITERATIONS][10];
float result_float[NUM_ITERATIONS][10];
int max_id_hw, max_id_sw, max_id_ref;
for(int i=0; i<NUM_ITERATIONS; i++)
for(int j=0; j<784; j++)
t_tran_float[i][j] = (float)t_train[i][j];
for(int i=0; i<NUM_ITERATIONS; i++){
mnist_nn(&t_train[i][0], &result_ap_fixed[i][0]);
mnist_nn_float(&t_tran_float[i][0], &result_float[i][0]);
}
int errflag=0;
for(int i=0; i<NUM_ITERATIONS; i++){
max_id_hw = max_ap_fixed(&result_ap_fixed[i][0]);
max_id_sw = max_float(&result_float[i][0]);
max_id_ref = max_float(&t_test[i][0]);
if(max_id_ref != max_id_hw){
printf("id = %d, max_id_ref = %d, max_id_hw = %d\n", i, max_id_ref, max_id_hw);
errflag = 1;
}
if(max_id_ref != max_id_sw){
printf("id = %d, max_id_ref = %d, max_id_sw = %d\n", i, max_id_ref, max_id_sw);
errflag = 1;
}
}
if(errflag == 0)
printf("No Error\n");
return(0);
}
int mnist_nn_float(float in[784], float out[10]){
float dot1[50];
float dot2[10];
af1_dot1: for(int col=0; col<50; col++){
dot1[col] = 0;
af1_dot2: for(int row=0; row<784; row++){
dot1[col] += in[row]*af1_fweight[row][col];
}
dot1[col] += af1_fbias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<50; row++){
dot2[col] += dot1[row]*af2_fweight[row][col];
}
dot2[col] += af2_fbias[col];
if(dot2[col] < 0) // ReLU
dot2[col] = 0;
out[col] = dot2[col];
}
return(0);
}
int max_ap_fixed(ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[10]){
int max_id;
ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> max;
for(int i=0; i<10; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
return(max_id);
}
int max_float(float out[10]){
int max_id;
float max;
for(int i=0; i<10; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
return(max_id);
}
import lasagne
import theano
There was a post saying that reversing the import order like this cleared the error, so I decided to try the same thing.
def fwrite_weight(weight, wfile_name, float_wt_name, fixed_wt_name, MAGNIFICATION, row_size, column_size):
    import datetime
    import numpy as np

    f = open(wfile_name, 'w')
    todaytime = datetime.datetime.today()
    f.write('// '+wfile_name+'\n')
    strdtime = todaytime.strftime("%Y/%m/%d %H:%M:%S")
    f.write('// {0} by marsee\n'.format(strdtime))
    f.write("\n")

    f.write('const float '+float_wt_name+'['+str(row_size)+']['+str(column_size)+'] = {\n')
    for i in range(weight.shape[0]):
        f.write("\t{")
        for j in range(weight.shape[1]):
            f.write(str(weight[i][j]))
            if (j==weight.shape[1]-1):
                if (i==weight.shape[0]-1):
                    f.write("}\n")
                else:
                    f.write("},\n")
            else:
                f.write(", ")
    f.write("};\n")
    f.write("\n")

    f.write('const ap_fixed<'+str(int(np.log2(MAGNIFICATION))+1)+', 1, AP_TRN_ZERO, AP_SAT> '+fixed_wt_name+'['+str(row_size)+']['+str(column_size)+'] = {\n')
    for i in range(weight.shape[0]):
        f.write("\t{")
        for j in range(weight.shape[1]):
            w_int = int(weight[i][j]*MAGNIFICATION+0.5)
            if (w_int > MAGNIFICATION-1):
                w_int = MAGNIFICATION-1
            elif (w_int < -MAGNIFICATION):
                w_int = -MAGNIFICATION
            f.write(str(w_int/MAGNIFICATION))
            if (j==weight.shape[1]-1):
                if(i==weight.shape[0]-1):
                    f.write("}\n")
                else:
                    f.write("},\n")
            else:
                f.write(", ")
    f.write("};\n")
    f.close()
MAGNIFICATION = 2 ** (9-1)
fwrite_weight(network.params['W1'], 'af1_weight.h', 'af1_fweight', 'af1_weight', MAGNIFICATION, 784, 50)
fwrite_weight(network.params['W2'], 'af2_weight.h', 'af2_fweight', 'af2_weight', MAGNIFICATION, 50, 10)
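To make the conversion concrete, here is a minimal sketch of the scale-round-saturate step that fwrite_weight applies to each weight (quantize_weight is a name I made up for illustration; it is not in the script):
# Sketch of the per-weight quantization in fwrite_weight (quantize_weight is hypothetical).
def quantize_weight(w, MAGNIFICATION=2 ** (9-1)):
    w_int = int(w*MAGNIFICATION + 0.5)  # scale to 1/256 steps, add 0.5, truncate
    if w_int > MAGNIFICATION-1:         # saturate at 255/256 = 0.99609375
        w_int = MAGNIFICATION-1
    elif w_int < -MAGNIFICATION:        # saturate at -256/256 = -1.0
        w_int = -MAGNIFICATION
    return w_int/MAGNIFICATION

print(quantize_weight(0.7367957391676244))  # 0.73828125, reproducing the mapping seen in the logs earlier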
def fwrite_bias(bias, wfile_name, float_b_name, fixed_wt_name, MAGNIFICATION, size):
    import datetime
    import numpy as np

    f = open(wfile_name, 'w')
    todaytime = datetime.datetime.today()
    f.write('// '+wfile_name+'\n')
    strdtime = todaytime.strftime("%Y/%m/%d %H:%M:%S")
    f.write('// {0} by marsee\n'.format(strdtime))
    f.write("\n")

    f.write('const float '+float_b_name+'['+str(size)+'] = {\n\t')
    for i in range(bias.shape[0]):
        f.write(str(bias[i]))
        if (i < bias.shape[0]-1):
            f.write(", ")
    f.write("\n};\n")
    f.write("\n")

    f.write('const ap_fixed<'+str(int(np.log2(MAGNIFICATION))+1)+', 1, AP_TRN_ZERO, AP_SAT> '+fixed_wt_name+'['+str(size)+'] = {\n\t')
    for i in range(bias.shape[0]):
        b_int = int(bias[i]*MAGNIFICATION+0.5)
        if (b_int > MAGNIFICATION-1):
            b_int = MAGNIFICATION-1
        elif (b_int < -MAGNIFICATION):
            b_int = -MAGNIFICATION
        f.write(str(b_int/MAGNIFICATION))
        if (i < bias.shape[0]-1):
            f.write(", ")
    f.write("\n};\n")
    f.close()
fwrite_bias(network.params['b1'], 'af1_bias.h', 'af1_fbias', 'af1_bias', MAGNIFICATION, 50)
fwrite_bias(network.params['b2'], 'af2_bias.h', 'af2_fbias', 'af2_bias', MAGNIFICATION, 10)
# Output the MNIST data as C arrays and write them to a file
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
import datetime
OUTPUT_DATA_NUM = 100 # number of MNIST test samples to output (up to 10000)
# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
f = open("mnist_data.h", 'w')
todaytime = datetime.datetime.today()
f.write('// mnist_data.h\n')
strdtime = todaytime.strftime("%Y/%m/%d %H:%M:%S")
f.write('// {0} by marsee\n'.format(strdtime))
f.write("\n")
f.write('ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> t_train['+str(OUTPUT_DATA_NUM)+'][784] = {\n')
for i in range(OUTPUT_DATA_NUM):
    f.write("\t{")
    for j in range(x_test.shape[1]):
        f.write(str(x_test[i][j]))
        if (j==x_test.shape[1]-1):
            if (i==OUTPUT_DATA_NUM-1):
                f.write("}\n")
            else:
                f.write("},\n")
        else:
            f.write(", ")
f.write("};\n")
f.write('int t_train_256['+str(OUTPUT_DATA_NUM)+'][784] = {\n')
for i in range(OUTPUT_DATA_NUM):
    f.write("\t{")
    for j in range(x_test.shape[1]):
        f.write(str(int(x_test[i][j]*256)))
        if (j==x_test.shape[1]-1):
            if (i==OUTPUT_DATA_NUM-1):
                f.write("}\n")
            else:
                f.write("},\n")
        else:
            f.write(", ")
f.write("};\n")
f.write("\n")
f.write('float t_test['+str(OUTPUT_DATA_NUM)+']['+str(t_test.shape[1])+'] = {\n') # t_test has 10 label columns, not 784
for i in range(OUTPUT_DATA_NUM):
    f.write("\t{")
    for j in range(t_test.shape[1]):
        f.write(str(t_test[i][j]))
        if (j==t_test.shape[1]-1):
            if (i==OUTPUT_DATA_NUM-1):
                f.write("}\n")
            else:
                f.write("},\n")
        else:
            f.write(", ")
f.write("};\n")
f.close()
def view_mnist(first_offset, last_offset):
    # Display the MNIST images from index first_offset up to last_offset-1
    # Parts of this code are adapted from Chapter 8 of "Deep Learning from Scratch"
    # coding: utf-8
    import sys, os
    sys.path.append(os.pardir)
    import numpy as np
    from dataset.mnist import load_mnist
    import matplotlib.pyplot as plt

    # Load the data
    (x_train, t_train), (x_test, t_test) = load_mnist(normalize=False, one_hot_label=True)
    fig = plt.figure()
    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.2, wspace=0.2)
    current_view = 1
    for i in range(first_offset, last_offset):
        ax = fig.add_subplot(4, 5, current_view, xticks=[], yticks=[])
        ax.imshow(x_test[i].reshape(28, 28), cmap=plt.cm.gray_r, interpolation='nearest')
        current_view += 1
    plt.show()

view_mnist(0, 10)
# layers_int.py
# The original code is available at https://github.com/oreilly-japan/deep-learning-from-scratch
# The modified code is also released under the MIT License. 2017/06/02 by marsee
# coding: utf-8
import numpy as np
from common.functions import *
from common.util import im2col, col2im
MAGNIFICATION = 2 ** (9-1)
RANGE = 2 ** 4
class Relu:
    def __init__(self):
        self.mask = None

    def forward_int(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout
        return dx
class Sigmoid:
    def __init__(self):
        self.out = None

    def forward_int(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def forward(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out
        return dx
class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.original_x_shape = None
        # derivatives of the weight and bias parameters
        self.dW = None
        self.db = None
        self.bw = MAGNIFICATION

    def forward_int(self, x):
        # tensor support
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x
        # x should already be quantized at this point
        # clamp w and b to the INT8 range 2017/05/22 by marsee
        self.W = np.array(self.W*self.bw+0.5, dtype=int)
        self.b = np.array(self.b*self.bw+0.5, dtype=int)
        for i in range(self.W.shape[0]):
            for j in range(self.W.shape[1]):
                if (self.W[i][j] > self.bw-1):
                    self.W[i][j] = self.bw-1
                elif (self.W[i][j] < -self.bw):
                    self.W[i][j] = -self.bw
        for i in range(self.b.shape[0]):
            if (self.b[i] > self.bw-1):
                self.b[i] = self.bw-1
            elif (self.b[i] < -self.bw):
                self.b[i] = -self.bw
        self.W = np.array(self.W, dtype=float)
        self.b = np.array(self.b, dtype=float)
        self.W = self.W/self.bw
        self.b = self.b/self.bw
        out = np.dot(self.x, self.W) + self.b
        print(x.shape)
        print("np.max(x) = {0}".format(np.max(x)))
        print("np.min(x) = {0}".format(np.min(x)))
        #print("x = {0}".format(self.x))
        print(self.W.shape)
        print("np.max(self.W) = {0}".format(np.max(self.W)))
        print("np.min(self.W) = {0}".format(np.min(self.W)))
        print(self.b.shape)
        print("np.max(self.b) = {0}".format(np.max(self.b)))
        print("np.min(self.b) = {0}".format(np.min(self.b)))
        print(out.shape)
        print("np.max(out) = {0}".format(np.max(out)))
        print("np.min(out) = {0}".format(np.min(out)))
        #print("out = {0}".format(out))
        out = np.array(out*self.bw+0.5, dtype=int)
        for i in range(out.shape[0]):
            for j in range(out.shape[1]):
                if (out[i][j] > self.bw*RANGE-1):
                    out[i][j] = self.bw*RANGE-1
                elif (out[i][j] < -self.bw*RANGE):
                    out[i][j] = -self.bw*RANGE
        out = np.array(out, dtype=float)
        out = out/self.bw
        print("np.max(out2) = {0}".format(np.max(out)))
        print("np.min(out2) = {0}".format(np.min(out)))
        return out

    def forward(self, x):
        # tensor support
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x
        out = np.dot(self.x, self.W) + self.b
        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)
        dx = dx.reshape(*self.original_x_shape) # restore the input shape (tensor support)
        return dx
class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None # output of softmax
        self.t = None # teacher (label) data

    def forward(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def forward_int(self, x, t):
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)
        return self.loss

    def backward(self, dout=1):
        batch_size = self.t.shape[0]
        if self.t.size == self.y.size: # when the labels are one-hot vectors
            dx = (self.y - self.t) / batch_size
        else:
            dx = self.y.copy()
            dx[np.arange(batch_size), self.t] -= 1
            dx = dx / batch_size
        return dx
class Dropout:
    """
    http://arxiv.org/abs/1207.0580
    """
    def __init__(self, dropout_ratio=0.5):
        self.dropout_ratio = dropout_ratio
        self.mask = None

    def forward_int(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            return x * (1.0 - self.dropout_ratio)

    def forward(self, x, train_flg=True):
        if train_flg:
            self.mask = np.random.rand(*x.shape) > self.dropout_ratio
            return x * self.mask
        else:
            return x * (1.0 - self.dropout_ratio)

    def backward(self, dout):
        return dout * self.mask
class BatchNormalization:
    """
    http://arxiv.org/abs/1502.03167
    """
    def __init__(self, gamma, beta, momentum=0.9, running_mean=None, running_var=None):
        self.gamma = gamma
        self.beta = beta
        self.momentum = momentum
        self.input_shape = None # 4-dimensional for conv layers, 2-dimensional for fully connected layers
        # mean and variance used at test time
        self.running_mean = running_mean
        self.running_var = running_var
        # intermediate data used in backward
        self.batch_size = None
        self.xc = None
        self.std = None
        self.dgamma = None
        self.dbeta = None

    def forward_int(self, x, train_flg=True):
        self.input_shape = x.shape
        if x.ndim != 2:
            N, C, H, W = x.shape
            x = x.reshape(N, -1)
        out = self.__forward(x, train_flg)
        return out.reshape(*self.input_shape)

    def forward(self, x, train_flg=True):
        self.input_shape = x.shape
        if x.ndim != 2:
            N, C, H, W = x.shape
            x = x.reshape(N, -1)
        out = self.__forward(x, train_flg)
        return out.reshape(*self.input_shape)

    def __forward(self, x, train_flg):
        if self.running_mean is None:
            N, D = x.shape
            self.running_mean = np.zeros(D)
            self.running_var = np.zeros(D)
        if train_flg:
            mu = x.mean(axis=0)
            xc = x - mu
            var = np.mean(xc**2, axis=0)
            std = np.sqrt(var + 10e-7)
            xn = xc / std
            self.batch_size = x.shape[0]
            self.xc = xc
            self.xn = xn
            self.std = std
            self.running_mean = self.momentum * self.running_mean + (1-self.momentum) * mu
            self.running_var = self.momentum * self.running_var + (1-self.momentum) * var
        else:
            xc = x - self.running_mean
            xn = xc / ((np.sqrt(self.running_var + 10e-7)))
        out = self.gamma * xn + self.beta
        return out

    def backward(self, dout):
        if dout.ndim != 2:
            N, C, H, W = dout.shape
            dout = dout.reshape(N, -1)
        dx = self.__backward(dout)
        dx = dx.reshape(*self.input_shape)
        return dx

    def __backward(self, dout):
        dbeta = dout.sum(axis=0)
        dgamma = np.sum(self.xn * dout, axis=0)
        dxn = self.gamma * dout
        dxc = dxn / self.std
        dstd = -np.sum((dxn * self.xc) / (self.std * self.std), axis=0)
        dvar = 0.5 * dstd / self.std
        dxc += (2.0 / self.batch_size) * self.xc * dvar
        dmu = np.sum(dxc, axis=0)
        dx = dxc - dmu / self.batch_size
        self.dgamma = dgamma
        self.dbeta = dbeta
        return dx
class Convolution:
    def __init__(self, W, b, stride=1, pad=0):
        self.W = W
        self.b = b
        self.stride = stride
        self.pad = pad
        # intermediate data (used in backward)
        self.x = None
        self.col = None
        self.col_W = None
        # gradients of the weight and bias parameters
        self.dW = None
        self.db = None

    def forward_int(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
        out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
        self.x = x
        self.col = col
        self.col_W = col_W
        return out

    def forward(self, x):
        FN, C, FH, FW = self.W.shape
        N, C, H, W = x.shape
        out_h = 1 + int((H + 2*self.pad - FH) / self.stride)
        out_w = 1 + int((W + 2*self.pad - FW) / self.stride)
        col = im2col(x, FH, FW, self.stride, self.pad)
        col_W = self.W.reshape(FN, -1).T
        out = np.dot(col, col_W) + self.b
        out = out.reshape(N, out_h, out_w, -1).transpose(0, 3, 1, 2)
        self.x = x
        self.col = col
        self.col_W = col_W
        return out

    def backward(self, dout):
        FN, C, FH, FW = self.W.shape
        dout = dout.transpose(0,2,3,1).reshape(-1, FN)
        self.db = np.sum(dout, axis=0)
        self.dW = np.dot(self.col.T, dout)
        self.dW = self.dW.transpose(1, 0).reshape(FN, C, FH, FW)
        dcol = np.dot(dout, self.col_W.T)
        dx = col2im(dcol, self.x.shape, FH, FW, self.stride, self.pad)
        return dx
class Pooling:
    def __init__(self, pool_h, pool_w, stride=1, pad=0):
        self.pool_h = pool_h
        self.pool_w = pool_w
        self.stride = stride
        self.pad = pad
        self.x = None
        self.arg_max = None

    def forward_int(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)
        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        self.x = x
        self.arg_max = arg_max
        return out

    def forward(self, x):
        N, C, H, W = x.shape
        out_h = int(1 + (H - self.pool_h) / self.stride)
        out_w = int(1 + (W - self.pool_w) / self.stride)
        col = im2col(x, self.pool_h, self.pool_w, self.stride, self.pad)
        col = col.reshape(-1, self.pool_h*self.pool_w)
        arg_max = np.argmax(col, axis=1)
        out = np.max(col, axis=1)
        out = out.reshape(N, out_h, out_w, C).transpose(0, 3, 1, 2)
        self.x = x
        self.arg_max = arg_max
        return out

    def backward(self, dout):
        dout = dout.transpose(0, 2, 3, 1)
        pool_size = self.pool_h * self.pool_w
        dmax = np.zeros((dout.size, pool_size))
        dmax[np.arange(self.arg_max.size), self.arg_max.flatten()] = dout.flatten()
        dmax = dmax.reshape(dout.shape + (pool_size,))
        dcol = dmax.reshape(dmax.shape[0] * dmax.shape[1] * dmax.shape[2], -1)
        dx = col2im(dcol, self.x.shape, self.pool_h, self.pool_w, self.stride, self.pad)
        return dx
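Incidentally, the elementwise saturation loops in Affine.forward_int can also be written with np.clip; a behavior-equivalent sketch (quantize_sat is my name, not part of layers_int.py):
import numpy as np

MAGNIFICATION = 2 ** (9-1)
RANGE = 2 ** 4

# Vectorized sketch of the scale-round-saturate step in Affine.forward_int.
def quantize_sat(a, scale, limit):
    a_int = np.array(a*scale + 0.5, dtype=int)  # scale, add 0.5, truncate toward zero
    a_int = np.clip(a_int, -limit, limit-1)     # same saturation as the for loops
    return np.array(a_int, dtype=float)/scale   # back to fractional values

# Weights and biases saturate at +/-1 (ap_fixed<9,1>):
#   W_q = quantize_sat(W, MAGNIFICATION, MAGNIFICATION)
# Layer outputs saturate at +/-RANGE = +/-16:
#   out_q = quantize_sat(out, MAGNIFICATION, MAGNIFICATION*RANGE)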
# two_layer_net_int.py
# The original code is available at https://github.com/oreilly-japan/deep-learning-from-scratch
# The modified code is also released under the MIT License. 2017/06/02 by marsee
# coding: utf-8
import sys, os
sys.path.append(os.pardir) # setting to allow importing files from the parent directory
import numpy as np
from layers_int import *
#from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        # Initialize the weights
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        # Create the layers
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def predict_int(self, x):
        for layer in self.layers.values():
            x = layer.forward_int(x)
        return x

    # x: input data, t: teacher (label) data
    def loss_int(self, x, t):
        y = self.predict_int(x)
        return self.lastLayer.forward_int(y, t)

    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)

    def accuracy_int(self, x, t):
        y = self.predict_int(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1 : t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1 : t = np.argmax(t, axis=1)
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    # x: input data, t: teacher (label) data
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        return grads

    def gradient(self, x, t):
        # forward
        self.loss(x, t)
        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)
        # Store the gradients
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db
        return grads
# train_neuralnet.py
# The original code is available at https://github.com/oreilly-japan/deep-learning-from-scratch
# The modified code is also released under the MIT License. 2017/06/02 by marsee
# coding: utf-8
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net_int import TwoLayerNet
# Load the data
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
train_loss_list = []
train_acc_list = []
test_acc_list = []
iter_per_epoch = max(train_size / batch_size, 1)
for i in range(iters_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # Compute the gradient
    #grad = network.numerical_gradient(x_batch, t_batch)
    grad = network.gradient(x_batch, t_batch)
    # Update the parameters
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print(train_acc, test_acc)
        test_mask = np.random.choice(train_size, 1)
        xtest_batch = x_train[test_mask]
        ttest_batch = t_train[test_mask]
        xtest_data = network.predict(xtest_batch)
        #print(train_acc, test_acc)
        #print(xtest_data)
        #print(ttest_batch)
        """print(network.params['W1'])
        print(network.params['b1'])
        print(network.params['W2'])
        print(network.params['b2'])"""
print()
train_acc_int = network.accuracy_int(x_train, t_train)
test_acc_int = network.accuracy_int(x_test, t_test)
print(train_acc_int, test_acc_int)
0.17975 0.1722
0.904483333333 0.9106
0.9247 0.9279
0.9357 0.9352
0.94515 0.9425
0.95155 0.9506
0.956733333333 0.9527
0.95995 0.9564
0.9649 0.9615
0.9663 0.9609
0.968883333333 0.9632
0.972766666667 0.9655
0.973883333333 0.9665
0.97535 0.968
0.97635 0.9678
0.9784 0.9696
0.979483333333 0.9692
(60000, 784)
np.max(x) = 1.0
np.min(x) = 0.0
(784, 50)
np.max(self.W) = 0.42578125
np.min(self.W) = -0.53515625
(50,)
np.max(self.b) = 0.28515625
np.min(self.b) = -0.22265625
(60000, 50)
np.max(out) = 10.925199185698148
np.min(out) = -12.404212626584922
np.max(out2) = 10.92578125
np.min(out2) = -12.3984375
(60000, 50)
np.max(x) = 10.92578125
np.min(x) = 0.0
(50, 10)
np.max(self.W) = 0.99609375
np.min(self.W) = -1.0
(10,)
np.max(self.b) = 0.421875
np.min(self.b) = -0.39453125
(60000, 10)
np.max(out) = 30.527862548828125
np.min(out) = -28.802261352539062
np.max(out2) = 15.99609375
np.min(out2) = -16.0
(10000, 784)
np.max(x) = 1.0
np.min(x) = 0.0
(784, 50)
np.max(self.W) = 0.42578125
np.min(self.W) = -0.53125
(50,)
np.max(self.b) = 0.28515625
np.min(self.b) = -0.21875
(10000, 50)
np.max(out) = 10.473299672572466
np.min(out) = -10.355300247856576
np.max(out2) = 10.47265625
np.min(out2) = -10.3515625
(10000, 50)
np.max(x) = 10.47265625
np.min(x) = 0.0
(50, 10)
np.max(self.W) = 0.99609375
np.min(self.W) = -0.99609375
(10,)
np.max(self.b) = 0.421875
np.min(self.b) = -0.390625
(10000, 10)
np.max(out) = 29.088897705078125
np.min(out) = -24.532501220703125
np.max(out2) = 15.99609375
np.min(out2) = -16.0
0.9774 0.9636
// nn_test_float.h
// 2017/05/31 by marsee
//
#ifndef __NN_TEST_H__
#define __NN_TEST_H__
#include <ap_fixed.h>
const float wt[3][4]={
{-0.1, 0.2, -0.3, 0.4},
{-0.5, 0.6, -0.7, 0.8},
{-0.2, 0.4, -0.5, 0.6}
};
const float b[4] ={
-0.1, 0.4, -0.3, 0.5
};
#endif
// nn_test_float.cpp
// 2017/05/29 by marsee
//
#include <stdio.h>
#include "nn_test_float.h"
int nn_test(float in[3], float out[4]){
float dot[4];
Loop1: for(int j=0; j<4; j++){
dot[j] = 0;
Loop2: for(int i=0; i<3; i++){
dot[j] += in[i]*wt[i][j];
}
dot[j] += b[j];
out[j] = dot[j];
}
return(0);
}
// nn_test_tb_float.cpp
// 2017/05/29 by marsee
//
#include <stdio.h>
#include "nn_test_float.h"
int nn_test(float in[3], float out[4]);
int nn_test_soft(float in[3], float out[4]);
int main(){
float in[3] = {0.390625, 0.5859375, 0.78125};
float out[4];
float out_soft[4];
nn_test(in, out);
nn_test_soft(in, out_soft);
for(int i=0; i<4; i++){
if(out[i] != out_soft[i]){
printf("ERROR HW and SW results mismatch i = %d, HW = %f, SW = %f\n", i, (float)out[i], (float)out_soft[i]);
}else{
printf("out[%d] = %f\n", i, (float)out[i]);
}
}
return(0);
}
int nn_test_soft(float in[3], float out[4]){
float dot[4];
for(int j=0; j<4; j++){
dot[j] = 0;
for(int i=0; i<3; i++){
dot[j] += in[i]*wt[i][j];
}
dot[j] += b[j];
out[j] = dot[j];
}
return(0);
}
out[0] = -0.588281
out[1] = 1.142187
out[2] = -1.217969
out[3] = 1.593750
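As a quick sanity check, out[3] can be verified by hand: 0.390625*0.4 + 0.5859375*0.8 + 0.78125*0.6 + 0.5 = 0.15625 + 0.46875 + 0.46875 + 0.5 = 1.59375, which matches the printed 1.593750.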
// nn_test.h
// 2017/05/31 by marsee
//
#ifndef __NN_TEST_H__
#define __NN_TEST_H__
#include <ap_fixed.h>
const ap_fixed<9, 1, AP_TRN_ZERO, AP_SAT> wt[3][4]={
{-0.1, 0.2, -0.3, 0.4},
{-0.5, 0.6, -0.7, 0.8},
{-0.2, 0.4, -0.5, 0.6}
};
const ap_fixed<9, 1, AP_TRN_ZERO, AP_SAT> b[4] ={
-0.1, 0.4, -0.3, 0.5
};
#endif
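Note that these constants no longer hold the exact decimal values written above: ap_fixed<9, 1, AP_TRN_ZERO, AP_SAT> keeps 8 fractional bits, and AP_TRN_ZERO truncates toward zero, so for example -0.1 is stored as -25/256 = -0.09765625. This is why the fixed-point results are expected to differ slightly from the float version.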
// nn_test.cpp
// 2017/05/29 by marsee
//
#include <stdio.h>
#include "nn_test.h"
int nn_test(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[3], ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[4]){
ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> dot[4];
Loop1: for(int j=0; j<4; j++){
dot[j] = 0;
Loop2: for(int i=0; i<3; i++){
dot[j] += in[i]*wt[i][j];
}
dot[j] += b[j];
out[j] = dot[j];
}
return(0);
}
// nn_test_tb.cpp
// 2017/05/29 by marsee
//
#include <stdio.h>
#include "nn_test.h"
int nn_test(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[3], ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[4]);
int nn_test_soft(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[3], ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[4]);
int main(){
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[3] = {0.390625, 0.5859375, 0.78125};
ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[4];
ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out_soft[4];
nn_test(in, out);
nn_test_soft(in, out_soft);
for(int i=0; i<4; i++){
if(out[i] != out_soft[i]){
printf("ERROR HW and SW results mismatch i = %d, HW = %f, SW = %f\n", i, (float)out[i], (float)out_soft[i]);
}else{
printf("out[%d] = %f\n", i, (float)out[i]);
}
}
return(0);
}
int nn_test_soft(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[3], ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> out[4]){
ap_fixed<13, 5, AP_TRN_ZERO, AP_SAT> dot[4];
for(int j=0; j<4; j++){
dot[j] = 0;
for(int i=0; i<3; i++){
dot[j] += in[i]*wt[i][j];
}
dot[j] += b[j];
out[j] = dot[j];
}
return(0);
}