# Write convolution-layer weights to a C header file.
# 2018/05/31 by marsee
def _write_c_array_4d(f, weight, to_text):
    """Write the brace-nested C initializer body of a 4-D array to ``f``.

    The caller is expected to have written the declaration and the opening
    ``{`` already; the closing ``};`` is written here after the last
    outermost element.

    Args:
        f: Writable text file object.
        weight: Array-like with a 4-element ``.shape`` that supports
            ``weight[i][j][k][m]`` indexing.
        to_text: Callable turning one element into the text written for it.
    """
    n0, n1, n2, n3 = weight.shape
    for i in range(n0):
        f.write("\t{\n")
        for j in range(n1):
            f.write("\t\t{\n")
            # One "{a,b,c}" line per innermost row, separated by ",\n".
            rows = [
                "\t\t\t{" + ",".join(to_text(weight[i][j][k][m]) for m in range(n3)) + "}"
                for k in range(n2)
            ]
            f.write(",\n".join(rows))
            f.write("\n\t\t}\n")
            f.write("\t}\n" if j == n1 - 1 else ",\n")
        f.write("};\n" if i == n0 - 1 else "\t,\n")


def fwrite_conv_weight(weight, wfile_name, float_wt_name, fixed_wt_name, MAGNIFICATION):
    """Write 4-D weights to a C header as a float array and an ap_fixed array.

    The file starts with two ``//`` comment lines (file name, timestamp),
    followed by ``const float <float_wt_name>[..][..][..][..]`` holding the
    raw values and ``const ap_fixed<W, 1, AP_TRN_ZERO, AP_SAT>
    <fixed_wt_name>[..][..][..][..]`` holding the quantized values, where
    ``W = int(log2(MAGNIFICATION)) + 1``.

    Quantization rounds ``value * MAGNIFICATION`` to the nearest integer
    (halves round up), saturates it to ``[-MAGNIFICATION, MAGNIFICATION - 1]``
    and writes it back divided by ``MAGNIFICATION``.

    Args:
        weight: 4-D array (``.shape`` with four entries), indexed as
            ``weight[i][j][k][m]``.
        wfile_name: Path of the header file to create or overwrite.
        float_wt_name: C identifier for the float array.
        fixed_wt_name: C identifier for the ap_fixed array.
        MAGNIFICATION: Scale factor of the fixed-point format; presumably a
            power of two, since the bit width is derived from its log2.
    """
    import datetime
    import math
    import numpy as np

    dims = '[{0}][{1}][{2}][{3}]'.format(*weight.shape)
    total_bits = int(np.log2(MAGNIFICATION)) + 1

    def to_fixed_text(value):
        # floor(x + 0.5) rounds to nearest for both signs. The previous
        # int(x + 0.5) truncated toward zero, which pushed every negative
        # weight up to one extra LSB toward zero.
        w_int = int(math.floor(value * MAGNIFICATION + 0.5))
        w_int = min(max(w_int, -MAGNIFICATION), MAGNIFICATION - 1)
        return str(float(w_int) / float(MAGNIFICATION))

    strdtime = datetime.datetime.today().strftime("%Y/%m/%d %H:%M:%S")

    # 'with' guarantees the header is closed even if formatting fails midway.
    with open(wfile_name, 'w') as f:
        f.write('// ' + wfile_name + '\n')
        f.write('// {0} by marsee\n'.format(strdtime))
        f.write("\n")

        f.write('const float ' + float_wt_name + dims + ' = \n{\n')
        _write_c_array_4d(f, weight, str)

        f.write("\n")
        f.write('const ap_fixed<' + str(total_bits) + ', 1, AP_TRN_ZERO, AP_SAT> '
                + fixed_wt_name + dims + ' = \n{\n')
        _write_c_array_4d(f, weight, to_fixed_text)
# Scale factor for the fixed-point conv weights: 2 to the power (9 - 1).
MAGNIFICATION_CONV = 1 << (9 - 1)

# Reorder the weight axes with transpose((3, 2, 0, 1)) before export; the
# generated header declares the arrays as [10][1][5][5].
fwrite_conv_weight(
    weight=conv_layer_weight.transpose((3, 2, 0, 1)),
    wfile_name='conv1_weight10.h',
    float_wt_name='conv1_fweight',
    fixed_wt_name='conv1_weight',
    MAGNIFICATION=MAGNIFICATION_CONV,
)
// conv1_weight10.h
// 2018/06/03 16:10:12 by marsee
const float conv1_fweight[10][1][5][5] =
{
{
{
{0.20261094,-0.07934965,-0.22467291,-0.43532223,-0.23157348},
{-0.3398506,-0.33518496,-0.39792794,-0.08618319,0.16632372},
{-0.5767307,-0.4275438,-0.072111405,0.3187846,0.2654636},
{0.11835691,0.25123549,0.38731813,0.27912328,0.15583257},
{0.13021287,0.38388866,0.24981308,0.022721838,-0.04710154}
}
}
,
{
{
{-0.48611653,-0.6372757,-0.40548003,-0.33244497,-0.13435255},
{0.15439186,-0.34480083,-0.56099683,-0.3915109,-0.41839477},
{0.4068115,0.3836496,0.10779987,-0.1230321,-0.4722871},
{0.3514016,0.3169199,0.32510042,0.2981098,-0.10132303},
{0.16548221,0.2640638,0.3619229,0.35238296,0.113044925}
}
}
,
{
{
{-0.13130069,0.18190795,0.15537558,-0.1676253,-0.14785497},
{0.06223634,0.35684425,-0.112429045,-0.44136783,-0.10125857},
{0.10510171,0.25642243,-0.2288756,-0.29937005,0.12721944},
{0.021834752,0.008635783,0.040261764,-0.017128099,0.055860933},
{-0.16628554,0.12985978,0.08550146,0.2620432,0.105794474}
}
}
,
{
{
{0.15156339,0.4940948,0.22510909,0.19534814,0.24242142},
{-0.20048767,0.3314954,0.45060343,0.116408214,0.054000396},
{-0.5791418,-0.1274401,0.15244117,0.2987521,-0.008650972},
{-0.65549827,-0.3982863,-0.23712645,-0.04862794,-0.030009096},
{-0.25779864,-0.3313806,-0.025542857,0.04132852,0.12885101}
}
}
,
{
{
{0.36471996,0.11910628,0.0868587,-0.26476386,-0.40310845},
{0.35694337,0.07778469,0.1454417,-0.27914035,-0.43084973},
{0.27650365,0.19447733,0.022587685,-0.4387378,-0.27778476},
{0.35590482,0.060368076,-0.2499182,-0.33735904,-0.2462857},
{0.13169082,-0.12147922,-0.19614659,-0.033236343,0.04993651}
}
}
,
{
{
{-0.08484216,0.17010233,0.397805,0.18052064,-0.20084426},
{0.19511358,0.2240115,0.23805015,-0.28208354,-0.30468363},
{0.58113253,0.23622094,-0.1035163,-0.29351595,-0.2777929},
{-0.12703945,-0.31102535,-0.45656392,-0.36484626,0.08292956},
{-0.516542,-0.59745365,-0.34286296,0.064657405,-0.016369406}
}
}
,
{
{
{-0.05747546,-0.025008501,0.2489682,0.0009843357,-0.31173185},
{0.10129268,0.140934,0.18465307,-0.29655868,-0.3589846},
{0.0927546,0.12933072,0.23520534,-0.13283624,-0.2216169},
{0.01556351,0.19052765,0.26735055,-0.11904856,0.05286852},
{-0.16821466,0.20077062,0.24849436,-0.027033936,-0.0066970563}
}
}
,
{
{
{-0.46006405,-0.07625411,0.072767265,0.21414295,0.014650909},
{-0.41662437,-0.01859824,0.20107509,0.14830865,0.08253051},
{-0.26404095,-0.023522798,0.15815544,0.24796312,-0.08803863},
{-0.27005908,0.030365303,0.3283318,0.015161242,0.014568055},
{0.0034153308,0.10755768,0.23039222,-0.050392643,-0.17668988}
}
}
,
{
{
{0.09082198,-0.033604637,0.113404974,0.20493641,0.14139216},
{0.38919494,0.21474971,0.20264329,0.2751836,0.20002662},
{0.33294797,0.37199846,0.37084493,0.10829608,0.17661056},
{-0.5168951,-0.29824486,-0.32331055,-0.20219678,-0.22110288},
{-0.62100536,-0.6191712,-0.5669018,-0.39315876,-0.28045934}
}
}
,
{
{
{0.12587526,0.171594,0.19060391,-0.17589498,-0.2094244},
{-0.013645746,0.22744659,0.12572204,-0.028848726,-0.37416157},
{-0.2322505,-0.05975187,0.3344037,0.20712087,-0.084726445},
{-0.14462651,-0.18951881,0.26089588,0.19588387,0.12522626},
{-0.03129309,-0.2751198,-0.120508276,0.0149853965,0.06411268}
}
}
};
const ap_fixed<9, 1, AP_TRN, AP_WRAP> conv1_weight[10][1][5][5] =
{
{
{
{0.203125,-0.07421875,-0.22265625,-0.4296875,-0.2265625},
{-0.3359375,-0.33203125,-0.39453125,-0.08203125,0.16796875},
{-0.57421875,-0.421875,-0.06640625,0.3203125,0.265625},
{0.1171875,0.25,0.38671875,0.27734375,0.15625},
{0.12890625,0.3828125,0.25,0.0234375,-0.04296875}
}
}
,
{
{
{-0.48046875,-0.6328125,-0.40234375,-0.328125,-0.12890625},
{0.15625,-0.33984375,-0.55859375,-0.38671875,-0.4140625},
{0.40625,0.3828125,0.109375,-0.1171875,-0.46875},
{0.3515625,0.31640625,0.32421875,0.296875,-0.09765625},
{0.1640625,0.265625,0.36328125,0.3515625,0.11328125}
}
}
,
{
{
{-0.12890625,0.18359375,0.15625,-0.1640625,-0.14453125},
{0.0625,0.35546875,-0.109375,-0.4375,-0.09765625},
{0.10546875,0.2578125,-0.2265625,-0.296875,0.12890625},
{0.0234375,0.0078125,0.0390625,-0.01171875,0.0546875},
{-0.1640625,0.12890625,0.0859375,0.26171875,0.10546875}
}
}
,
{
{
{0.15234375,0.4921875,0.2265625,0.1953125,0.2421875},
{-0.1953125,0.33203125,0.44921875,0.1171875,0.0546875},
{-0.57421875,-0.125,0.15234375,0.296875,-0.00390625},
{-0.65234375,-0.39453125,-0.234375,-0.04296875,-0.02734375},
{-0.25390625,-0.328125,-0.0234375,0.04296875,0.12890625}
}
}
,
{
{
{0.36328125,0.1171875,0.0859375,-0.26171875,-0.3984375},
{0.35546875,0.078125,0.14453125,-0.2734375,-0.42578125},
{0.27734375,0.1953125,0.0234375,-0.43359375,-0.2734375},
{0.35546875,0.05859375,-0.24609375,-0.33203125,-0.2421875},
{0.1328125,-0.1171875,-0.19140625,-0.03125,0.05078125}
}
}
,
{
{
{-0.08203125,0.171875,0.3984375,0.1796875,-0.1953125},
{0.1953125,0.22265625,0.23828125,-0.27734375,-0.30078125},
{0.58203125,0.234375,-0.1015625,-0.2890625,-0.2734375},
{-0.125,-0.30859375,-0.453125,-0.359375,0.08203125},
{-0.51171875,-0.59375,-0.33984375,0.06640625,-0.01171875}
}
}
,
{
{
{-0.0546875,-0.01953125,0.25,0.0,-0.30859375},
{0.1015625,0.140625,0.18359375,-0.29296875,-0.35546875},
{0.09375,0.12890625,0.234375,-0.12890625,-0.21875},
{0.015625,0.19140625,0.265625,-0.11328125,0.0546875},
{-0.1640625,0.19921875,0.25,-0.0234375,-0.00390625}
}
}
,
{
{
{-0.45703125,-0.07421875,0.07421875,0.21484375,0.015625},
{-0.4140625,-0.015625,0.19921875,0.1484375,0.08203125},
{-0.26171875,-0.01953125,0.15625,0.24609375,-0.0859375},
{-0.265625,0.03125,0.328125,0.015625,0.015625},
{0.00390625,0.109375,0.23046875,-0.046875,-0.171875}
}
}
,
{
{
{0.08984375,-0.03125,0.11328125,0.203125,0.140625},
{0.390625,0.21484375,0.203125,0.2734375,0.19921875},
{0.33203125,0.37109375,0.37109375,0.109375,0.17578125},
{-0.51171875,-0.29296875,-0.3203125,-0.19921875,-0.21875},
{-0.6171875,-0.6171875,-0.5625,-0.390625,-0.27734375}
}
}
,
{
{
{0.125,0.171875,0.19140625,-0.171875,-0.20703125},
{-0.0078125,0.2265625,0.125,-0.0234375,-0.37109375},
{-0.2265625,-0.0546875,0.3359375,0.20703125,-0.08203125},
{-0.140625,-0.1875,0.26171875,0.1953125,0.125},
{-0.02734375,-0.26953125,-0.1171875,0.015625,0.0625}
}
}
};
# Extract the intermediate output of the first Dense layer.
# NOTE(review): the layer name must match the names listed by
# model.summary() for the model currently loaded -- confirm before running.
from keras.models import Model

dence_layer1_name = 'dense_5'
dence_layer1 = model.get_layer(dence_layer1_name)
dence_layer1_wb = dence_layer1.get_weights()

# Sub-model that shares the trained model's input and stops at this layer.
dence_layer1_model = Model(inputs=model.input, outputs=dence_layer1.output)
dence_layer1_output = dence_layer1_model.predict(x_test, verbose=1)
10000/10000 [==============================] - 2s 174us/step
# Split the list returned by get_weights() into weight and bias, the same
# way the convolution-layer cell does with conv_layer_wb.
dence_layer1_weight = dence_layer1_wb[0]
dence_layer1_bias = dence_layer1_wb[1]
print(dence_layer1_weight.shape)
print(dence_layer1_bias.shape)
(1440, 100)
(100,)
# Report the value range (signed and absolute) of the first Dense layer's
# weights and biases.
abs_dence_layer1_weight = np.abs(dence_layer1_weight)
abs_dence_layer1_bias = np.abs(dence_layer1_bias)

print("np.max(dence_layer1_weight) = {0}".format(dence_layer1_weight.max()))
print("np.min(dence_layer1_weight) = {0}".format(dence_layer1_weight.min()))
print("np.max(abs_dence_layer1_weight) = {0}".format(abs_dence_layer1_weight.max()))
print("np.min(abs_dence_layer1_weight) = {0}".format(abs_dence_layer1_weight.min()))

print("np.max(dence_layer1_bias) = {0}".format(dence_layer1_bias.max()))
print("np.min(dence_layer1_bias) = {0}".format(dence_layer1_bias.min()))
print("np.max(abs_dence_layer1_bias) = {0}".format(abs_dence_layer1_bias.max()))
print("np.min(abs_dence_layer1_bias) = {0}".format(abs_dence_layer1_bias.min()))
np.max(dence_layer1_weight) = 0.287210673094
np.min(dence_layer1_weight) = -0.320384502411
np.max(abs_dence_layer1_weight) = 0.320384502411
np.min(abs_dence_layer1_weight) = 1.92374045582e-07
np.max(dence_layer1_bias) = 0.105059452355
np.min(dence_layer1_bias) = -0.0615252479911
np.max(abs_dence_layer1_bias) = 0.105059452355
np.min(abs_dence_layer1_bias) = 0.000534977589268
# Report shape, spread and value range of the first Dense layer's output.
abs_dence_layer1_output = np.abs(dence_layer1_output)

print("dence_layer1_output = {0}".format(dence_layer1_output.shape))
print("np.std(dence_layer1_output) = {0}".format(dence_layer1_output.std()))
print("np.max(dence_layer1_output) = {0}".format(dence_layer1_output.max()))
print("np.min(dence_layer1_output) = {0}".format(dence_layer1_output.min()))
print("np.max(abs_dence_layer1_output) = {0}".format(abs_dence_layer1_output.max()))
print("np.min(abs_dence_layer1_output) = {0}".format(abs_dence_layer1_output.min()))
dence_layer1_output = (10000, 100)
np.std(dence_layer1_output) = 3.02382802963
np.max(dence_layer1_output) = 14.2637271881
np.min(dence_layer1_output) = -13.8859920502
np.max(abs_dence_layer1_output) = 14.2637271881
np.min(abs_dence_layer1_output) = 4.04380261898e-06
# Plot the weights of Dense layer 1 (flattened to 1-D so that every weight
# becomes one point along the x axis).
dence_layer1_weight_f = dence_layer1_weight.flatten()
plt.plot(dence_layer1_weight_f)
plt.title('dence_layer1_weight')
plt.show()
# Plot the biases of Dense layer 1.
dence_layer1_bias_f = dence_layer1_bias.flatten()
plt.plot(dence_layer1_bias_f)
plt.title('dence_layer1_bias')
plt.show()
# Extract the intermediate output of the second Dense layer.
# NOTE(review): the layer name must match the names listed by
# model.summary() for the model currently loaded -- confirm before running.
from keras.models import Model

dence_layer2_name = 'dense_6'
dence_layer2 = model.get_layer(dence_layer2_name)
dence_layer2_wb = dence_layer2.get_weights()

# Sub-model that shares the trained model's input and stops at this layer.
dence_layer2_model = Model(inputs=model.input, outputs=dence_layer2.output)
dence_layer2_output = dence_layer2_model.predict(x_test, verbose=1)
10000/10000 [==============================] - 2s 167us/step
# Split the list returned by get_weights() into weight and bias, the same
# way the convolution-layer cell does with conv_layer_wb.
dence_layer2_weight = dence_layer2_wb[0]
dence_layer2_bias = dence_layer2_wb[1]
print(dence_layer2_weight.shape)
print(dence_layer2_bias.shape)
(100, 10)
(10,)
# Report the value range (signed and absolute) of the second Dense layer's
# weights and biases.
abs_dence_layer2_weight = np.abs(dence_layer2_weight)
abs_dence_layer2_bias = np.abs(dence_layer2_bias)

print("np.max(dence_layer2_weight) = {0}".format(dence_layer2_weight.max()))
print("np.min(dence_layer2_weight) = {0}".format(dence_layer2_weight.min()))
print("np.max(abs_dence_layer2_weight) = {0}".format(abs_dence_layer2_weight.max()))
print("np.min(abs_dence_layer2_weight) = {0}".format(abs_dence_layer2_weight.min()))

print("np.max(dence_layer2_bias) = {0}".format(dence_layer2_bias.max()))
print("np.min(dence_layer2_bias) = {0}".format(dence_layer2_bias.min()))
print("np.max(abs_dence_layer2_bias) = {0}".format(abs_dence_layer2_bias.max()))
print("np.min(abs_dence_layer2_bias) = {0}".format(abs_dence_layer2_bias.min()))
np.max(dence_layer2_weight) = 0.420090407133
np.min(dence_layer2_weight) = -0.625470399857
np.max(abs_dence_layer2_weight) = 0.625470399857
np.min(abs_dence_layer2_weight) = 0.000126185041154
np.max(dence_layer2_bias) = 0.0749695450068
np.min(dence_layer2_bias) = -0.0558836981654
np.max(abs_dence_layer2_bias) = 0.0749695450068
np.min(abs_dence_layer2_bias) = 0.00171886803582
# Report shape, spread and value range of the second Dense layer's output.
abs_dence_layer2_output = np.abs(dence_layer2_output)

print("dence_layer2_output = {0}".format(dence_layer2_output.shape))
print("np.std(dence_layer2_output) = {0}".format(dence_layer2_output.std()))
print("np.max(dence_layer2_output) = {0}".format(dence_layer2_output.max()))
print("np.min(dence_layer2_output) = {0}".format(dence_layer2_output.min()))
print("np.max(abs_dence_layer2_output) = {0}".format(abs_dence_layer2_output.max()))
print("np.min(abs_dence_layer2_output) = {0}".format(abs_dence_layer2_output.min()))
dence_layer2_output = (10000, 10)
np.std(dence_layer2_output) = 9.34499263763
np.max(dence_layer2_output) = 30.0013465881
np.min(dence_layer2_output) = -35.2990074158
np.max(abs_dence_layer2_output) = 35.2990074158
np.min(abs_dence_layer2_output) = 0.000138353556395
# Plot the weights of Dense layer 2 (flattened to 1-D so that every weight
# becomes one point along the x axis).
dence_layer2_weight_f = dence_layer2_weight.flatten()
plt.plot(dence_layer2_weight_f)
plt.title('dence_layer2_weight')
plt.show()
# Plot the biases of Dense layer 2.
dence_layer2_bias_f = dence_layer2_bias.flatten()
plt.plot(dence_layer2_bias_f)
plt.title('dence_layer2_bias')
plt.show()
# Extract the intermediate output of the convolution layer.
# NOTE(review): the layer name must match the names listed by
# model.summary() for the model currently loaded -- confirm before running.
from keras.models import Model

conv_layer_name = 'conv2d_4'
conv_layer = model.get_layer(conv_layer_name)
conv_layer_wb = conv_layer.get_weights()

# Sub-model that shares the trained model's input and stops at this layer.
conv_layer_model = Model(inputs=model.input, outputs=conv_layer.output)
conv_output = conv_layer_model.predict(x_test, verbose=1)
10000/10000 [==============================] - 1s 150us/step
# get_weights() returned the weight array first and the bias array second.
conv_layer_weight, conv_layer_bias = conv_layer_wb[:2]

# Show the weight shape, its fully reversed (transposed) shape, and the
# bias shape.
print(conv_layer_weight.shape)
print(np.transpose(conv_layer_weight).shape)
print(conv_layer_bias.shape)
(5, 5, 1, 10)
(10, 1, 5, 5)
(10,)
# Show the transposed weight array (conv_layer_weight.T).
print("conv_layer_weight.T = {0}".format(conv_layer_weight.T))
conv_layer_weight.T = [[[[ 0.20261094 -0.3398506 -0.5767307 0.11835691 0.13021287]
[-0.07934965 -0.33518496 -0.4275438 0.25123549 0.38388866]
[-0.22467291 -0.39792794 -0.07211141 0.38731813 0.24981308]
[-0.43532223 -0.08618319 0.3187846 0.27912328 0.02272184]
[-0.23157348 0.16632372 0.2654636 0.15583257 -0.04710154]]]
[[[-0.48611653 0.15439186 0.4068115 0.3514016 0.16548221]
[-0.6372757 -0.34480083 0.3836496 0.3169199 0.2640638 ]
[-0.40548003 -0.56099683 0.10779987 0.32510042 0.3619229 ]
[-0.33244497 -0.3915109 -0.1230321 0.2981098 0.35238296]
[-0.13435255 -0.41839477 -0.4722871 -0.10132303 0.11304493]]]
[[[-0.13130069 0.06223634 0.10510171 0.02183475 -0.16628554]
[ 0.18190795 0.35684425 0.25642243 0.00863578 0.12985978]
[ 0.15537558 -0.11242905 -0.2288756 0.04026176 0.08550146]
[-0.1676253 -0.44136783 -0.29937005 -0.0171281 0.2620432 ]
[-0.14785497 -0.10125857 0.12721944 0.05586093 0.10579447]]]
[[[ 0.15156339 -0.20048767 -0.5791418 -0.65549827 -0.25779864]
[ 0.4940948 0.3314954 -0.1274401 -0.3982863 -0.3313806 ]
[ 0.22510909 0.45060343 0.15244117 -0.23712645 -0.02554286]
[ 0.19534814 0.11640821 0.2987521 -0.04862794 0.04132852]
[ 0.24242142 0.0540004 -0.00865097 -0.0300091 0.12885101]]]
[[[ 0.36471996 0.35694337 0.27650365 0.35590482 0.13169082]
[ 0.11910628 0.07778469 0.19447733 0.06036808 -0.12147922]
[ 0.0868587 0.1454417 0.02258768 -0.2499182 -0.19614659]
[-0.26476386 -0.27914035 -0.4387378 -0.33735904 -0.03323634]
[-0.40310845 -0.43084973 -0.27778476 -0.2462857 0.04993651]]]
[[[-0.08484216 0.19511358 0.58113253 -0.12703945 -0.516542 ]
[ 0.17010233 0.2240115 0.23622094 -0.31102535 -0.59745365]
[ 0.397805 0.23805015 -0.1035163 -0.45656392 -0.34286296]
[ 0.18052064 -0.28208354 -0.29351595 -0.36484626 0.06465741]
[-0.20084426 -0.30468363 -0.2777929 0.08292956 -0.01636941]]]
[[[-0.05747546 0.10129268 0.0927546 0.01556351 -0.16821466]
[-0.0250085 0.140934 0.12933072 0.19052765 0.20077062]
[ 0.2489682 0.18465307 0.23520534 0.26735055 0.24849436]
[ 0.00098434 -0.29655868 -0.13283624 -0.11904856 -0.02703394]
[-0.31173185 -0.3589846 -0.2216169 0.05286852 -0.00669706]]]
[[[-0.46006405 -0.41662437 -0.26404095 -0.27005908 0.00341533]
[-0.07625411 -0.01859824 -0.0235228 0.0303653 0.10755768]
[ 0.07276727 0.20107509 0.15815544 0.3283318 0.23039222]
[ 0.21414295 0.14830865 0.24796312 0.01516124 -0.05039264]
[ 0.01465091 0.08253051 -0.08803863 0.01456806 -0.17668988]]]
[[[ 0.09082198 0.38919494 0.33294797 -0.5168951 -0.62100536]
[-0.03360464 0.21474971 0.37199846 -0.29824486 -0.6191712 ]
[ 0.11340497 0.20264329 0.37084493 -0.32331055 -0.5669018 ]
[ 0.20493641 0.2751836 0.10829608 -0.20219678 -0.39315876]
[ 0.14139216 0.20002662 0.17661056 -0.22110288 -0.28045934]]]
[[[ 0.12587526 -0.01364575 -0.2322505 -0.14462651 -0.03129309]
[ 0.171594 0.22744659 -0.05975187 -0.18951881 -0.2751198 ]
[ 0.19060391 0.12572204 0.3344037 0.26089588 -0.12050828]
[-0.17589498 -0.02884873 0.20712087 0.19588387 0.0149854 ]
[-0.2094244 -0.37416157 -0.08472645 0.12522626 0.06411268]]]]
# Show the bias values.
print("conv_layer_bias = {0}".format(conv_layer_bias))
conv_layer_bias = [-0.00722218 0.00386539 -0.10034832 -0.10226133 -0.00783706 -0.00266487
-0.15441592 -0.17244887 0.00067333 -0.17412803]
# Report the value range (signed and absolute) of the convolution layer's
# weights and biases, then shape, spread and range of its output.
abs_conv_layer_weight = np.abs(conv_layer_weight)
abs_conv_layer_bias = np.abs(conv_layer_bias)
abs_conv_output = np.abs(conv_output)

print("np.max(conv_layer_weight) = {0}".format(conv_layer_weight.max()))
print("np.min(conv_layer_weight) = {0}".format(conv_layer_weight.min()))
print("np.max(abs_conv_layer_weight) = {0}".format(abs_conv_layer_weight.max()))
print("np.min(abs_conv_layer_weight) = {0}".format(abs_conv_layer_weight.min()))

print("np.max(conv_layer_bias) = {0}".format(conv_layer_bias.max()))
print("np.min(conv_layer_bias) = {0}".format(conv_layer_bias.min()))
print("np.max(abs_conv_layer_bias) = {0}".format(abs_conv_layer_bias.max()))
print("np.min(abs_conv_layer_bias) = {0}".format(abs_conv_layer_bias.min()))

print("conv_output = {0}".format(conv_output.shape))
print("np.std(conv_output) = {0}".format(conv_output.std()))
print("np.max(conv_output) = {0}".format(conv_output.max()))
print("np.min(conv_output) = {0}".format(conv_output.min()))
print("np.max(abs_conv) = {0}".format(abs_conv_output.max()))
print("np.min(abs_conv) = {0}".format(abs_conv_output.min()))
np.max(conv_layer_weight) = 0.581132531166
np.min(conv_layer_weight) = -0.65549826622
np.max(abs_conv_layer_weight) = 0.65549826622
np.min(abs_conv_layer_weight) = 0.000984335667454
np.max(conv_layer_bias) = 0.0038653917145
np.min(conv_layer_bias) = -0.17412802577
np.max(abs_conv_layer_bias) = 0.17412802577
np.min(abs_conv_layer_bias) = 0.000673334288877
conv_output = (10000, 24, 24, 10)
np.std(conv_output) = 0.691880404949
np.max(conv_output) = 3.46592283249
np.min(conv_output) = -4.23804473877
np.max(abs_conv) = 4.23804473877
np.min(abs_conv) = 7.68341124058e-09
# Plot the weights of the convolution layer (flattened to 1-D so that every
# weight becomes one point along the x axis).
conv_layer_weight_f = conv_layer_weight.flatten()
plt.plot(conv_layer_weight_f)
plt.title('conv_layer_weight')
plt.show()
# Plot the biases of the convolution layer.
conv_layer_bias_f = conv_layer_bias.flatten()
plt.plot(conv_layer_bias_f)
plt.title('conv_layer_bias')
plt.show()
# My MNIST CNN (the convolution layer has 10 feature maps)
# Conv2D - ReLU - MaxPooling - Dense - ReLU - Dense
# 2018/05/25 by marsee
# Reuses the Python code from "Keras / Tensorflowで始めるディープラーニング入門"
# https://qiita.com/yampy/items/706d44417c433e68db0d
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, Activation
from keras import backend as K
# Training hyper-parameters and MNIST image geometry.
batch_size = 128
num_classes = 10
epochs = 12
img_rows, img_cols = 28, 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# The TensorFlow and Theano backends of Keras place the channel axis
# differently, so add the single channel axis where the backend expects it.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Scale the pixel values by 1/255 as float32.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255

# Format into one string first: a multi-argument print(...) is printed as a
# tuple when this notebook runs under Python 2.
print('x_train shape: {0}'.format(x_train.shape))
print('{0} train samples'.format(x_train.shape[0]))
print('{0} test samples'.format(x_test.shape[0]))

# One-hot encode the labels; keras.utils.to_categorical is the public entry
# point for the helper formerly reached through keras.utils.np_utils.
y_train = y_train.astype('int32')
y_test = y_test.astype('int32')
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

# Conv2D(10 filters, 5x5) - ReLU - MaxPooling(2x2) - Flatten
# - Dense(100) - ReLU - Dense(num_classes) - softmax
model = Sequential()
model.add(Conv2D(10, kernel_size=(5, 5),
                 input_shape=input_shape))
model.add(Activation(activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(100))
model.add(Activation(activation='relu'))
model.add(Dense(num_classes))
model.add(Activation(activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# The test split is passed as validation data during training.
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                    verbose=1, validation_data=(x_test, y_test))
('x_train shape:', (60000, 28, 28, 1))
(60000, 'train samples')
(10000, 'test samples')
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
60000/60000 [==============================] - 12s 201us/step - loss: 0.2579 - acc: 0.9231 - val_loss: 0.0840 - val_acc: 0.9733
Epoch 2/12
60000/60000 [==============================] - 12s 201us/step - loss: 0.0785 - acc: 0.9762 - val_loss: 0.0564 - val_acc: 0.9819
Epoch 3/12
60000/60000 [==============================] - 12s 192us/step - loss: 0.0545 - acc: 0.9834 - val_loss: 0.0492 - val_acc: 0.9838
Epoch 4/12
60000/60000 [==============================] - 13s 210us/step - loss: 0.0425 - acc: 0.9869 - val_loss: 0.0442 - val_acc: 0.9862
Epoch 5/12
60000/60000 [==============================] - 12s 196us/step - loss: 0.0340 - acc: 0.9898 - val_loss: 0.0396 - val_acc: 0.9875
Epoch 6/12
60000/60000 [==============================] - 12s 198us/step - loss: 0.0284 - acc: 0.9915 - val_loss: 0.0382 - val_acc: 0.9874
Epoch 7/12
60000/60000 [==============================] - 11s 191us/step - loss: 0.0243 - acc: 0.9928 - val_loss: 0.0340 - val_acc: 0.9886
Epoch 8/12
60000/60000 [==============================] - 11s 189us/step - loss: 0.0206 - acc: 0.9937 - val_loss: 0.0371 - val_acc: 0.9878
Epoch 9/12
60000/60000 [==============================] - 12s 199us/step - loss: 0.0167 - acc: 0.9949 - val_loss: 0.0312 - val_acc: 0.9897
Epoch 10/12
60000/60000 [==============================] - 12s 195us/step - loss: 0.0146 - acc: 0.9954 - val_loss: 0.0317 - val_acc: 0.9896
Epoch 11/12
60000/60000 [==============================] - 11s 188us/step - loss: 0.0121 - acc: 0.9963 - val_loss: 0.0344 - val_acc: 0.9892
Epoch 12/12
60000/60000 [==============================] - 12s 205us/step - loss: 0.0103 - acc: 0.9970 - val_loss: 0.0320 - val_acc: 0.9898
# Write a diagram of the network (with tensor shapes) to ./model.png and
# print the layer-by-layer summary.
from keras.utils.vis_utils import plot_model

plot_model(model, to_file='./model.png', show_shapes=True)
model.summary()
_______________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d_5 (Conv2D) (None, 24, 24, 10) 260
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 12, 12, 10) 0
_________________________________________________________________
flatten_4 (Flatten) (None, 1440) 0
_________________________________________________________________
dense_7 (Dense) (None, 100) 144100
_________________________________________________________________
dense_8 (Dense) (None, 10) 1010
=================================================================
Total params: 145,370
Trainable params: 145,370
Non-trainable params: 0
# Save the trained model.
from keras.models import load_model

model.save('mnist_cnn10_model.h5')  # creates the HDF5 file 'mnist_cnn10_model.h5'
del model  # deletes the existing model

# Load the trained model back from disk.
from keras.models import load_model

model = load_model('mnist_cnn10_model.h5')

# Dump every weight array of the reloaded model. The function-call form of
# print with a single argument behaves the same on Python 2 and Python 3.
model_list = model.get_weights()
print(model_list)
[array([[[[-0.4802782 , 0.08903456, 0.17129059, -0.18762073,
-0.19215566, 0.0767612 , -0.19989973, 0.16069482,
0.10121205, 0.10839224]],
[[-0.55260026, 0.31618226, 0.02519642, -0.3371241 ,
-0.18700214, 0.47713503, 0.01407363, 0.18270946,
-0.00360232, 0.0480496 ]],
[[-0.28096938, 0.31544265, 0.21408693, -0.4249214 ,
0.05259206, 0.24198672, -0.12785499, -0.16038668,
0.25517157, 0.02352966]],
[[ 0.19760679, 0.17439696, 0.15509322, -0.3724036 ,
0.14294085, 0.30212507, 0.20030482, -0.0445758 ,
0.16999234, 0.11382752]],
[[ 0.34932545, 0.19552206, 0.3587776 , -0.50719273,
0.2470015 , 0.07441435, 0.22428715, -0.35767055,
-0.07781951, 0.22610919]]],
[[[-0.50595784, 0.20634648, 0.32507014, 0.05827161,
-0.15143315, -0.48752806, 0.10217368, 0.00805497,
0.0631953 , -0.02518882]],
[[-0.18465312, 0.30423662, 0.39945245, 0.01863884,
0.03493409, -0.5750042 , 0.02879223, 0.03981001,
0.0992638 , -0.04431673]],
[[-0.01881977, 0.28165781, 0.27969372, -0.12105816,
0.39619493, -0.20682125, 0.11560314, 0.15059802,
0.31867403, -0.01151863]],
[[ 0.34246606, 0.23152475, 0.11814857, -0.16201593,
0.33553603, -0.08004267, 0.05377955, 0.05844887,
0.3824376 , 0.29480523]],
[[ 0.22607948, 0.1431006 , 0.12278882, -0.06739531,
-0.10244448, -0.16496062, 0.07855147, -0.0039023 ,
-0.01187495, 0.31914127]]],
[[[-0.36106065, -0.1614712 , 0.0425666 , 0.26438203,
0.1530066 , -0.27296525, 0.13756014, 0.12951061,
-0.13317643, -0.27455592]],
[[-0.24096149, 0.00605726, 0.1439901 , 0.2999792 ,
0.3998839 , -0.57802194, 0.20733553, 0.22792007,
-0.11590697, -0.10110019]],
[[ 0.19279055, -0.05433302, 0.1588538 , 0.36550105,
0.3963272 , -0.40570349, 0.07748813, -0.03811004,
0.27451742, -0.20028938]],
[[ 0.3532763 , -0.00738094, 0.07176854, 0.44040167,
0.05400598, -0.34983876, 0.0413314 , 0.1358357 ,
0.4606149 , 0.22590274]],
[[ 0.02443966, 0.31850007, 0.0679253 , 0.44180956,
-0.33495304, -0.47900125, 0.01341348, 0.11268906,
0.37477848, 0.21716192]]],
[[[-0.28646868, -0.33610198, -0.4617771 , 0.21716239,
0.13870867, 0.2500289 , -0.00521774, 0.18825355,
-0.44079527, -0.45571223]],
[[ 0.10224386, -0.38061497, -0.319509 , 0.12749907,
0.24174243, 0.12979732, -0.03148215, 0.25274608,
-0.31800672, -0.29355517]],
[[ 0.30091146, -0.28666645, -0.10368297, 0.39559895,
-0.08464333, -0.20640281, 0.10996511, -0.12132592,
-0.2605404 , -0.15574814]],
[[ 0.27224183, -0.08070813, -0.30462918, 0.2425765 ,
-0.42863956, -0.38760048, 0.01434944, 0.01161201,
0.11027226, 0.15539089]],
[[-0.03363509, 0.15163417, -0.39445326, 0.26487276,
-0.46634 , -0.37421483, -0.25277063, -0.30671078,
0.15761915, 0.27163678]]],
[[[ 0.01090175, -0.3971682 , -0.3616919 , -0.2198588 ,
0.22649288, 0.36216414, 0.16255492, 0.28195596,
-0.2900382 , -0.3076533 ]],
[[ 0.02631478, -0.21446598, -0.64792824, -0.12109952,
0.0477443 , 0.6136793 , 0.07133511, 0.03860151,
-0.32767197, -0.33002102]],
[[ 0.26029003, -0.17678206, -0.53189945, -0.22726585,
-0.29128915, 0.50199383, 0.10108512, -0.10461918,
-0.41573155, -0.39046872]],
[[ 0.08828537, -0.34382978, -0.27508992, -0.20536248,
-0.29675165, 0.05918439, -0.04499418, -0.4220725 ,
-0.12406551, 0.18522236]],
[[-0.16209605, -0.10475101, -0.17508513, -0.19714086,
-0.2464418 , -0.06318696, -0.16879626, -0.35019565,
0.11980721, 0.3179446 ]]]], dtype=float32), array([-0.05326715, -0.10172927, -0.00153971, -0.00202374, -0.11225405,
0.03784639, -0.22747649, -0.1184488 , -0.13200918, -0.18062808],
dtype=float32), array([[ 0.05988384, 0.0177884 , -0.01428634, ..., 0.03967512,
-0.02745369, 0.0235893 ],
[-0.02437956, -0.02609818, 0.00166744, ..., -0.0283565 ,
-0.03127908, -0.03267145],
[ 0.01952924, 0.04912804, 0.05458435, ..., -0.03700528,
0.02735562, 0.05371138],
...,
[ 0.03930264, -0.05253042, -0.02040245, ..., 0.02623902,
-0.04602768, -0.0570806 ],
[ 0.05283869, 0.03285475, 0.0225143 , ..., 0.0078735 ,
0.06278732, -0.02751559],
[-0.03585282, 0.05243319, -0.00109443, ..., -0.02352152,
-0.0505695 , -0.03361446]], dtype=float32), array([-0.005398 , -0.01852978, 0.022526 , -0.05067606, -0.01559338,
0.00181101, -0.00674882, 0.02209998, 0.0119611 , 0.02653758,
0.04715605, 0.00115289, 0.01218627, 0.00955511, -0.00915333,
0.01971679, 0.00280326, -0.00939319, 0.02062515, -0.03470616,
-0.01717205, -0.0006302 , 0.0281504 , -0.02706482, -0.02169199,
-0.04854991, -0.01171666, -0.03342789, -0.00403417, 0.04069184,
-0.00713639, 0.01422117, -0.03270031, 0.07872368, -0.01933507,
-0.03112246, -0.0127308 , 0.02346958, 0.02234364, 0.0009662 ,
-0.01526533, 0.02731066, -0.02599644, 0.03074143, -0.02295697,
0.01084804, -0.00369489, -0.02861736, -0.01144757, 0.01723441,
0.03169027, -0.04880989, -0.00714325, 0.00121178, 0.02717792,
0.02516818, 0.04525929, 0.01094827, -0.0134111 , -0.00777571,
-0.01005205, -0.03453677, 0.02772082, -0.03537939, -0.01389873,
-0.00258296, 0.0014193 , 0.02491944, 0.00557612, 0.05277465,
0.03517715, 0.03964692, 0.04351197, -0.00425854, -0.00757222,
-0.00217356, 0.01388615, 0.04111845, -0.01360166, -0.02312726,
0.01533243, -0.01296438, -0.0164743 , -0.01136677, 0.01515818,
0.02614263, -0.0167461 , 0.00552223, -0.01624235, 0.00473867,
0.03577977, -0.01952587, 0.01290206, 0.0558174 , 0.01492252,
-0.04054749, -0.01954609, -0.00233423, 0.01031396, 0.03316768],
dtype=float32), array([[-2.80878514e-01, -2.88952619e-01, -6.83253184e-02,
-4.65221584e-01, -7.49759227e-02, 6.33983985e-02,
-4.09977026e-02, 1.52914107e-01, -1.62776962e-01,
2.25325406e-01],
[-1.16497621e-01, -4.10411984e-01, -7.08504915e-02,
1.14576600e-01, -2.84170300e-01, 2.02368587e-01,
3.22541237e-01, 2.77578473e-01, -2.31056251e-02,
-3.96834254e-01],
[ 1.92328736e-01, -2.25020871e-01, -1.11906387e-01,
-2.87643850e-01, -1.05152600e-01, 5.42706251e-02,
4.07181308e-03, -4.37325239e-01, -5.72222099e-02,
-3.27897817e-01],
[ 4.58031707e-02, -4.31063652e-01, -1.70698479e-01,
2.36170590e-01, 3.97744030e-02, 2.09598631e-01,
1.29017875e-01, -2.64844820e-02, 1.15363739e-01,
6.25539050e-02],
[-2.91498125e-01, 7.66226426e-02, 1.30788106e-02,
1.21028148e-01, 1.15245983e-01, 1.78283691e-01,
3.21665317e-01, 1.02033220e-01, 1.45019650e-01,
-4.34377521e-01],
[ 1.68283820e-01, -3.09178401e-02, 6.57193065e-02,
1.83798093e-02, -7.25373849e-02, 1.67001039e-02,
1.26208022e-01, -3.93356174e-01, 6.57608733e-02,
2.00210825e-01],
[ 9.03305188e-02, -3.31516951e-01, 1.92073271e-01,
1.28750160e-01, -5.60461223e-01, 2.02110186e-01,
2.47376963e-01, 1.91605061e-01, -1.78822219e-01,
-2.13595301e-01],
[ 1.79702878e-01, -2.85403341e-01, -3.79967839e-01,
-3.18726778e-01, -3.74518842e-01, -2.43183710e-02,
2.44848490e-01, 2.43323982e-01, -1.34915501e-01,
-3.29058856e-01],
[-4.59100045e-02, 1.11619122e-01, 2.06353113e-01,
-3.57645638e-02, -2.68476754e-01, 1.54805463e-02,
1.19950175e-01, 2.68370628e-01, 1.79247513e-01,
-5.92741966e-01],
[-1.18884221e-02, 3.30010265e-01, -3.34912717e-01,
-1.25106558e-01, -3.13555360e-01, 3.38253379e-01,
1.71396181e-01, -2.64308155e-01, -2.28820279e-01,
9.15235952e-02],
[-1.96573287e-02, 1.09934568e-01, 3.11464965e-01,
-3.46049458e-01, -4.22263414e-01, 1.13516875e-01,
-1.10042468e-01, 2.11521327e-01, 1.19895123e-01,
1.22512609e-01],
[ 4.91617508e-02, -1.46876182e-02, -1.10340687e-02,
-1.40922934e-01, 2.40739062e-01, 2.78086541e-03,
-3.04801941e-01, 1.87239528e-01, -4.08947110e-01,
1.64359197e-01],
[-1.23397402e-01, 1.75705373e-01, -9.41747651e-02,
-3.65135044e-01, 1.11648791e-01, 1.61744043e-01,
-4.55581516e-01, 2.70987302e-01, -2.47416683e-02,
-8.45107734e-02],
[ 8.28784853e-02, 1.96988985e-01, -4.19137686e-01,
3.10681686e-02, -2.06089336e-02, 2.11591825e-01,
-3.51008564e-01, 1.58253074e-01, -8.44351202e-02,
5.98762669e-02],
[-2.11891711e-01, 7.34165385e-02, 6.44661207e-03,
8.63067247e-03, -2.31139749e-01, -1.74408883e-01,
2.16911077e-01, -1.50186718e-01, -3.73014003e-01,
-3.68643641e-01],
[ 2.34784991e-01, 3.78104374e-02, 2.63703734e-01,
-1.64538354e-01, 7.57270120e-03, -3.90653104e-01,
-1.43432751e-01, 2.12894857e-01, 1.69312999e-01,
-2.59785682e-01],
[ 9.35157537e-02, -2.01519594e-01, 1.75988719e-01,
-2.92130262e-01, 2.28341654e-01, 1.91730857e-02,
7.07815886e-02, 1.37614071e-01, 1.98204890e-01,
-1.40204385e-01],
[-2.83252615e-02, 3.78034413e-02, -1.35061309e-01,
-1.62101313e-01, -6.32801875e-02, 2.15638056e-01,
-2.08552018e-01, -9.57552046e-02, -3.08166534e-01,
-1.19683079e-01],
[-1.83200374e-01, 3.76025349e-01, -1.16502978e-01,
-4.83475059e-01, -6.25678301e-02, 2.37899646e-01,
-3.66046041e-01, 2.22620890e-01, -4.73464787e-01,
-1.89155549e-01],
[-8.66091922e-02, -9.89375189e-02, -1.94817007e-01,
9.37115997e-02, 1.39805645e-01, -1.35264009e-01,
-3.90744150e-01, 2.31051669e-01, 1.50245175e-01,
1.29960239e-01],
[-1.88448876e-01, -2.16989845e-01, -1.20259278e-01,
-2.40481123e-01, 2.78745115e-01, -1.64214984e-01,
-1.34227902e-01, -2.03885585e-01, -2.50468135e-01,
1.82746530e-01],
[-3.06171596e-01, 2.18999043e-01, -3.19759518e-01,
-1.01892829e-01, -1.74568921e-01, -2.91039079e-01,
2.94285625e-01, -2.74888486e-01, -6.43369108e-02,
3.48723471e-01],
[ 2.33909730e-02, -6.47128597e-02, -4.31493759e-01,
2.13833421e-01, -1.19319089e-01, -2.89464384e-01,
-1.12514161e-01, 3.63984853e-01, -4.55533743e-01,
2.60717385e-02],
[-2.95785904e-01, -2.07676753e-01, 6.98602349e-02,
-8.48661549e-03, -3.83481503e-01, -6.69387057e-02,
2.18238056e-01, 2.49815926e-01, -2.70438492e-01,
-4.34468538e-01],
[ 4.70142439e-02, -6.06754273e-02, -5.58634579e-01,
1.58430338e-01, 3.16151381e-02, -1.07376061e-01,
-3.49974513e-01, -7.29726180e-02, -3.56006436e-02,
3.03892493e-01],
[-3.34823012e-01, -2.51858741e-01, -2.25251958e-01,
2.37957463e-01, 2.57917583e-01, 3.60314548e-02,
-3.53990138e-01, -7.66057670e-02, 2.54444063e-01,
2.12081417e-01],
[-8.70895386e-02, 5.11322869e-03, -2.91756868e-01,
-2.07769036e-01, 2.97158152e-01, -4.08434540e-01,
-2.68067390e-01, 2.00924113e-01, -4.38634753e-01,
7.55239949e-02],
[ 1.79762044e-03, -3.45835894e-01, 4.62487787e-02,
1.29018426e-01, 2.00612947e-01, 2.40591943e-01,
5.16790375e-02, -4.29002047e-01, 2.19572246e-01,
1.05557097e-02],
[-3.16552520e-01, 8.77547190e-02, 4.17853892e-02,
-4.08354223e-01, -1.12028815e-01, 2.61908531e-01,
7.60191679e-02, -2.38478839e-01, 1.47306379e-02,
3.49691689e-01],
[ 2.84786552e-01, -6.80592703e-03, -4.72505003e-01,
-2.76086569e-01, -3.66689682e-01, 1.20811805e-01,
-1.53523177e-01, -1.52481034e-01, -2.68145621e-01,
4.70004231e-02],
[-4.45838310e-02, 7.93924257e-02, -7.34841526e-02,
2.08099082e-01, -3.68329078e-01, 2.06218287e-01,
-4.25800115e-01, 1.48879498e-01, -1.16511500e-02,
2.36930773e-01],
[ 1.89541459e-01, -1.49233416e-01, 2.09065124e-01,
2.00762693e-02, -2.88338929e-01, 1.99728936e-01,
-1.97521448e-01, 1.43347621e-01, -2.36340642e-01,
-3.29071403e-01],
[-1.43338472e-01, 2.85153925e-01, 3.88877541e-02,
2.14123070e-01, -3.78108233e-01, 2.49663651e-01,
-1.90097138e-01, -2.09107697e-01, -3.31528336e-01,
2.41039231e-01],
[-9.46740285e-02, 3.46576244e-01, -3.03534240e-01,
-5.37712157e-01, -3.05850301e-02, -6.98754862e-02,
1.39591247e-01, -3.61753345e-01, -2.54348367e-01,
-1.73599243e-01],
[-7.52603561e-02, -4.64169860e-01, -3.11523080e-01,
-3.30096390e-03, -2.48671651e-01, 2.67981470e-01,
3.46507430e-02, 6.33662045e-02, -1.73883047e-02,
1.57723546e-01],
[-2.55069643e-01, -3.28791529e-01, -1.91593617e-01,
1.98152009e-02, 2.38621473e-01, 1.73862070e-01,
-3.37664545e-01, 8.53433087e-02, -2.10560709e-01,
3.63261439e-02],
[-5.02947047e-02, -2.81642228e-01, -2.31026471e-01,
-2.10499510e-01, -1.52135611e-01, -2.49384075e-01,
-1.82276487e-01, 3.45350131e-02, 1.10356145e-01,
6.95606992e-02],
[-9.24093127e-02, 4.59271222e-02, 3.93759608e-02,
2.91396320e-01, -6.77868873e-02, -4.35065717e-01,
-3.41314614e-01, -2.61516005e-01, -1.26054004e-01,
2.79389948e-01],
[-1.65248692e-01, 3.01901400e-01, -4.27287251e-01,
-2.14315772e-01, 1.85965419e-01, -6.04463667e-02,
6.02773912e-02, 2.65717179e-01, -4.41522062e-01,
1.70508966e-01],
[-2.63828665e-01, -8.64971578e-02, -2.78461576e-01,
6.19269870e-02, 1.75784558e-01, 2.26351202e-01,
2.23095179e-01, -7.56275430e-02, 1.32076845e-01,
-9.38763022e-02],
[-2.86855221e-01, 3.81994694e-02, -3.11091870e-01,
-7.58372173e-02, 2.34059244e-01, -1.56661533e-02,
7.35731423e-02, 1.77167624e-01, 2.17671648e-01,
-1.91961788e-03],
[ 1.86832726e-01, 1.35448843e-01, 3.00116807e-01,
-1.73696235e-01, -9.37856138e-02, 1.51225656e-01,
-9.32918712e-02, -3.50246668e-01, -3.24846774e-01,
-5.62554505e-03],
[-3.51985544e-01, 1.26511768e-01, 1.49354547e-01,
1.93097010e-01, 8.64192247e-02, 7.05208927e-02,
-5.19544959e-01, 2.70478725e-01, 1.65952802e-01,
1.64275318e-02],
[-1.92465544e-01, 4.02155459e-01, -1.52402923e-01,
-1.99932814e-01, 3.47709596e-01, 6.46834169e-03,
3.87867332e-01, 1.28063947e-01, -3.45817953e-01,
-4.91191477e-01],
[ 6.11011274e-02, -2.56536752e-01, -3.15822661e-02,
-1.52900815e-01, -3.21199924e-01, 1.59323826e-01,
3.27706814e-01, 1.36229798e-01, -2.06773639e-01,
2.47566059e-01],
[-2.85091579e-01, -1.90595418e-01, 2.74384946e-01,
-1.67294994e-01, -1.41146630e-01, 2.19166890e-01,
-7.70386904e-02, -6.47071823e-02, 9.02576721e-06,
-3.55082661e-01],
[-8.33381787e-02, -2.64859885e-01, 1.22059382e-01,
-2.96119362e-01, -2.02185154e-01, -2.04638451e-01,
-2.34781384e-01, 4.05640006e-01, 3.95843871e-02,
-5.00460327e-01],
[-6.75931275e-02, -1.52381118e-02, -5.84617443e-02,
-7.04567367e-03, -4.38545316e-01, -1.61278471e-01,
-8.04644227e-02, 2.87377626e-01, -3.14729095e-01,
1.63273811e-01],
[-4.85402755e-02, -1.85809851e-01, 2.34951764e-01,
1.07792743e-01, -1.19594529e-01, -4.99206930e-01,
-3.42985153e-01, 2.04150021e-01, -1.40759617e-01,
4.17127758e-02],
[ 4.76027206e-02, 2.02197984e-01, -2.56469403e-03,
5.16320094e-02, -4.73078012e-01, 1.83827221e-01,
2.54296690e-01, -5.82304932e-02, 2.67515868e-01,
-3.10604237e-02],
[ 2.36623093e-01, -2.22096846e-01, -1.41983896e-01,
-3.52556050e-01, -3.20000276e-02, -9.11143273e-02,
5.81668355e-02, 1.78689566e-02, 2.08153933e-01,
-3.37888002e-01],
[-4.14484113e-01, 5.72569035e-02, 6.03880510e-02,
2.75157154e-01, -6.30981252e-02, -2.10512072e-01,
1.08315110e-01, 2.03937560e-01, -6.50893524e-02,
-2.20293328e-01],
[ 2.76402891e-01, 1.74387738e-01, 1.06817611e-01,
-8.05123001e-02, -1.91160321e-01, -3.90674204e-01,
-2.64012404e-02, 2.29674041e-01, 2.47093081e-01,
1.52173817e-01],
[ 1.88219309e-01, -1.60041615e-01, -2.60463208e-01,
-2.78181106e-01, 1.74190581e-01, 1.50532395e-01,
2.39792839e-01, -1.97646185e-03, -3.39136064e-01,
2.27342233e-01],
[ 1.81723490e-01, 2.82607019e-01, 1.06002472e-01,
-8.10205936e-03, -3.41993093e-01, -1.69809997e-01,
7.41727501e-02, -3.31415057e-01, -1.56984672e-01,
6.68503791e-02],
[-3.35356623e-01, 2.68279344e-01, -6.96995184e-02,
-5.31341374e-01, 1.01676062e-01, 1.10403292e-01,
-1.39216095e-01, 1.23701543e-01, -2.04578757e-01,
-3.05327803e-01],
[ 1.52495489e-01, 2.40899324e-01, 8.64105150e-02,
-4.70167339e-01, 3.36731046e-01, -2.88031578e-01,
1.22474372e-01, -2.08129853e-01, -4.26135898e-01,
1.23008616e-01],
[ 2.19447419e-01, -4.25975442e-01, -2.69947082e-01,
-2.57135965e-02, -3.49941641e-01, 1.04076453e-01,
-3.61683359e-03, -2.87522405e-01, 2.05251873e-02,
-1.03612281e-01],
[ 1.96892500e-01, -8.73491615e-02, 1.34108305e-01,
1.87838942e-01, -7.36293867e-02, -1.67612627e-01,
5.34972176e-02, -1.28114730e-01, 2.39227191e-01,
1.52084604e-01],
[-2.42285430e-01, -1.80974156e-01, 2.19037905e-01,
-2.78696179e-01, -1.71007901e-01, -2.07579866e-01,
-2.65557259e-01, 2.27460340e-01, -1.89204544e-01,
1.05596513e-01],
[-3.00730944e-01, -8.07805061e-02, -3.20149034e-01,
1.02798693e-01, 3.85463059e-01, 1.40067115e-01,
-3.96318287e-01, 1.27322385e-02, -5.26268661e-01,
2.65310705e-01],
[ 1.77418590e-02, -3.32880855e-01, 1.96217701e-01,
9.38635245e-02, 9.29819867e-02, -3.18250328e-01,
-9.68264043e-02, 1.16937332e-01, 7.45990947e-02,
1.64286569e-01],
[-2.65594870e-01, 1.00067541e-01, 1.91965532e-02,
-3.47705372e-03, 1.33793931e-02, 1.58512205e-01,
-1.75091654e-01, -8.55431631e-02, 1.84122652e-01,
2.84497470e-01],
[-2.72124559e-01, -1.85961753e-01, -4.86050874e-01,
1.09921977e-01, -1.12289395e-02, 2.26568803e-01,
2.64995009e-01, 1.38251394e-01, -4.94774207e-02,
-6.99937791e-02],
[ 3.74109745e-02, 2.47385919e-01, 9.68004614e-02,
2.26332277e-01, 1.86111126e-02, -2.54699349e-01,
1.74074680e-01, 1.21922724e-01, -3.51350099e-01,
-4.60877508e-01],
[ 2.35768378e-01, 4.54334393e-02, 5.33979014e-02,
1.27460673e-01, -7.01661766e-01, -8.58926475e-02,
-2.22885370e-01, -4.78002690e-02, -2.42045864e-01,
-4.47942078e-01],
[-3.86173278e-01, 2.65619904e-01, 2.26018026e-01,
8.76465663e-02, 2.24053368e-01, -3.48186679e-02,
-2.83748150e-01, 5.31676486e-02, -3.37433070e-01,
-3.55700403e-01],
[-3.32953036e-01, 1.71580344e-01, 1.71892568e-01,
2.22348973e-01, -1.97261035e-01, 7.07722157e-02,
-2.85758406e-01, -1.79066554e-01, 1.86767325e-01,
-2.61755675e-01],
[-1.66537970e-01, -7.62976781e-02, 1.34396255e-01,
2.12341934e-01, -3.06788445e-01, -2.03811824e-01,
1.15235768e-01, -2.25298047e-01, 8.40689316e-02,
-1.22556776e-01],
[-1.55728787e-01, 8.89719203e-02, 2.23977998e-01,
-3.28815132e-02, -4.55562890e-01, -4.49441552e-01,
1.48577452e-01, -2.08408982e-01, -3.42789024e-01,
7.36563057e-02],
[-2.99370289e-01, 2.14110270e-01, -2.54100394e-02,
5.72636165e-03, 2.37848416e-01, -1.61451086e-01,
-2.57838815e-01, -1.68274287e-02, 2.10511789e-01,
-1.72612481e-02],
[ 2.54113376e-01, 1.52949333e-01, -3.42802823e-01,
8.62261131e-02, 3.54665339e-01, -2.70900726e-01,
-2.58765638e-01, -4.86204416e-01, -6.25081882e-02,
-2.48219520e-01],
[-2.11227015e-02, 3.11614543e-01, -7.31830969e-02,
5.06604388e-02, -3.96664739e-01, -5.11043549e-01,
-7.12758601e-02, 5.23637906e-02, -1.72585770e-01,
-1.12323724e-01],
[-2.88673878e-01, -3.41729730e-01, 8.76247585e-02,
-3.31366479e-01, 2.95934714e-02, -1.40673220e-01,
-2.64001906e-01, -3.75759751e-01, -2.20368892e-01,
1.65278658e-01],
[-2.61867434e-01, -1.25094682e-01, -1.88089572e-02,
-2.86843389e-01, 2.07129523e-01, 9.33256671e-02,
2.02610716e-01, 1.14842623e-01, 2.93302596e-01,
-3.47602844e-01],
[-5.24003319e-02, -1.82070181e-01, 2.64166474e-01,
1.53725669e-01, 1.60775915e-01, -3.94912884e-02,
5.98972812e-02, 1.27977923e-01, 3.62922251e-02,
-2.37544343e-01],
[ 2.83543635e-02, -3.82527709e-01, 1.03175066e-01,
-3.20723563e-01, -3.55718613e-01, 3.48593853e-02,
1.52670860e-01, 2.37062365e-01, -8.84934217e-02,
1.07653186e-01],
[-8.02218262e-03, 1.98889375e-01, 2.36755818e-01,
-3.64727765e-01, -1.66197829e-02, -2.52599061e-01,
3.08327228e-02, -2.65515476e-01, -4.67243642e-02,
-9.93776992e-02],
[-2.88613439e-01, 1.60547391e-01, 2.01603085e-01,
3.29991728e-01, -5.04409075e-01, 2.12129638e-01,
1.29706696e-01, 3.26536223e-02, -5.79475239e-02,
-3.03462207e-01],
[ 1.73510164e-01, -4.73308414e-01, 3.45571339e-02,
-2.80015141e-01, -1.42622843e-01, -3.91261354e-02,
-4.54723686e-02, -4.02801454e-01, -4.28644791e-02,
2.38903821e-01],
[-7.78434575e-02, 7.01967180e-02, -3.48449171e-01,
-5.15536427e-01, 3.84873636e-02, -3.21855098e-01,
3.03351879e-01, -1.55083299e-01, 2.30123237e-01,
-8.70974036e-04],
[-2.91565925e-01, -1.71608552e-01, -3.24349612e-01,
2.24242955e-01, 2.14290485e-01, -3.38350609e-02,
1.36947989e-01, 1.28424913e-02, 5.85890487e-02,
-4.89389390e-01],
[-3.49002093e-01, -2.24898815e-01, 2.38856182e-01,
2.56595671e-01, 2.10459158e-02, 1.75605848e-01,
-3.92496169e-01, -1.32802978e-01, 8.51763859e-02,
4.41880003e-02],
[ 1.49462268e-01, -8.83552507e-02, -2.92263716e-01,
3.37864727e-01, 1.99508980e-01, -1.99407041e-02,
1.95664078e-01, -3.62197131e-01, -2.76591420e-01,
-1.89328298e-01],
[-1.29553089e-02, 2.76988298e-01, -4.49188322e-01,
4.61517796e-02, -4.40331697e-01, 3.83680701e-01,
1.05097756e-01, -5.92100248e-02, -3.65615457e-01,
-1.52612358e-01],
[ 1.82668746e-01, 2.31690034e-01, -2.92528003e-01,
-2.55986303e-01, 8.32752287e-02, 2.51465917e-01,
-1.01054765e-01, -7.06190243e-03, -2.96387017e-01,
-4.84422773e-01],
[-1.21400669e-01, -3.13585289e-02, -1.49048269e-01,
-2.05441698e-01, 2.27967411e-01, 1.49613246e-01,
-1.65963650e-01, -3.29703897e-01, 8.17758366e-02,
9.74359736e-02],
[ 2.89199293e-01, -7.04863593e-02, -1.22465491e-01,
-7.15844659e-03, -5.14583707e-01, 6.51249960e-02,
-1.20044194e-01, 3.26011032e-01, -1.46292567e-01,
-1.34193763e-01],
[ 2.07244024e-01, 2.18047187e-01, -3.28369349e-01,
3.54067504e-01, -1.34166539e-01, -3.58019650e-01,
3.81010287e-02, 1.76515803e-01, -2.42457300e-01,
-2.58901734e-02],
[-1.83583036e-01, 9.63129923e-02, 1.80392161e-01,
-3.12118948e-01, 1.35678813e-01, -4.84556109e-01,
2.94969007e-02, 2.10692644e-01, 2.85272449e-01,
7.40195960e-02],
[ 2.14306619e-02, 2.23890185e-01, -5.03564402e-02,
-5.09863198e-01, 7.22838640e-02, 3.17906708e-01,
-2.77185917e-01, 9.14458837e-03, 2.00836927e-01,
2.31873438e-01],
[ 4.97248918e-02, 4.68859673e-02, -3.31564605e-01,
2.14749053e-01, -2.34072153e-02, 2.28042364e-01,
-2.55816698e-01, 1.08321579e-02, 2.10366532e-01,
1.82601720e-01],
[-2.62626056e-02, -1.21024236e-01, 1.02919996e-01,
-5.97031154e-02, 1.77486595e-02, 6.08048066e-02,
2.15883568e-01, -5.29421747e-01, -1.41826728e-02,
6.10244796e-02],
[ 2.30102479e-01, 1.66655496e-01, -8.18227942e-04,
-3.35090339e-01, 1.70878336e-01, -1.07719235e-01,
-2.74331063e-01, 1.52734920e-01, -4.22130316e-01,
4.29905392e-03],
[ 1.01837583e-01, -1.64990351e-01, 4.56752926e-02,
-3.95802766e-01, 1.88975126e-01, -7.27127343e-02,
1.77403137e-01, -4.33928579e-01, 6.07757755e-02,
-8.86140168e-02],
[-4.19868499e-01, -2.48233587e-01, -2.57547587e-01,
3.02368283e-01, 1.13392621e-01, -1.54164657e-01,
-2.27882534e-01, 1.99300513e-01, -4.92941290e-01,
3.32245409e-01],
[-1.01652101e-01, -4.19613987e-01, 3.48329127e-01,
-2.74736881e-01, 2.61124879e-01, 6.26765192e-02,
4.22035269e-02, -5.20829000e-02, -3.64311099e-01,
-1.79357663e-01],
[ 1.39732897e-01, -2.66956538e-01, -3.84257436e-01,
-1.13201685e-01, 7.89685026e-02, 2.69948304e-01,
1.01909287e-01, -9.96095836e-02, 1.48862645e-01,
1.53904662e-01],
[ 1.36547923e-01, -2.41517991e-01, 2.04632416e-01,
-1.64432637e-02, -1.54070064e-01, -1.83150306e-01,
-2.03562394e-01, -2.21244648e-01, 1.00092985e-01,
-1.01901151e-01],
[-5.50452732e-02, -6.54754788e-02, 1.59336641e-01,
-3.78038645e-01, 1.72017649e-01, -2.92336732e-01,
3.41575712e-01, -4.80901748e-01, -3.72718275e-01,
-1.70941189e-01]], dtype=float32), array([ 0.01098968, 0.03846952, 0.00500416, -0.03767109, -0.01391269,
-0.02019179, -0.05111629, -0.06184113, 0.04913256, -0.00327604],
dtype=float32)]
# Write a diagram of the network (including layer shapes) to ./model.png.
from keras.utils.vis_utils import plot_model

plot_model(model, to_file='./model.png', show_shapes=True)
# My Mnist CNN
# Conv2D - ReLU - MaxPooling - Dense - ReLU - Dense
# 2018/05/25 by marsee
# Keras / Tensorflowで始めるディープラーニング入門 https://qiita.com/yampy/items/706d44417c433e68db0d
# のPythonコードを再利用させて頂いている
import keras
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
# Training hyper-parameters and MNIST image geometry.
batch_size = 128
num_classes = 10
epochs = 12
img_rows, img_cols = 28, 28

(x_train, y_train), (x_test, y_test) = mnist.load_data()

# The position of the channel axis depends on the Keras backend configuration
# (TensorFlow vs. Theano conventions), so add the single channel axis accordingly.
if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)

# Convert pixels to float32 and divide by 255 to scale them down.
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')

# One-hot encode the labels for categorical cross-entropy.
y_train = y_train.astype('int32')
y_test = y_test.astype('int32')
y_train = keras.utils.np_utils.to_categorical(y_train, num_classes)
y_test = keras.utils.np_utils.to_categorical(y_test, num_classes)

# Network: Conv2D(10 filters, 5x5, ReLU) -> MaxPooling(2x2) -> Flatten
#          -> Dense(100, ReLU) -> Dense(num_classes, softmax)
model = Sequential()
model.add(Conv2D(10, kernel_size=(5, 5),
                 activation='relu',
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(100, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# The test split is passed as validation data; `history` keeps the per-epoch
# metrics that the plotting code further down reads.
history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                    verbose=1, validation_data=(x_test, y_test))
('x_train shape:', (60000, 28, 28, 1))
(60000, 'train samples')
(10000, 'test samples')
Train on 60000 samples, validate on 10000 samples
Epoch 1/12
60000/60000 [==============================] - 16s 265us/step - loss: 0.2680 - acc: 0.9204 - val_loss: 0.1202 - val_acc: 0.9640
Epoch 2/12
60000/60000 [==============================] - 16s 261us/step - loss: 0.0822 - acc: 0.9754 - val_loss: 0.0626 - val_acc: 0.9792
Epoch 3/12
60000/60000 [==============================] - 16s 260us/step - loss: 0.0558 - acc: 0.9830 - val_loss: 0.0476 - val_acc: 0.9845
Epoch 4/12
60000/60000 [==============================] - 15s 256us/step - loss: 0.0429 - acc: 0.9869 - val_loss: 0.0470 - val_acc: 0.9842
Epoch 5/12
60000/60000 [==============================] - 15s 254us/step - loss: 0.0349 - acc: 0.9891 - val_loss: 0.0369 - val_acc: 0.9867
Epoch 6/12
60000/60000 [==============================] - 17s 279us/step - loss: 0.0290 - acc: 0.9910 - val_loss: 0.0376 - val_acc: 0.9871
Epoch 7/12
60000/60000 [==============================] - 16s 274us/step - loss: 0.0238 - acc: 0.9927 - val_loss: 0.0372 - val_acc: 0.9877
Epoch 8/12
60000/60000 [==============================] - 16s 261us/step - loss: 0.0204 - acc: 0.9940 - val_loss: 0.0328 - val_acc: 0.9884
Epoch 9/12
60000/60000 [==============================] - 15s 251us/step - loss: 0.0172 - acc: 0.9947 - val_loss: 0.0334 - val_acc: 0.9878
Epoch 10/12
60000/60000 [==============================] - 15s 254us/step - loss: 0.0146 - acc: 0.9957 - val_loss: 0.0342 - val_acc: 0.9889
Epoch 11/12
60000/60000 [==============================] - 15s 254us/step - loss: 0.0123 - acc: 0.9966 - val_loss: 0.0359 - val_acc: 0.9878
Epoch 12/12
60000/60000 [==============================] - 15s 257us/step - loss: 0.0109 - acc: 0.9968 - val_loss: 0.0340 - val_acc: 0.9889
# Keras / Tensorflowで始めるディープラーニング入門 https://qiita.com/yampy/items/706d44417c433e68db0d
# のPythonコードを再利用させて頂いている
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
# One figure per metric: the training curve and the validation curve over the
# epochs. Accuracy is drawn first, then loss.
for train_key, val_key, figure_title, y_label in (
        ('acc', 'val_acc', 'model accuracy', 'accuracy'),
        ('loss', 'val_loss', 'model loss', 'loss'),
):
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    # The validation curve is labelled 'test' because the test split was
    # passed as validation data.
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
Phase 1.2 IO Placement/ Clock Placement/ Build Placer Device
ERROR: [Place 30-640] Place Check : This design requires more RAMB36/FIFO cells than are available in the target device. This design requires 144 of such cell types but only 140 compatible sites are available in the target device. Please analyze your synthesis results and constraints to ensure the design is mapped to Xilinx primitives as expected. If so, please consider targeting a larger device.
ERROR: [Place 30-640] Place Check : This design requires more RAMB18 and RAMB36/FIFO cells than are available in the target device. This design requires 290 of such cell types but only 280 compatible sites are available in the target device. Please analyze your synthesis results and constraints to ensure the design is mapped to Xilinx primitives as expected. If so, please consider targeting a larger device.
ERROR: [Place 30-640] Place Check : This design requires more RAMB36E1 cells than are available in the target device. This design requires 144 of such cell types but only 140 compatible sites are available in the target device. Please analyze your synthesis results and constraints to ensure the design is mapped to Xilinx primitives as expected. If so, please consider targeting a larger device.
INFO: [Timing 38-35] Done setting XDC timing constraints.
Phase 1.2 IO Placement/ Clock Placement/ Build Placer Device | Checksum: a2718320
Time (s): cpu = 00:00:04 ; elapsed = 00:00:04 . Memory (MB): peak = 2408.320 ; gain = 0.000 ; free physical = 1463 ; free virtual = 9892
Phase 1 Placer Initialization | Checksum: a2718320
Time (s): cpu = 00:00:04 ; elapsed = 00:00:04 . Memory (MB): peak = 2408.320 ; gain = 0.000 ; free physical = 1463 ; free virtual = 9892
ERROR: [Place 30-99] Placer failed with error: 'Implementation Feasibility check failed, Please see the previously displayed individual error or warning messages for more details.'
Please review all ERROR, CRITICAL WARNING, and WARNING messages during placement to understand the cause for failure.
Ending Placer Task | Checksum: a2718320
Time (s): cpu = 00:00:04 ; elapsed = 00:00:04 . Memory (MB): peak = 2408.320 ; gain = 0.000 ; free physical = 1464 ; free virtual = 9893
INFO: [Common 17-83] Releasing license: Implementation
46 Infos, 0 Warnings, 0 Critical Warnings and 5 Errors encountered.
place_design failed
ERROR: [Common 17-69] Command failed: Placer could not place all instances
INFO: [Common 17-206] Exiting Vivado at Sun May 20 05:23:04 2018...
[Sun May 20 05:23:04 2018] impl_1 finished
wait_on_run: Time (s): cpu = 00:01:18 ; elapsed = 00:02:03 . Memory (MB): peak = 2078.047 ; gain = 8.000 ; free physical = 2423 ; free virtual = 10851
ERROR: [Common 17-69] Command failed: Run 'impl_1' failed. Unable to open
INFO: [Common 17-206] Exiting Vivado at Sun May 20 05:23:04 2018...
Finished export RTL.
// mnist_conv_nn10_hlss.h
// 2018/05/18 by marsee
//
#ifndef __MNIST_CONV_NN10_HLSS_H__
#define __MNIST_CONV_NN10_HLSS_H__
#include <ap_fixed.h>
// Stream element carrying two float payload values plus side-channel fields.
//   U  : bit width of `user`
//   TI : bit width of `id`
//   TD : bit width of `dest`
// `keep`, `strb` and `last` are fixed at 1 bit each regardless of the payload size.
// NOTE(review): the member names match the AXI4-Stream side-channel signals, so this is
// presumably meant as an AXI4-Stream payload type — confirm against the code that uses it.
template<int U, int TI, int TD>
struct float2_axis{
// Payload: two single-precision values per stream element.
struct data {
float data0;
float data1;
} data;
ap_uint<1> keep;
ap_uint<1> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Stream element carrying one float payload value plus side-channel fields.
//   U  : bit width of `user`
//   TI : bit width of `id`
//   TD : bit width of `dest`
// `keep`, `strb` and `last` are fixed at 1 bit each.
// NOTE(review): the member names match the AXI4-Stream side-channel signals, so this is
// presumably meant as an AXI4-Stream payload type — confirm against the code that uses it.
template<int U, int TI, int TD>
struct float1_axis{
// Payload: a single single-precision value per stream element.
struct data {
float data0;
} data;
ap_uint<1> keep;
ap_uint<1> strb;
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Number of values in the output layer; used as the length of the dot2[] array arguments.
#define NUMBER_OF_OUTPUT_LAYER 10
// 4-bit unsigned type of the `output` result argument.
typedef ap_uint<4> output_type;
// Element type of dot2[]: 12-bit signed fixed point with 7 integer bits,
// truncation on quantization (AP_TRN) and wrap-around on overflow (AP_WRAP).
typedef ap_fixed<12,7,AP_TRN,AP_WRAP> out_affine_type;
#endif
// mnist_conv_nn10_hlss.cpp
// 2018/05/18 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "mnist_conv_nn10_hlss.h"
// Prototypes of the per-layer functions that all_layers() chains together.
// Their definitions are not part of this file. Every layer reads from an input stream,
// writes to an output stream and returns int; output_layer() instead writes its results
// to `output` and `dot2`.
// NOTE(review): ap_fixed_axis is a project type (presumably from layer_general.h); its
// template arguments appear to be <bit width, integer bits, number of channels, ...> —
// confirm against its declaration.

// Input stage: 8-bit ap_axiu stream in, 9-bit (1 integer bit) single-channel stream out.
int input_layer(hls::stream<ap_axiu<8,1,1,1> >&ins,
hls::stream<ap_fixed_axis<9,1,1,1> >&outs);
// Convolution stage: single-channel input, 10-channel output.
int conv_layer1(hls::stream<ap_fixed_axis<9,1,1,1> >& ins,
hls::stream<ap_fixed_axis<10,3,10,1> >& outs);
// ReLU after the convolution; stream type is unchanged.
int relu_conv1(hls::stream<ap_fixed_axis<10,3,10,1> >& ins,
hls::stream<ap_fixed_axis<10,3,10,1> >& outs);
// Max pooling; stream type is unchanged.
int max_pooling(hls::stream<ap_fixed_axis<10,3,10,1> >& ins,
hls::stream<ap_fixed_axis<10,3,10,1> >& outs);
// First affine (fully connected) stage: 10-channel input, single-channel 13-bit output.
int affine_layer1(hls::stream<ap_fixed_axis<10,3,10,1> >& ins,
hls::stream<ap_fixed_axis<13,7,1,1> >& outs);
// ReLU after the first affine stage; stream type is unchanged.
int relu_affine1(hls::stream<ap_fixed_axis<13,7,1,1> >& ins,
hls::stream<ap_fixed_axis<13,7,1,1> >& outs);
// Second affine stage: 13-bit input, 12-bit output.
int affine_layer2(hls::stream<ap_fixed_axis<13,7,1,1> >& ins,
hls::stream<ap_fixed_axis<12,7,1,1> >& outs);
// Output stage: consumes the last stream and reports through `output` and `dot2`.
int output_layer(hls::stream<ap_fixed_axis<12,7,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]);
// Top-level HLS function: runs the whole network as a chain of layer functions that are
// connected to each other through hls::stream objects.
//   ins    : input stream of ap_axiu<8,1,1,1> elements
//   output : forwarded to output_layer(), which receives it by reference
//   dot2   : NUMBER_OF_OUTPUT_LAYER-element array forwarded to output_layer()
// Always returns 0; the int return values of the individual layer calls are discarded.
int all_layers(hls::stream<ap_axiu<8,1,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]){
// `output`, `dot2` and the return value are exposed on an AXI4-Lite slave interface.
#pragma HLS INTERFACE s_axilite port=output
#pragma HLS INTERFACE s_axilite port=dot2
// Split dot2 completely into individual elements.
#pragma HLS ARRAY_PARTITION variable=dot2 complete dim=1
// Task-level pipelining: the layer calls below may run concurrently, streaming data
// from one to the next.
#pragma HLS DATAFLOW
#pragma HLS INTERFACE s_axilite port=return
// `ins` is an AXI4-Stream port with registers on both directions.
#pragma HLS INTERFACE axis register both port=ins
// One stream per connection between consecutive layers. The commented-out STREAM
// pragmas are disabled explicit depth settings for those streams.
hls::stream<ap_fixed_axis<9,1,1,1> > outs_input_layer;
// #pragma HLS STREAM variable=outs_input_layer depth=560 dim=1
hls::stream<ap_fixed_axis<10,3,10,1> > outs_conv_layer;
// #pragma HLS STREAM variable=outs_conv_layer depth=312 dim=1
hls::stream<ap_fixed_axis<10,3,10,1> > outs_relu_conv1;
// #pragma HLS STREAM variable=outs_relu_conv1 depth=312 dim=1
hls::stream<ap_fixed_axis<10,3,10,1> > outs_max_pooling;
// #pragma HLS STREAM variable=outs_max_pooling depth=78 dim=1
hls::stream<ap_fixed_axis<13,7,1,1> > outs_affine_layer1;
// #pragma HLS STREAM variable=outs_affine_layer1 depth=100 dim=1
hls::stream<ap_fixed_axis<13,7,1,1> > outs_relu_affine1;
// #pragma HLS STREAM variable=outs_relu_affine1 depth=100 dim=1
hls::stream<ap_fixed_axis<12,7,1,1> > outs_affine_layer2;
// #pragma HLS STREAM variable=outs_affine_layer2 depth=3 dim=1
// Layer chain: each call consumes the stream produced by the previous call.
input_layer(ins, outs_input_layer);
conv_layer1(outs_input_layer, outs_conv_layer);
relu_conv1(outs_conv_layer, outs_relu_conv1);
max_pooling(outs_relu_conv1, outs_max_pooling);
affine_layer1(outs_max_pooling, outs_affine_layer1);
relu_affine1(outs_affine_layer1, outs_relu_affine1);
affine_layer2(outs_relu_affine1, outs_affine_layer2);
output_layer(outs_affine_layer2, output, dot2);
return(0);
}
const size_t OUTPUT_PIPELINE_II をテンプレートの変数に追加して、出力の演算をする for ループの PIPELINE 指示子の II を指定できるようにした。
// affine_layer_template.h
// 2018/05/02 by marsee
// テンプレートを使用して汎用化した affine_layer
// #define LOOP3_PIPELINE_ENABLE を書くとLoop3にPIPELINE指示子が入る
// 2018/05/19:テンプレートにOUTPUT_PIPELINE_IIを追加
//
#ifndef __AFFINE_LAYER_TEMPLATE_H__
#define __AFFINE_LAYER_TEMPLATE_H__
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include <ap_fixed.h>
#include "layer_general.h"
// Turns its argument into a string literal.
#define TO_LITERAL(x) #x
// Emits `#pragma HLS <tok>` through the _Pragma operator, so that the pragma
// text can be built from macro arguments (courtesy of @hiyuh).
#define PRAGMA_HLS(tok) _Pragma(TO_LITERAL(HLS tok)) // courtesy of @hiyuh
// Generic affine (fully connected) layer with AXI4-Stream input and output.
//
// Input elements are discarded until one arrives with user != 0; that element
// is treated as the first of V_PRE_LAYER_HIGHT x H_PRE_LAYER_WIDTH input
// elements, each carrying NUMBER_OF_CHANNELS values. For every output index
// `col` the products of the input values and af_weight[][col] are accumulated
// in dot[col]. While processing the last input element the bias is added and
// the NUMBER_OF_OUTPUT results are written to `outs`, with user = 1 on the
// first result and last = 1 on the final one.
//
// HORIZ_PIPELINE_II is only used when LOOP3_PIPELINE_ENABLE is defined before
// this header is included. Always returns 0.
template<
const size_t IN_W, // input bit width
const size_t IN_I, // input binary-point position
const size_t OUT_W, // output bit length
const size_t OUT_I, // output binary-point position
const size_t WB_W, // bit length of the weights and biases
const size_t WB_I, // binary-point position of the weights and biases
const size_t V_PRE_LAYER_HIGHT,
const size_t H_PRE_LAYER_WIDTH,
const size_t NUMBER_OF_CHANNELS,
const size_t NUMBER_OF_OUTPUT,
const size_t HORIZ_PIPELINE_II,
const size_t OUTPUT_PIPELINE_II
>int affine_layer_template(hls::stream<ap_fixed_axis<IN_W,IN_I,NUMBER_OF_CHANNELS,1> >& ins,
hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> >& outs,
const ap_fixed<WB_W,WB_I,AP_TRN,AP_WRAP> af_weight[V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH*NUMBER_OF_CHANNELS][NUMBER_OF_OUTPUT],
const ap_fixed<WB_W,WB_I,AP_TRN,AP_WRAP> af_bias[NUMBER_OF_OUTPUT]
){
//#pragma HLS ARRAY_PARTITION variable=af_weight complete dim=1
ap_fixed_axis<IN_W,IN_I,NUMBER_OF_CHANNELS,1> stdata;
// Running sums, one per output.
ap_fixed<OUT_W,OUT_I,AP_TRN,AP_WRAP> dot[NUMBER_OF_OUTPUT];
//#pragma HLS ARRAY_PARTITION variable=dot complete dim=1
ap_fixed_axis<OUT_W,OUT_I,1,1> outd;
Loop1: do {
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// The frame starts when user becomes 1
ins >> stdata;
} while(stdata.user == 0);
Loop2: for (int y=0; y<V_PRE_LAYER_HIGHT; y++){
Loop3: for (int x=0; x<H_PRE_LAYER_WIDTH; x++){
#ifdef LOOP3_PIPELINE_ENABLE
PRAGMA_HLS(pipeline II=HORIZ_PIPELINE_II)
#endif
if (!(x==0 && y==0)) // the first element has already been read by Loop1
ins >> stdata; // input from the AXI4-Stream
Loop4: for (int col=0; col<NUMBER_OF_OUTPUT; col++){
PRAGMA_HLS(pipeline II=OUTPUT_PIPELINE_II)
if (x==0 && y==0) // clear the accumulator on the first element
dot[col] = 0;
// Partial sum over the channels of the current input element.
ap_fixed<OUT_W,OUT_I,AP_TRN,AP_WRAP> dot_temp = ap_fixed<OUT_W,OUT_I,AP_TRN,AP_WRAP>(0);
for (int i=0; i<NUMBER_OF_CHANNELS; i++){
// Weight row index is channel-major: channel i, then row y, then column x.
dot_temp += stdata.data[i] * af_weight[V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH*i+y*H_PRE_LAYER_WIDTH+x][col];
}
dot[col] += dot_temp;
if (y==V_PRE_LAYER_HIGHT-1 && x==H_PRE_LAYER_WIDTH-1){ // on the last element, add the bias and emit the result
dot[col] += af_bias[col];
outd.data[0] = dot[col];
// user marks the first output, last marks the final output.
if(col == 0)
outd.user = 1;
else
outd.user = 0;
if(col == NUMBER_OF_OUTPUT-1)
outd.last = 1;
else
outd.last = 0;
outs << outd;
}
}
}
}
return(0);
}
#endif
// affine_layer1.cpp
// 2018/05/02 by marsee
// affine layer1 by template
// 2018/05/19:テンプレートにOUTPUT_PIPELINE_IIを追加
//
#include "affine_layer_template.h"
#include "af1_weight.h"
#include "af1_bias.h"
// First affine layer: instantiates affine_layer_template with the af1_weight
// and af1_bias tables and forwards the two streams to it.
// Returns whatever the template instance returns.
int affine_layer1(hls::stream<ap_fixed_axis<10,3,10,1> >& ins,
                  hls::stream<ap_fixed_axis<13,7,1,1> >& outs){
    // DATA_PACK on ins/outs is intentionally left disabled:
    //#pragma HLS DATA_PACK variable=outs
    //#pragma HLS DATA_PACK variable=ins
    const int status = affine_layer_template<
        10, 3,      // IN_W, IN_I
        13, 7,      // OUT_W, OUT_I
        9, 1,       // WB_W, WB_I
        12, 12,     // V_PRE_LAYER_HIGHT, H_PRE_LAYER_WIDTH
        10,         // NUMBER_OF_CHANNELS
        100,        // NUMBER_OF_OUTPUT
        0,          // HORIZ_PIPELINE_II
        1           // OUTPUT_PIPELINE_II
    >(ins, outs, af1_weight, af1_bias);
    return status;
}
// affine_layer2.cpp
// 2018/05/03 by marsee
// affine layer2 by template
// 2018/05/19:テンプレートにOUTPUT_PIPELINE_IIを追加
//
#define LOOP3_PIPELINE_ENABLE
#include "affine_layer_template.h"
#include "af2_weight.h"
#include "af2_bias.h"
// Second affine layer: instantiates affine_layer_template with the af2_weight
// and af2_bias tables and forwards the two streams to it.
// Returns whatever the template instance returns.
int affine_layer2(hls::stream<ap_fixed_axis<13,7,1,1> >& ins,
                  hls::stream<ap_fixed_axis<12,7,1,1> >& outs){
    // DATA_PACK on ins/outs is intentionally left disabled:
    //#pragma HLS DATA_PACK variable=outs
    //#pragma HLS DATA_PACK variable=ins
    const int status = affine_layer_template<
        13, 7,      // IN_W, IN_I
        12, 7,      // OUT_W, OUT_I
        9, 1,       // WB_W, WB_I
        1, 100,     // V_PRE_LAYER_HIGHT, H_PRE_LAYER_WIDTH
        1,          // NUMBER_OF_CHANNELS
        10,         // NUMBER_OF_OUTPUT
        3,          // HORIZ_PIPELINE_II
        1           // OUTPUT_PIPELINE_II
    >(ins, outs, af2_weight, af2_bias);
    return status;
}
#include "curve_data_0_100.h" を有効にして、#define NUM_ITERATIONS 2 // C/RTL CoSimulation をコメントアウトし、#define NUM_ITERATIONS 300 // C Simulation のコメントアウトを外した。
つまり、"curve_data_0_100.h" の 0 番目から 300 番目の白線画像でやってみた。結果を示す。hw_err_cnt = 8 sw_err_cnt = 20
この内の hw_err_cnt がハードウェア化関数でのエラーの数を表す。sw_err_cnt が float で実装したソフトウェアのエラーの数を示す。今回はハードウェアのエラーが 8 個で、ソフトウェアのエラーが 20 個だった。これは、”AXI4-Stream インターフェースの畳み込みニューラルネットワーク3(シミュレーション)”でのエラー数と同じだ。
次に、#include "curve_data_2500_2600.h" の 2500 番目から 2800 番目の白線画像でやってみた。結果を示す。hw_err_cnt = 14 sw_err_cnt = 11
で、ハードウェアが 14 個、ソフトウェアが 11 個間違った。これも”AXI4-Stream インターフェースの畳み込みニューラルネットワーク3(シミュレーション)”でのエラー数と同じだ。
最後に、#include "curve_data_5000_5100.h" で C シミュレーションを行った。結果を示す。hw_err_cnt = 46 sw_err_cnt = 15
で、エラー数はハードウェアが 37 個、ソフトウェアが 15 個だった。やはり、ハードウェアのエラー数が多くなっている。これも”AXI4-Stream インターフェースの畳み込みニューラルネットワーク3(シミュレーション)”でのエラー数と同じで、すべての C シミュレーションにおいてエラー数が同一なので、同じCNN と言えると思う。
// all_layers_template.h
// 2018/03/13 by marsee
//
#ifndef __ALL_LAYER_TEMPLATE_H__
#define __ALL_LAYER_TEMPLATE_H__
#include <ap_fixed.h>
// Number of elements in the dot2[] array taken by all_layers() and output_layer().
#define NUMBER_OF_OUTPUT_LAYER 3
// Unsigned 2-bit type of the `output` argument of all_layers() and output_layer().
typedef ap_uint<2> output_type;
// Element type of dot2[]: 12-bit fixed point with 7 integer bits,
// truncation quantization (AP_TRN) and wrap-around overflow (AP_WRAP).
typedef ap_fixed<12,7,AP_TRN,AP_WRAP> out_affine_type;
#endif
// all_layers_template.cpp
// 2018/05/10 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "all_layers_template.h"
// Forward declarations of the layer functions that all_layers() calls, listed
// in call order. Each function is only declared here. The stream element
// types of consecutive layers match so that one layer's output stream can be
// passed as the next layer's input stream.
int input_layer(hls::stream<ap_axiu<32,1,1,1> >&ins,
hls::stream<ap_fixed_axis<9,1,1,1> >&outs);
int conv_layer1(hls::stream<ap_fixed_axis<9,1,1,1> >& ins,
hls::stream<ap_fixed_axis<16,6,2,1> >& outs);
int relu_conv1(hls::stream<ap_fixed_axis<16,6,2,1> >& ins,
hls::stream<ap_fixed_axis<16,6,2,1> >& outs);
int max_pooling(hls::stream<ap_fixed_axis<16,6,2,1> >& ins,
hls::stream<ap_fixed_axis<16,6,2,1> >& outs);
int affine_layer1(hls::stream<ap_fixed_axis<16,6,2,1> >& ins,
hls::stream<ap_fixed_axis<19,7,1,1> >& outs);
int relu_affine1(hls::stream<ap_fixed_axis<19,7,1,1> >& ins,
hls::stream<ap_fixed_axis<19,7,1,1> >& outs);
int affine_layer2(hls::stream<ap_fixed_axis<19,7,1,1> >& ins,
hls::stream<ap_fixed_axis<12,7,1,1> >& outs);
int output_layer(hls::stream<ap_fixed_axis<12,7,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]);
// Top-level function of the network: runs the eight layer functions in
// sequence, connecting each one to the next through a local hls::stream.
//   ins    : input AXI4-Stream carrying 32-bit data words
//   output : passed by reference to output_layer()
//   dot2   : NUMBER_OF_OUTPUT_LAYER-element array passed to output_layer()
// Always returns 0; the return values of the individual layers are ignored.
int all_layers(hls::stream<ap_axiu<32,1,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]){
// output, dot2 and the return value are mapped to AXI4-Lite; ins is a
// registered AXI4-Stream port. dot2 is fully partitioned and the function
// body is scheduled as a DATAFLOW region.
#pragma HLS INTERFACE s_axilite port=output
#pragma HLS INTERFACE s_axilite port=dot2
#pragma HLS ARRAY_PARTITION variable=dot2 complete dim=1
#pragma HLS DATAFLOW
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=ins
// One stream per layer boundary. The STREAM depth pragmas are kept below
// but are commented out, so no explicit depth is requested.
hls::stream<ap_fixed_axis<9,1,1,1> > outs_input_layer;
//#pragma HLS STREAM variable=outs_input_layer depth=560 dim=1
hls::stream<ap_fixed_axis<16,6,2,1> > outs_conv_layer;
//#pragma HLS STREAM variable=outs_conv_layer depth=312 dim=1
hls::stream<ap_fixed_axis<16,6,2,1> > outs_relu_conv1;
//#pragma HLS STREAM variable=outs_relu depth=312 dim=1
hls::stream<ap_fixed_axis<16,6,2,1> > outs_max_pooling;
//#pragma HLS STREAM variable=outs_max_pooling depth=78 dim=1
hls::stream<ap_fixed_axis<19,7,1,1> > outs_affine_layer1;
//#pragma HLS STREAM variable=outs_affine_layer1 depth=100 dim=1
hls::stream<ap_fixed_axis<19,7,1,1> > outs_relu_affine1;
//#pragma HLS STREAM variable=outs_relu_affine1 depth=100 dim=1
hls::stream<ap_fixed_axis<12,7,1,1> > outs_affine_layer2;
//#pragma HLS STREAM variable=outs_affine_layer2 depth=3 dim=1
// Layer pipeline: each call consumes the stream written by the previous call.
input_layer(ins, outs_input_layer);
conv_layer1(outs_input_layer, outs_conv_layer);
relu_conv1(outs_conv_layer, outs_relu_conv1);
max_pooling(outs_relu_conv1, outs_max_pooling);
affine_layer1(outs_max_pooling, outs_affine_layer1);
relu_affine1(outs_affine_layer1, outs_relu_affine1);
affine_layer2(outs_relu_affine1, outs_affine_layer2);
output_layer(outs_affine_layer2, output, dot2);
return(0);
}
// all_layers_soft.cpp
// 2018/03/14 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "all_layers.h"
// Forward declarations of the float-based layer functions that
// all_layers_soft() calls, listed in call order. Each function is only
// declared here.
int conv_layer_soft(hls::stream<ap_axiu<32,1,1,1> >& ins,
hls::stream<float2_axis<1,1,1> >& outs);
int relu_soft(hls::stream<float2_axis<1,1,1> >& ins,
hls::stream<float2_axis<1,1,1> >& outs);
int max_pooling_soft(hls::stream<float2_axis<1,1,1> >& ins,
hls::stream<float2_axis<1,1,1> >& outs);
int affine_layer1_soft(hls::stream<float2_axis<1,1,1> >& ins,
hls::stream<float1_axis<1,1,1> >& outs);
int relu_affine1_soft(hls::stream<float1_axis<1,1,1> >& ins,
hls::stream<float1_axis<1,1,1> >& outs);
int affine_layer2_soft(hls::stream<float1_axis<1,1,1> >& ins,
hls::stream<float1_axis<1,1,1> >& outs);
int output_layer_soft(hls::stream<float1_axis<1,1,1> >& ins, output_type& output,
float dot2[NUMBER_OF_OUTPUT_LAYER]);
// Float reference version of all_layers(): runs the seven *_soft layer
// functions in sequence, handing each one the stream written by its
// predecessor.
//   ins    : input AXI4-Stream carrying 32-bit data words
//   output : passed by reference to output_layer_soft()
//   dot2   : NUMBER_OF_OUTPUT_LAYER-element array passed to output_layer_soft()
// Always returns 0; the return values of the individual layers are ignored.
int all_layers_soft(hls::stream<ap_axiu<32,1,1,1> >& ins, output_type& output,
                    float dot2[NUMBER_OF_OUTPUT_LAYER]){
    typedef hls::stream<float2_axis<1,1,1> > conv_stage_stream;
    typedef hls::stream<float1_axis<1,1,1> > affine_stage_stream;

    // Streams between the convolution-side stages.
    conv_stage_stream conv_to_relu;
    conv_stage_stream relu_to_pool;
    conv_stage_stream pool_to_affine1;
    // Streams between the affine-side stages.
    affine_stage_stream affine1_to_relu;
    affine_stage_stream relu_to_affine2;
    affine_stage_stream affine2_to_output;

    conv_layer_soft(ins, conv_to_relu);
    relu_soft(conv_to_relu, relu_to_pool);
    max_pooling_soft(relu_to_pool, pool_to_affine1);

    affine_layer1_soft(pool_to_affine1, affine1_to_relu);
    relu_affine1_soft(affine1_to_relu, relu_to_affine2);
    affine_layer2_soft(relu_to_affine2, affine2_to_output);

    output_layer_soft(affine2_to_output, output, dot2);
    return 0;
}
// all_layers_template_tb.cpp
// 2018/05/12 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "all_layers_template.h"
#include "curve_data_0_100.h"
//#include "curve_data_2500_2600.h"
//#include "curve_data_5000_5100.h"
// Test-bench configuration constants.
#define ALL_DATA_NUM 300
#define NUM_OF_KERNELS 2
// Frame geometry used by main(): ROW_PIXELS rows of COULMN_PIXELS pixels
// (10 x 56 = 560 = ALL_PIXELS).
#define COULMN_PIXELS 56
#define ROW_PIXELS 10
#define ALL_PIXELS 560
#define NUM_OF_OUTPUT 3
// Number of frames main() runs; exactly one of the two definitions below
// must be active.
#define NUM_ITERATIONS 300 // C Simulation
//#define NUM_ITERATIONS 2 // C/RTL CoSimulation
// Fixed-point model under test and the float reference model; both are only
// declared here.
int all_layers(hls::stream<ap_axiu<32,1,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]);
int all_layers_soft(hls::stream<ap_axiu<32,1,1,1> >& ins, output_type& output,
float dot2[NUMBER_OF_OUTPUT_LAYER]);
// Test bench for all_layers(): for each of NUM_ITERATIONS frames taken from
// t_train_256[] it runs the fixed-point model and the float reference model,
// compares both classification results with the label in t_test[], and
// prints every mismatch together with the dot2 values of both models.
// The total error counts are printed at the end. Always returns 0.
int main(){
    using namespace std;

    hls::stream<ap_axiu<32,1,1,1> > ins;        // stimulus for all_layers()
    hls::stream<ap_axiu<32,1,1,1> > ins_soft;   // stimulus for all_layers_soft()
    output_type output, output_soft;
    out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER];
    float dot2_soft[NUMBER_OF_OUTPUT_LAYER];
    ap_axiu<32,1,1,1> pix;
    int hw_err_cnt = 0;
    int sw_err_cnt = 0;

    for(int i=0; i<NUM_ITERATIONS; i++){
        // Five dummy words with TUSER = 0 precede the frame on the
        // fixed-point input stream only.
        for(int dummy=0; dummy<5; dummy++){
            pix.user = 0;
            pix.data = ap_uint<32>(dummy);
            ins << pix;
        }

        // One frame of pixels goes to both input streams, in row-major order.
        for(int idx=0; idx<ROW_PIXELS*COULMN_PIXELS; idx++){
            const int x = idx % COULMN_PIXELS;
            pix.data = ap_uint<32>(t_train_256[i][idx]);
            pix.user = (idx == 0) ? 1 : 0;                  // TUSER on the first pixel of the frame
            pix.last = (x == COULMN_PIXELS-1) ? 1 : 0;      // TLAST at the end of every row
            ins << pix;
            ins_soft << pix;
        }

        all_layers(ins, output, dot2);
        all_layers_soft(ins_soft, output_soft, dot2_soft);

        // Expected class: index of the first t_test entry equal to 1.0f,
        // or 0 when there is none.
        int t_test_num = 0;
        int m = 0;
        while(m < NUMBER_OF_OUTPUT_LAYER && t_test[i][m] != 1.0f)
            m++;
        if(m < NUMBER_OF_OUTPUT_LAYER)
            t_test_num = m;

        // Compare both results with the expected class.
        const bool hw_wrong = (int(output) != t_test_num);
        const bool sw_wrong = (int(output_soft) != t_test_num);

        if(hw_wrong){
            cout << "hw_error: i = " << i << " output = " << int(output) << " t_test_num = " << t_test_num << endl;
            hw_err_cnt++;
        }
        if(sw_wrong){
            cout << "sw_error: i = " << i << " output_soft = " << int(output_soft) << " t_test_num = " << t_test_num << endl;
            sw_err_cnt++;
        }
        if(hw_wrong || sw_wrong){
            // Dump the final affine outputs of both models for the failing frame.
            for(int j=0; j<NUMBER_OF_OUTPUT_LAYER; j++){
                cout << "dot2[" << j << "] = " << fixed << setprecision(8) << float(dot2[j])
                     << " dot2_soft[" << j << "] = " << dot2_soft[j] << endl;
            }
            cout << endl;
        }
    }

    cout << "hw_err_cnt = " << hw_err_cnt << " sw_err_cnt = " << sw_err_cnt << endl;
    return 0;
}
// max_pooling_template.h
// 2018/05/10 by marsee
// テンプレートを使用して汎用化した max_pooling
//
#ifndef __MAX_POOLING_TEMPLATE__
#define __MAX_POOLING_TEMPLATE__
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include <ap_fixed.h>
#include "layer_general.h"
// Turns its argument into a string literal.
#define TO_LITERAL(x) #x
// Emits `#pragma HLS <tok>` through the _Pragma operator, so that the pragma
// text can be built from macro arguments (courtesy of @hiyuh).
#define PRAGMA_HLS(tok) _Pragma(TO_LITERAL(HLS tok)) // courtesy of @hiyuh
// Generic max pooling layer with AXI4-Stream input and output.
//
// Input elements are discarded until one arrives with user != 0; that element
// is treated as the first of VERTICAL_HIGHT_IN x HORIZONTAL_WIDTH_IN input
// elements, each carrying NUMBER_OF_KERNEL values. For every kernel a line
// buffer (line_buf) and an ARRAY_SIZE x ARRAY_SIZE window (pix_mat) are
// updated with each new input, and the maximum of the window is computed.
// An output element is written only at positions where
// x % X_STRIDE == X_STRIDE-1 and y % Y_STRIDE == Y_STRIDE-1.
// Always returns 0.
template<
const size_t W,
const size_t I,
const size_t NUMBER_OF_KERNEL,
const size_t ARRAY_SIZE, // pooling is done over an ARRAY_SIZE x ARRAY_SIZE window
const size_t X_STRIDE,
const size_t Y_STRIDE,
const size_t VERTICAL_HIGHT_IN,
const size_t HORIZONTAL_WIDTH_IN
>int max_pooling_template(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs){
typedef ap_fixed<W, I, AP_TRN, AP_WRAP> conv_type;
ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix;
ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> mp_out;
// Previous ARRAY_SIZE-1 input rows, per kernel.
conv_type line_buf[NUMBER_OF_KERNEL][ARRAY_SIZE-1][HORIZONTAL_WIDTH_IN];
#pragma HLS ARRAY_PARTITION variable=line_buf block factor=2 dim=1
#pragma HLS ARRAY_PARTITION variable=line_buf block factor=1 dim=2
// Sliding window, per kernel.
conv_type pix_mat[NUMBER_OF_KERNEL][ARRAY_SIZE][ARRAY_SIZE];
#pragma HLS array_partition variable=pix_mat complete
conv_type val[NUMBER_OF_KERNEL], conv_data;
Loop1: do {
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// The frame starts when user becomes 1
ins >> pix;
} while(pix.user == 0);
Loop2: for (int y=0; y<VERTICAL_HIGHT_IN; y++){
Loop3: for (int x=0; x<HORIZONTAL_WIDTH_IN; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the first element has already been read by Loop1
ins >> pix; // input from the AXI4-Stream
Loop4: for (int n=0; n<NUMBER_OF_KERNEL; n++){
#pragma HLS UNROLL
conv_data = pix.data[n];
// Shift the 2-D window one column to the left
Loop5 : for (int k=0; k<ARRAY_SIZE; k++){
#pragma HLS UNROLL
Loop6 : for (int m=0; m<ARRAY_SIZE-1; m++){
pix_mat[n][k][m] = pix_mat[n][k][m+1];
}
}
Loop7: for (int i=0; i<ARRAY_SIZE-1; i++){ // load the data of the previous rows from line_buf
pix_mat[n][i][ARRAY_SIZE-1] = line_buf[n][i][x];
}
pix_mat[n][ARRAY_SIZE-1][ARRAY_SIZE-1] = conv_data; // put the new sample into the last cell of pix_mat
Loop8: for (int i=0; i<ARRAY_SIZE-2; i++){ // move the buffered rows up by one
line_buf[n][i][x] = line_buf[n][i+1][x];
}
line_buf[n][ARRAY_SIZE-2][x] = conv_data;
// Search the window for the maximum
Loop9 : for (int k=0; k<ARRAY_SIZE; k++){
#pragma HLS UNROLL
Loop10 : for (int m=0; m<ARRAY_SIZE; m++){
if (k==0 && m==0){
val[n] = pix_mat[n][k][m];
} else if (val[n] < pix_mat[n][k][m]){
val[n] = pix_mat[n][k][m];
}
}
}
mp_out.data[n] = val[n];
if (x==X_STRIDE-1 && y==Y_STRIDE-1){ // assert TUSER on the first output element
mp_out.user = 1;
} else {
mp_out.user = 0;
}
if (x == HORIZONTAL_WIDTH_IN-1){ // assert TLAST at the end of a row
mp_out.last = 1;
} else {
mp_out.last = 0;
}
}
if (x%X_STRIDE==X_STRIDE-1 && y%Y_STRIDE==Y_STRIDE-1){ // stride: emit only at these positions
outs << mp_out;
}
}
}
return(0);
}
#endif
// max_pooling.cpp
// 2018/05/10 by marsee
//
#include "max_pooling_template.h"
// Max pooling layer: instantiates max_pooling_template and forwards the two
// streams to it. Returns whatever the template instance returns.
int max_pooling(hls::stream<ap_fixed_axis<16,6,2,1> >& ins,
                hls::stream<ap_fixed_axis<16,6,2,1> >& outs){
#pragma HLS DATA_PACK variable=outs
#pragma HLS DATA_PACK variable=ins
    const int status = max_pooling_template<
        16, 6,      // W, I
        2,          // NUMBER_OF_KERNEL
        2,          // ARRAY_SIZE
        2, 2,       // X_STRIDE, Y_STRIDE
        6, 52       // VERTICAL_HIGHT_IN, HORIZONTAL_WIDTH_IN
    >(ins, outs);
    return status;
}
// max_pooling.h
// 2018/04/19 by marsee
//
#ifndef __MAX_POOLING_H__
#define __MAX_POOLING_H__
#include <ap_fixed.h>
// Input frame size of the max pooling test: H_PIXEL_WIDTH_IN columns by
// V_PIXEL_WIDTH_IN rows.
static const size_t H_PIXEL_WIDTH_IN = 52;
static const size_t V_PIXEL_WIDTH_IN = 6;
// Output frame size: H_PIXEL_WIDTH_OUT columns by V_PIXEL_WIDTH_OUT rows.
static const size_t H_PIXEL_WIDTH_OUT = 26;
static const size_t V_PIXEL_WIDTH_OUT = 3;
// Number of data values carried by each stream element.
static const size_t NUMBER_OF_KERNEL = 2;
static const size_t ARRAY_SIZE = 2;
// Total bit width (W) and integer bit count (I) of conv_type.
static const size_t W = 16;
static const size_t I = 6;
// Horizontal and vertical step between pooling windows.
static const size_t X_STRIDE = 2;
static const size_t Y_STRIDE = 2;
// Fixed-point sample type with truncation (AP_TRN) and wrap-around (AP_WRAP).
typedef ap_fixed<W, I, AP_TRN, AP_WRAP> conv_type;
#endif
// max_pooling_tb.cpp
// 2018/04/19 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "max_pooling.h"
#include "relu_output.h"
// Fixed-point max pooling function under test; only declared here.
int max_pooling(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs);
// Second fixed-point implementation whose output main() compares against
// max_pooling().
int max_pooling2(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs);
// Float implementation of max pooling.
int max_pooling_soft(hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& ins,
hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs);
// Test bench for max_pooling().
//
// Feeds one frame taken from relu_out[] / relu_fout[] (preceded by five dummy
// elements with TUSER = 0) to max_pooling(), max_pooling2() and
// max_pooling_soft(), compares the two fixed-point outputs element by element
// and writes the results to max_pooling_output.h. Kernel 0 of the input frame
// is also dumped to relu_output_X0.csv (fixed point) and relu_output_F0.csv
// (float).
//
// Mismatches are counted and reported but do not abort the run; the function
// always returns 0.
int main(){
    using namespace std;

    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > ins;
    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > ins2;
    hls::stream<float_axis<NUMBER_OF_KERNEL,1> > ins_soft;
    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > outs;
    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > outs2;
    hls::stream<float_axis<NUMBER_OF_KERNEL,1> > outs_soft;

    float mp_fout[H_PIXEL_WIDTH_OUT*V_PIXEL_WIDTH_OUT][NUMBER_OF_KERNEL];
    conv_type mp_out[H_PIXEL_WIDTH_OUT*V_PIXEL_WIDTH_OUT][NUMBER_OF_KERNEL];

    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix;
    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix2;
    float_axis<NUMBER_OF_KERNEL,1> fpix;

    // Prepare the input data: five dummy elements before the frame starts.
    for(int i=0; i<5; i++){
        pix.user = 0;
        for(int k=0; k<NUMBER_OF_KERNEL; k++){
            pix.data[k] = (conv_type)i;
        }
        ins << pix;
        ins2 << pix;

        fpix.user = 0;
        for(int k=0; k<NUMBER_OF_KERNEL; k++){
            fpix.data[k] = (float)i;
        }
        ins_soft << fpix;
    }

    // Push one frame of data into ins, ins2 and ins_soft.
    ofstream OHX("relu_output_X0.csv");
    ofstream OHF("relu_output_F0.csv");
    for(int j=0; j < V_PIXEL_WIDTH_IN; j++){
        for(int i=0; i < H_PIXEL_WIDTH_IN; i++){
            for(int k=0; k<NUMBER_OF_KERNEL; k++){
                pix.data[k] = relu_out[j*H_PIXEL_WIDTH_IN+i][k];
                fpix.data[k] = relu_fout[j*H_PIXEL_WIDTH_IN+i][k];
            }

            // CSV dump of kernel 0, one frame row per line.
            OHX << pix.data[0];
            if(i != H_PIXEL_WIDTH_IN-1)
                OHX << ",";
            else
                OHX << endl;
            OHF << fpix.data[0];
            if(i != H_PIXEL_WIDTH_IN-1)
                OHF << ",";
            else
                OHF << endl;

            if (j==0 && i==0){ // TUSER = 1 on the first element of the frame
                pix.user = 1;
                fpix.user = 1;
            } else {
                pix.user = 0;
                fpix.user = 0;
            }

            if (i == H_PIXEL_WIDTH_IN-1){ // assert TLAST at the end of a row
                pix.last = 1;
                fpix.last = 1;
            } else {
                pix.last = 0;
                fpix.last = 0;
            }

            ins << pix;
            ins2 << pix;
            ins_soft << fpix;
        }
    }

    max_pooling(ins, outs);
    max_pooling_soft(ins_soft, outs_soft);
    max_pooling2(ins2, outs2);

    // Drain outs and outs_soft into mp_out[][] and mp_fout[][], comparing the
    // two fixed-point implementations on the way.
    int errcnt = 0;
    for(int j=0; j < V_PIXEL_WIDTH_OUT; j++){
        for(int i=0; i < H_PIXEL_WIDTH_OUT; i++){
            outs >> pix;
            outs2 >> pix2;
            outs_soft >> fpix;

            for(int k=0; k<NUMBER_OF_KERNEL; k++){
                mp_out[j*H_PIXEL_WIDTH_OUT+i][k] = pix.data[k];
                mp_fout[j*H_PIXEL_WIDTH_OUT+i][k] = fpix.data[k];

                printf("%d, %d, data[%d] = %f, data2[%d] = %f, fdata[%d] = %f\n", j, i, k, (float)pix.data[k], k, (float)pix2.data[k], k, fpix.data[k]);
                if (pix.data[k] != pix2.data[k]){
                    // i and j are int, so they must be printed with %d (the
                    // previous %ld did not match the argument type).
                    printf("ERROR HW and SW results mismatch i = %d, j = %d, HW[%d] = %f, HW2[%d] = %f, SW[%d] = %f\n", i, j, k, (float)pix.data[k], k, (float)pix2.data[k], k,fpix.data[k]);
                    errcnt++;
                    //return(1);
                }
            }
        }
    }
    cout << "Error Count = " << errcnt << endl;
    // Only claim success when no mismatch was counted.
    if (errcnt == 0)
        cout << "Success HW and SW results match" << endl;
    else
        cout << "ERROR HW and SW results mismatch" << endl;
    cout << endl;

    // Write the max pooling results to a header file.
    ofstream OH("max_pooling_output.h");
    OH << "// max_pooling_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    // strftime zero-pads every field, giving "YYYY/MM/DD HH:MM:SS".
    char stamp[32];
    strftime(stamp, sizeof(stamp), "%Y/%m/%d %H:%M:%S", localNow);
    OH << "// " << stamp << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __MAX_POOLING_OUTPUT_H__" << endl;
    OH << "#define __MAX_POOLING_OUTPUT_H__" << endl;
    OH << endl;

    // Float results.
    OH << "const float mp_fout[" << V_PIXEL_WIDTH_OUT*H_PIXEL_WIDTH_OUT << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<V_PIXEL_WIDTH_OUT ; y++){
        for (int x=0; x<H_PIXEL_WIDTH_OUT ; x++){
            OH << " {" << fixed << setprecision(12) << mp_fout[H_PIXEL_WIDTH_OUT*y+x][0];
            for (int i=1; i<NUMBER_OF_KERNEL; ++i)
            {
                OH << ", " << mp_fout[H_PIXEL_WIDTH_OUT*y+x][i];
            }
            OH << "}";
            if (y==V_PIXEL_WIDTH_OUT-1 && x==H_PIXEL_WIDTH_OUT-1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;

    // Fixed-point results, printed as float values.
    OH << "const ap_fixed<16, 6, AP_TRN, AP_WRAP> mp_out[" << V_PIXEL_WIDTH_OUT*H_PIXEL_WIDTH_OUT << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<V_PIXEL_WIDTH_OUT ; y++){
        for (int x=0; x<H_PIXEL_WIDTH_OUT ; x++){
            OH << " {" << fixed << setprecision(12) << (float)mp_out[H_PIXEL_WIDTH_OUT*y+x][0];
            for(int i=1; i<NUMBER_OF_KERNEL; i++){
                OH << ", " << (float)mp_out[H_PIXEL_WIDTH_OUT*y+x][i];
            }
            OH << "}";
            if (y==V_PIXEL_WIDTH_OUT -1 && x==H_PIXEL_WIDTH_OUT -1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;

    return(0);
}
// Float implementation of max pooling used by the test bench.
//
// Input elements are discarded until one arrives with user != 0; that element
// and the following ones are stored as a V_PIXEL_WIDTH_IN x H_PIXEL_WIDTH_IN
// frame per kernel. The frame is then scanned in steps of Y_STRIDE / X_STRIDE
// and, for every window of Y_STRIDE x X_STRIDE samples, the per-kernel
// maximum is written to `outs`. user is 1 on the first output element and
// last is 1 on the final output element of every row. Always returns 0.
int max_pooling_soft(hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& ins,
        hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs){
    float_axis<NUMBER_OF_KERNEL,1> fpix;
    float fpixd_ary[NUMBER_OF_KERNEL][V_PIXEL_WIDTH_IN][H_PIXEL_WIDTH_IN];
    float fval[NUMBER_OF_KERNEL];

    do {
        // The frame starts when user becomes 1
        ins >> fpix;
    } while(fpix.user == 0);

    // Buffer the whole frame.
    for (int y=0; y<V_PIXEL_WIDTH_IN; y++){
        for (int x=0; x<H_PIXEL_WIDTH_IN; x++){
            if (!(x==0 && y==0)) // the first element has already been read above
                ins >> fpix;

            for(int i=0; i<NUMBER_OF_KERNEL; i++){
                fpixd_ary[i][y][x] = fpix.data[i];
            }
        }
    }

    for (int y=0; y<V_PIXEL_WIDTH_IN-1; y+=Y_STRIDE){
        for (int x=0; x<H_PIXEL_WIDTH_IN-1; x+=X_STRIDE){
            // Maximum of the window whose top-left corner is (x, y).
            for(int p=0; p<NUMBER_OF_KERNEL; p++){
                for(int m=0; m<Y_STRIDE; m++){
                    for(int n=0; n<X_STRIDE; n++){
                        if(m==0 && n==0){
                            fval[p] = fpixd_ary[p][y][x];
                        } else if(fval[p] < fpixd_ary[p][y+m][x+n]){
                            fval[p] = fpixd_ary[p][y+m][x+n];
                        }
                    }
                }
            }
            for(int i=0; i<NUMBER_OF_KERNEL; i++){
                fpix.data[i] = fval[i];
            }

            if(x==0 && y==0)
                fpix.user = 1;
            else
                fpix.user = 0;

            // TLAST belongs on the last window of a row, i.e. when the next
            // step of x would leave the loop. The previous test compared x
            // with V_PIXEL_WIDTH_OUT - X_STRIDE, a vertical output size that
            // the stepping x never equals, so TLAST was never asserted.
            if(x + X_STRIDE >= H_PIXEL_WIDTH_IN-1)
                fpix.last = 1;
            else
                fpix.last = 0;

            outs << fpix;
        }
    }

    return(0);
}
// Frame-buffered fixed-point max pooling used by the test bench as a second
// implementation to compare max_pooling() against.
//
// Skips input until an element with user != 0 arrives, stores the following
// V_PIXEL_WIDTH_IN x H_PIXEL_WIDTH_IN frame, reduces every
// Y_STRIDE x X_STRIDE window to its per-kernel maximum and streams out the
// V_PIXEL_WIDTH_OUT x H_PIXEL_WIDTH_OUT result with user = 1 on the first
// element and last = 1 at the end of every row. Always returns 0.
int max_pooling2(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
        hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs){
    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> in_elem;
    conv_type frame[NUMBER_OF_KERNEL][V_PIXEL_WIDTH_IN][H_PIXEL_WIDTH_IN];
    conv_type pooled[NUMBER_OF_KERNEL][V_PIXEL_WIDTH_OUT][H_PIXEL_WIDTH_OUT];
    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> out_elem;

    // Wait for the start of frame (user == 1).
    do {
        ins >> in_elem;
    } while(in_elem.user == 0);

    // Capture the frame; the first element is already in in_elem.
    for (int row=0; row<V_PIXEL_WIDTH_IN; row++){
        for (int col=0; col<H_PIXEL_WIDTH_IN; col++){
            const bool first = (row==0 && col==0);
            if (!first)
                ins >> in_elem;

            for (int ch=0; ch<NUMBER_OF_KERNEL; ch++){
                frame[ch][row][col] = in_elem.data[ch];
            }
        }
    }

    // Pooling: window of Y_STRIDE x X_STRIDE, stepping by the same amounts.
    for (int ch=0; ch<NUMBER_OF_KERNEL; ch++){
        for (int top=0; top<V_PIXEL_WIDTH_IN; top += Y_STRIDE){
            for (int left=0; left<H_PIXEL_WIDTH_IN; left += X_STRIDE){
                // Start from the top-left sample and keep the largest one.
                conv_type best = frame[ch][top][left];
                for (int dy=0; dy<Y_STRIDE; dy++){
                    for (int dx=0; dx<X_STRIDE; dx++){
                        if (best < frame[ch][top+dy][left+dx])
                            best = frame[ch][top+dy][left+dx];
                    }
                }
                pooled[ch][top/Y_STRIDE][left/X_STRIDE] = best;
            }
        }
    }

    // Stream out the pooled frame.
    for (int row=0; row<V_PIXEL_WIDTH_OUT; row++){
        for (int col=0; col<H_PIXEL_WIDTH_OUT; col++){
            for (int ch=0; ch<NUMBER_OF_KERNEL; ch++){
                out_elem.data[ch] = pooled[ch][row][col];
            }

            out_elem.user = (col==0 && row==0) ? 1 : 0;             // TUSER on the first element
            out_elem.last = (col == (H_PIXEL_WIDTH_OUT-1)) ? 1 : 0; // TLAST at the end of a row

            outs << out_elem;
        }
    }

    return 0;
}
// input_layer.h
// 2018/05/08 by marsee
//
#ifndef __INPUT_LAYER_H__
#define __INPUT_LAYER_H__
// Data width of the ap_axiu input stream of input_layer().
static const size_t IN_W = 32;
// Total bit width (OUT_W) and integer bit count (OUT_I) of the fixed-point
// values input_layer() writes to its output stream.
static const size_t OUT_W = 9;
static const size_t OUT_I = 1;
// Frame size processed by input_layer(): rows x columns.
static const size_t VERTICAL_PIXEL_HIGHT = 10;
static const size_t HORIZONTAL_PIXEL_WIDTH = 56;
#endif
// input_layer.cpp
// 2018/05/08 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include "layer_general.h"
#include <ap_axi_sdata.h>
#include "input_layer.h"
// Input layer: converts a frame of AXI4-Stream pixels to fixed point.
//
// Input elements are discarded until one arrives with user != 0; that element
// is treated as the first of VERTICAL_PIXEL_HIGHT x HORIZONTAL_PIXEL_WIDTH
// pixels. For every pixel the low 8 bits of the data word are divided by 256
// and written to `outs` as an OUT_W-bit fixed-point value; user and last are
// copied from the input element. Always returns 0.
int input_layer(hls::stream<ap_axiu<IN_W,1,1,1> >&ins,
hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> >&outs){
#pragma HLS DATA_PACK variable=outs
#pragma HLS INTERFACE axis register both port=ins
ap_axiu<IN_W,1,1,1> pix;
ap_fixed_axis<OUT_W,OUT_I,1,1> out;
Loop1: do {
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// The frame starts when user becomes 1
ins >> pix;
} while(pix.user == 0);
Loop2: for (int y=0; y<VERTICAL_PIXEL_HIGHT; y++){
Loop3: for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the first element has already been read by Loop1
ins >> pix; // input from the AXI4-Stream
// Keep only the low byte of the word and scale it by 1/256.
out.data[0] = (ap_fixed<OUT_W,OUT_I,AP_TRN,AP_WRAP>)((ap_ufixed<16, 8, AP_TRN, AP_WRAP>)(pix.data & 0xff) / 256);
out.user = pix.user;
out.last = pix.last;
outs << out;
}
}
return(0);
}
// input_layer_tb.cpp
// 2018/05/09 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "input_layer.h"
#include "bmp_header.h"
// Function under test; only declared here.
int input_layer(hls::stream<ap_axiu<IN_W,1,1,1> >&ins,
hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> >&outs);
// BMP file that main() opens and reads as the input image.
#define BMP_FILE_NAME "straight_RED_rect0_00_rgb.bmp"
// Test bench for input_layer().
//
// Reads the BMP file BMP_FILE_NAME (three bytes per pixel, rows stored
// bottom-up), feeds its pixels to input_layer() preceded by five dummy words
// with TUSER = 0, and writes the fixed-point results to input_layer_output.h.
//
// Exits with status 1 when the file cannot be opened or read completely, or
// when a buffer cannot be allocated; otherwise returns 0.
int main(){
    using namespace std;

    hls::stream<ap_axiu<IN_W,1,1,1> > ins;
    hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> > outs;
    ap_axiu<IN_W,1,1,1> pix;

    BITMAPFILEHEADER bmpfhr; // BMP file header (for read)
    BITMAPINFOHEADER bmpihr; // BMP info header (for read)
    FILE *fbmpr;
    int *rd_bmp;
    ap_fixed<OUT_W,OUT_I,AP_TRN,AP_WRAP> *out_vals;
    int blue, green, red;
    ap_fixed_axis<OUT_W,OUT_I,1,1> val;

    if ((fbmpr = fopen(BMP_FILE_NAME, "rb")) == NULL){
        // Report the file that was actually requested.
        fprintf(stderr, "Can't open %s by binary read mode\n", BMP_FILE_NAME);
        exit(1);
    }

    // Read the BMP headers. The file header is read field by field; any
    // short read means the file is not usable.
    if (fread(&bmpfhr.bfType, sizeof(uint16_t), 1, fbmpr) != 1 ||
        fread(&bmpfhr.bfSize, sizeof(uint32_t), 1, fbmpr) != 1 ||
        fread(&bmpfhr.bfReserved1, sizeof(uint16_t), 1, fbmpr) != 1 ||
        fread(&bmpfhr.bfReserved2, sizeof(uint16_t), 1, fbmpr) != 1 ||
        fread(&bmpfhr.bfOffBits, sizeof(uint32_t), 1, fbmpr) != 1 ||
        fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr) != 1){
        fprintf(stderr, "Can't read the BMP header of %s\n", BMP_FILE_NAME);
        fclose(fbmpr);
        exit(1);
    }

    // Allocate the pixel buffers.
    if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
        fprintf(stderr, "Can't allocate rd_bmp memory\n");
        exit(1);
    }
    if ((out_vals =(ap_fixed<OUT_W,OUT_I,AP_TRN,AP_WRAP> *)malloc(sizeof(ap_fixed<OUT_W,OUT_I,AP_TRN,AP_WRAP>) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
        fprintf(stderr, "Can't allocate out_vals memory\n");
        exit(1);
    }

    // Copy the BMP pixels into rd_bmp. The row order has to be reversed
    // because the file stores the bottom row first.
    for (int y=0; y<bmpihr.biHeight; y++){
        for (int x=0; x<bmpihr.biWidth; x++){
            blue = fgetc(fbmpr);
            green = fgetc(fbmpr);
            red = fgetc(fbmpr);
            if (blue == EOF || green == EOF || red == EOF){
                // Without this check a truncated file would silently yield
                // 0xff components.
                fprintf(stderr, "Unexpected end of pixel data in %s\n", BMP_FILE_NAME);
                exit(1);
            }
            rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
        }
    }
    fclose(fbmpr);

    // Prepare the input data: five dummy words before the frame starts.
    for(int i=0; i<5; i++){
        pix.user = 0;
        pix.data = i;
        ins << pix;
    }

    // Push one frame of data into ins.
    for(int j=0; j < bmpihr.biHeight; j++){
        for(int i=0; i < bmpihr.biWidth; i++){
            pix.data = (ap_uint<32>)rd_bmp[(j*bmpihr.biWidth)+i];

            if (j==0 && i==0) // TUSER = 1 on the first pixel of the frame
                pix.user = 1;
            else
                pix.user = 0;

            if (i == bmpihr.biWidth-1) // assert TLAST at the end of a row
                pix.last = 1;
            else
                pix.last = 0;

            ins << pix;
        }
    }

    input_layer(ins, outs);

    // Drain outs into out_vals[].
    for (int y=0; y<bmpihr.biHeight; y++){
        for (int x=0; x<bmpihr.biWidth; x++){
            outs >> val;
            out_vals[y*bmpihr.biWidth+x] = val.data[0];
        }
    }

    // Write the results as a header file.
    ofstream OH("input_layer_output.h");
    OH << "// input_layer_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    // strftime zero-pads every field, giving "YYYY/MM/DD HH:MM:SS".
    char stamp[32];
    strftime(stamp, sizeof(stamp), "%Y/%m/%d %H:%M:%S", localNow);
    OH << "// " << stamp << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __INPUT_LAYER_OUTPUT_H__" << endl;
    OH << "#define __INPUT_LAYER_OUTPUT_H__" << endl;
    OH << endl;
    OH << "const ap_fixed<9, 1, AP_TRN, AP_WRAP> conv_layer_out[" << bmpihr.biHeight*bmpihr.biWidth << "] = {" << endl;
    for (int y=0; y<bmpihr.biHeight; y++){
        for (int x=0; x<bmpihr.biWidth; x++){
            OH << " " << fixed << setprecision(12) << out_vals[bmpihr.biWidth*y+x];
            if (y==bmpihr.biHeight-1 && x==bmpihr.biWidth-1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;

    free(rd_bmp);
    free(out_vals);
    return(0);
}
The Nexys 4 board contains two external memories: a 128Mbit Cellular RAM (pseudo-static DRAM) and a 128Mbit non-volatile serial Flash device. The Cellular RAM has an SRAM interface, and the serial Flash is on a dedicated quad-mode (x4) SPI bus. The connections and pin assignments between the FPGA and external memories are shown in Fig 4 and Table 3.
// relu_affine1.h
// 2018/05/07 by marsee (HLS stream)
// for relu after affine1
//
#ifndef __RELU_H__
#define __RELU_H__
// Size of the data set handled by the ReLU test: HORIZONTAL_PIXEL_WIDTH
// elements per row, VERTICAL_PIXEL_WIDTH rows.
static const size_t HORIZONTAL_PIXEL_WIDTH = 100;
static const size_t VERTICAL_PIXEL_WIDTH = 1;
// Total number of elements (rows x columns).
static const size_t ALL_PIXELS = HORIZONTAL_PIXEL_WIDTH * VERTICAL_PIXEL_WIDTH;
// Number of data values carried by each stream element.
static const size_t NUMBER_OF_KERNEL = 1;
// Total bit width (W) and integer bit count (I) of conv_type.
static const size_t W = 19;
static const size_t I = 7;
// Fixed-point sample type with truncation (AP_TRN) and wrap-around (AP_WRAP).
typedef ap_fixed<W, I, AP_TRN, AP_WRAP> conv_type;
#endif
// relu_affine1_tb.cpp
// 2018/02/20 by marsee (HLS stream)
// 2018/05/06 : relu_template.h に対応
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "relu_affine1.h"
#include "affine_layer1_output.h"
// Fixed-point ReLU function under test; only declared here.
int relu_affine1(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs);
// Float ReLU whose output the test bench compares against; only declared here.
int relu_soft( hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& ins,
hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs);
// Test bench for relu_affine1().
// Feeds the affine1 layer output through the fixed-point ReLU and through the
// floating-point reference relu_soft(), compares the two result streams and
// writes both results as C arrays to relu_output.h.
// Always returns 0; mismatches are only counted and reported.
int main(){
    using namespace std;

    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > ins;
    hls::stream<float_axis<NUMBER_OF_KERNEL,1> > ins_soft;
    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > outs;
    hls::stream<float_axis<NUMBER_OF_KERNEL,1> > outs_soft;

    float relu_fout[ALL_PIXELS][NUMBER_OF_KERNEL];
    conv_type relu_out[ALL_PIXELS][NUMBER_OF_KERNEL];

    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix;
    float_axis<NUMBER_OF_KERNEL,1> fpix;

    // Queue five dummy elements with user == 0 ahead of the frame; both ReLU
    // implementations discard input until they see user == 1.
    for(int i=0; i<5; i++){
        pix.user = 0;
        for(int j=0; j<NUMBER_OF_KERNEL; j++){
            pix.data[j] = (conv_type)i;
        }
        ins << pix;

        fpix.user = 0;
        for(int j=0; j<NUMBER_OF_KERNEL; j++){
            fpix.data[j] = (float)i;
        }
        ins_soft << fpix;
    }

    // Queue one frame of data into ins and ins_soft.
    for(int j=0; j < VERTICAL_PIXEL_WIDTH; j++){
        for(int i=0; i < HORIZONTAL_PIXEL_WIDTH; i++){
            for(int k=0; k<NUMBER_OF_KERNEL; k++){
                pix.data[k] = affine1_out[j*HORIZONTAL_PIXEL_WIDTH+i];
                fpix.data[k] = affine1_fout[j*HORIZONTAL_PIXEL_WIDTH+i];
            }

            if (j==0 && i==0){ // assert TUSER on the first element of the frame
                pix.user = 1;
                fpix.user = 1;
            } else {
                pix.user = 0;
                fpix.user = 0;
            }

            if (i == HORIZONTAL_PIXEL_WIDTH-1){ // assert TLAST at the end of each row
                pix.last = 1;
                fpix.last = 1;
            } else {
                pix.last = 0;
                fpix.last = 0;
            }

            ins << pix;
            ins_soft << fpix;
        }
    }

    relu_affine1(ins, outs);
    relu_soft(ins_soft, outs_soft);

    // Drain outs / outs_soft into relu_out[][] / relu_fout[][] and compare.
    int errcnt=0;
    for(int j=0; j < VERTICAL_PIXEL_WIDTH; j++){
        for(int i=0; i < HORIZONTAL_PIXEL_WIDTH; i++){
            outs >> pix;
            outs_soft >> fpix;

            for(int k=0; k<NUMBER_OF_KERNEL; k++){
                relu_out[j*HORIZONTAL_PIXEL_WIDTH+i][k] = pix.data[k];
                relu_fout[j*HORIZONTAL_PIXEL_WIDTH+i][k] = fpix.data[k];
                if ((double)pow((double)pix.data[k]-(double)fpix.data[k],(double)2) > 4){ // squared error larger than 4
                    printf("ERROR HW and SW results mismatch i = %d, j = %d, HW[%d] = %f, SW[%d] = %f\n", i, j, k, (float)pix.data[k], k, fpix.data[k]);
                    errcnt++;
                    //return(1);
                }
                printf("HW and SW results i = %d, j = %d, HW[%d] = %f, SW[%d] = %f\n", i, j, k, (float)pix.data[k], k, fpix.data[k]);
            }
        }
    }
    cout << "Error Count = " << errcnt << endl;
    // Only claim success when no mismatch was counted.
    if (errcnt == 0)
        cout << "Success HW and SW results match" << endl;
    cout << endl;

    // Write the ReLU results to a header file.
    ofstream OH("relu_output.h");
    OH << "// relu_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
    // setw() applies to the next field only, so it is repeated for each of
    // hour, minute and second to zero-pad all three.
    OH << " " << setfill('0') << setw(2) << localNow->tm_hour << ":" << setw(2) << localNow->tm_min << ":" << setw(2) << localNow->tm_sec << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __RELU_OUTPUT_H__" << endl;
    OH << "#define __RELU_OUTPUT_H__" << endl;
    OH << endl;

    OH << "const float relu_fout[" << VERTICAL_PIXEL_WIDTH*HORIZONTAL_PIXEL_WIDTH << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<VERTICAL_PIXEL_WIDTH ; y++){
        for (int x=0; x<HORIZONTAL_PIXEL_WIDTH ; x++){
            OH << " {" << fixed << setprecision(12) << relu_fout[HORIZONTAL_PIXEL_WIDTH*y+x][0];
            for(int i=1; i<NUMBER_OF_KERNEL; i++){
                OH << ", " << relu_fout[HORIZONTAL_PIXEL_WIDTH*y+x][i];
            }
            OH << "}";
            if (y==VERTICAL_PIXEL_WIDTH-1 && x==HORIZONTAL_PIXEL_WIDTH-1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;

    // The emitted element type is derived from W and I so that it matches
    // conv_type, the type the values were produced in.
    OH << "const ap_fixed<" << W << ", " << I << ", AP_TRN, AP_WRAP> relu_out[" << VERTICAL_PIXEL_WIDTH*HORIZONTAL_PIXEL_WIDTH << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<VERTICAL_PIXEL_WIDTH ; y++){
        for (int x=0; x<HORIZONTAL_PIXEL_WIDTH ; x++){
            OH << " {" << (float)relu_out[HORIZONTAL_PIXEL_WIDTH*y+x][0];
            for(int i=1; i<NUMBER_OF_KERNEL; i++){
                // Index by i (not a fixed 1) so every channel is written.
                OH << ", " << (float)relu_out[HORIZONTAL_PIXEL_WIDTH*y+x][i];
            }
            OH << "}";
            if (y==VERTICAL_PIXEL_WIDTH -1 && x==HORIZONTAL_PIXEL_WIDTH -1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;

    return(0);
}
// Floating-point reference ReLU.
// Discards input until an element with user == 1 arrives, then processes one
// frame (VERTICAL_PIXEL_WIDTH * HORIZONTAL_PIXEL_WIDTH elements), replacing
// every negative channel value with 0 before forwarding the element to outs.
// Always returns 0.
int relu_soft(hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& ins,
        hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs){
    float_axis<NUMBER_OF_KERNEL,1> sample;

    // Synchronize on the frame start marker.
    ins >> sample;
    while (sample.user == 0)
        ins >> sample;

    const size_t total = VERTICAL_PIXEL_WIDTH * HORIZONTAL_PIXEL_WIDTH;
    for (size_t n=0; n<total; n++){
        // Element 0 was already read while synchronizing.
        if (n != 0)
            ins >> sample;

        for (int ch=0; ch<NUMBER_OF_KERNEL; ch++){
            if (sample.data[ch] < 0.0)
                sample.data[ch] = 0.0;
        }

        outs << sample;
    }

    return(0);
}
// relu_affine1.cpp
// 2018/05/06 by marsee
// relu after affine1
//
#include "relu_template.h"
// ReLU stage placed after affine1.
// Instantiates relu_template with W=19, I=7, one channel per element and a
// 1 x 100 frame, and returns its result.
int relu_affine1(hls::stream<ap_fixed_axis<19,7,1,1> >& ins,
hls::stream<ap_fixed_axis<19,7,1,1> >& outs){
// Pack the fields of each stream element into a single data word.
#pragma HLS DATA_PACK variable=outs
#pragma HLS DATA_PACK variable=ins
return(relu_template<19,7,1,1,100>(ins, outs));
}
// relu_template.h
// 2018/05/06 by marsee
// テンプレートを使用して汎用化した relu
//
#ifndef __RELU_TEMPLATE_H__
#define __RELU_TEMPLATE_H__
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include <ap_fixed.h>
#include "layer_general.h"
// Generic ReLU over an HLS stream of multi-channel fixed-point elements.
// Discards input until an element with user == 1 arrives, then processes
// VERTICAL_HIGHT * HORIZONTAL_WIDTH elements: every channel value below 0 is
// replaced by 0 and the element is forwarded to outs with its other fields
// untouched. Always returns 0.
// Template parameters:
//   W, I             - ap_fixed template arguments of the sample type
//   NUMBER_OF_KERNEL - channels per stream element
//   VERTICAL_HIGHT   - rows per frame
//   HORIZONTAL_WIDTH - elements per row
template<
const size_t W,
const size_t I,
const size_t NUMBER_OF_KERNEL,
const size_t VERTICAL_HIGHT,
const size_t HORIZONTAL_WIDTH
>int relu_template(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs){
typedef ap_fixed<W,I,AP_TRN,AP_WRAP> conv_type;
ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix;
do {
#pragma HLS PIPELINE II=1
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// The frame starts when user becomes 1
ins >> pix;
} while(pix.user == 0);
Loop1: for (int y=0; y<VERTICAL_HIGHT; y++){
Loop2: for (int x=0; x<HORIZONTAL_WIDTH; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the first element was already read above
ins >> pix; // read from the AXI4-Stream input
for(int i=0; i<NUMBER_OF_KERNEL; i++){
if (pix.data[i] < conv_type(0.0)) // negative values are clamped to 0
pix.data[i] = conv_type(0.0);
}
outs << pix;
}
}
return(0);
}
#endif
// relu_conv1.cpp
// 2018/05/06 by marsee
// relu after conv1
//
#include "relu_template.h"
// ReLU stage placed after conv1.
// Instantiates relu_template with W=16, I=6, two channels per element and a
// 6 x 52 frame, and returns its result.
int relu_conv1(hls::stream<ap_fixed_axis<16,6,2,1> >& ins,
hls::stream<ap_fixed_axis<16,6,2,1> >& outs){
// Pack the fields of each stream element into a single data word.
#pragma HLS DATA_PACK variable=outs
#pragma HLS DATA_PACK variable=ins
return(relu_template<16,6,2,6,52>(ins, outs));
}
// relu_conv1.h
// 2018/02/20 by marsee (HLS stream)
//
// Compile-time configuration for the ReLU stage that follows conv1:
// stream geometry, channel count and the fixed-point sample type.
// NOTE(review): size_t and ap_fixed are used here without including their
// headers, so the including file must have pulled them in first.
// NOTE(review): relu_affine1.h uses the same include guard (__RELU_H__), so
// the two headers cannot both take effect in one translation unit.
#ifndef __RELU_H__
#define __RELU_H__
// Number of stream elements per row.
static const size_t HORIZONTAL_PIXEL_WIDTH = 52;
// Number of rows per frame.
static const size_t VERTICAL_PIXEL_WIDTH = 6;
// Total number of stream elements per frame.
static const size_t ALL_PIXELS = HORIZONTAL_PIXEL_WIDTH * VERTICAL_PIXEL_WIDTH;
// Number of channels carried by each stream element.
static const size_t NUMBER_OF_KERNEL = 2;
// NOTE(review): purpose not evident from this header - confirm where it is used.
static const size_t ARRAY_SIZE = 2;
// First and second ap_fixed template arguments of conv_type
// (total bit width and integer bits in the ap_fixed convention).
static const size_t W = 16;
static const size_t I = 6;
// Fixed-point sample type; truncating quantization, wrap-around overflow.
typedef ap_fixed<W, I, AP_TRN, AP_WRAP> conv_type;
#endif
// relu_conv1_tb.cpp
// 2018/02/20 by marsee (HLS stream)
// 2018/05/06 : relu_template.h に対応
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "relu_conv1.h"
#include "conv_layer_output.h"
// Fixed-point ReLU under test (its definition appears under the
// "relu_conv1.cpp" header comment).
int relu_conv1(hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs);
// Floating-point reference ReLU, defined after main() in this test bench.
int relu_soft( hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& ins,
hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs);
// Test bench for relu_conv1().
// Feeds the conv layer output through the fixed-point ReLU and through the
// floating-point reference relu_soft(), compares the two result streams and
// writes both results as C arrays to relu_output.h.
// Returns 1 on the first mismatch (squared error above 4), otherwise 0.
int main(){
    using namespace std;

    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > ins;
    hls::stream<float_axis<NUMBER_OF_KERNEL,1> > ins_soft;
    hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > outs;
    hls::stream<float_axis<NUMBER_OF_KERNEL,1> > outs_soft;

    float relu_fout[ALL_PIXELS][NUMBER_OF_KERNEL];
    conv_type relu_out[ALL_PIXELS][NUMBER_OF_KERNEL];

    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> pix;
    float_axis<NUMBER_OF_KERNEL,1> fpix;

    // Queue five dummy elements with user == 0 ahead of the frame; both ReLU
    // implementations discard input until they see user == 1.
    for(int i=0; i<5; i++){
        pix.user = 0;
        for(int j=0; j<NUMBER_OF_KERNEL; j++){
            pix.data[j] = (conv_type)i;
        }
        ins << pix;

        fpix.user = 0;
        for(int j=0; j<NUMBER_OF_KERNEL; j++){
            fpix.data[j] = (float)i;
        }
        ins_soft << fpix;
    }

    // Queue one frame of data into ins and ins_soft.
    for(int j=0; j < VERTICAL_PIXEL_WIDTH; j++){
        for(int i=0; i < HORIZONTAL_PIXEL_WIDTH; i++){
            for(int k=0; k<NUMBER_OF_KERNEL; k++){
                pix.data[k] = conv_layer_out[j*HORIZONTAL_PIXEL_WIDTH+i][k];
                fpix.data[k] = conv_layer_fout[j*HORIZONTAL_PIXEL_WIDTH+i][k];
            }

            if (j==0 && i==0){ // assert TUSER on the first element of the frame
                pix.user = 1;
                fpix.user = 1;
            } else {
                pix.user = 0;
                fpix.user = 0;
            }

            if (i == HORIZONTAL_PIXEL_WIDTH-1){ // assert TLAST at the end of each row
                pix.last = 1;
                fpix.last = 1;
            } else {
                pix.last = 0;
                fpix.last = 0;
            }

            ins << pix;
            ins_soft << fpix;
        }
    }

    relu_conv1(ins, outs);
    relu_soft(ins_soft, outs_soft);

    // Drain outs / outs_soft into relu_out[][] / relu_fout[][] and compare.
    int errcnt=0;
    for(int j=0; j < VERTICAL_PIXEL_WIDTH; j++){
        for(int i=0; i < HORIZONTAL_PIXEL_WIDTH; i++){
            outs >> pix;
            outs_soft >> fpix;

            for(int k=0; k<NUMBER_OF_KERNEL; k++){
                relu_out[j*HORIZONTAL_PIXEL_WIDTH+i][k] = pix.data[k];
                relu_fout[j*HORIZONTAL_PIXEL_WIDTH+i][k] = fpix.data[k];
                if ((double)pow((double)pix.data[k]-(double)fpix.data[k],(double)2) > 4){ // squared error larger than 4
                    printf("ERROR HW and SW results mismatch i = %d, j = %d, HW[%d] = %f, SW[%d] = %f\n", i, j, k, (float)pix.data[k], k, fpix.data[k]);
                    errcnt++;
                    return(1);
                }
                printf("HW and SW results i = %d, j = %d, HW[%d] = %f, SW[%d] = %f\n", i, j, k, (float)pix.data[k], k, fpix.data[k]);
            }
        }
    }
    cout << "Error Count = " << errcnt << endl;
    cout << "Success HW and SW results match" << endl;
    cout << endl;

    // Write the ReLU results to a header file.
    ofstream OH("relu_output.h");
    OH << "// relu_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
    // setw() applies to the next field only, so it is repeated for each of
    // hour, minute and second to zero-pad all three.
    OH << " " << setfill('0') << setw(2) << localNow->tm_hour << ":" << setw(2) << localNow->tm_min << ":" << setw(2) << localNow->tm_sec << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __RELU_OUTPUT_H__" << endl;
    OH << "#define __RELU_OUTPUT_H__" << endl;
    OH << endl;

    OH << "const float relu_fout[" << VERTICAL_PIXEL_WIDTH*HORIZONTAL_PIXEL_WIDTH << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<VERTICAL_PIXEL_WIDTH ; y++){
        for (int x=0; x<HORIZONTAL_PIXEL_WIDTH ; x++){
            OH << " {" << fixed << setprecision(12) << relu_fout[HORIZONTAL_PIXEL_WIDTH*y+x][0];
            for(int i=1; i<NUMBER_OF_KERNEL; i++){
                OH << ", " << relu_fout[HORIZONTAL_PIXEL_WIDTH*y+x][i];
            }
            OH << "}";
            if (y==VERTICAL_PIXEL_WIDTH-1 && x==HORIZONTAL_PIXEL_WIDTH-1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;

    // The emitted element type is derived from W and I so that it always
    // matches conv_type, the type the values were produced in.
    OH << "const ap_fixed<" << W << ", " << I << ", AP_TRN, AP_WRAP> relu_out[" << VERTICAL_PIXEL_WIDTH*HORIZONTAL_PIXEL_WIDTH << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
    for (int y=0; y<VERTICAL_PIXEL_WIDTH ; y++){
        for (int x=0; x<HORIZONTAL_PIXEL_WIDTH ; x++){
            OH << " {" << (float)relu_out[HORIZONTAL_PIXEL_WIDTH*y+x][0];
            for(int i=1; i<NUMBER_OF_KERNEL; i++){
                // Index by i (not a fixed 1) so this stays correct for any
                // number of channels.
                OH << ", " << (float)relu_out[HORIZONTAL_PIXEL_WIDTH*y+x][i];
            }
            OH << "}";
            if (y==VERTICAL_PIXEL_WIDTH -1 && x==HORIZONTAL_PIXEL_WIDTH -1)
                OH << endl;
            else
                OH << "," << endl;
        }
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;

    return(0);
}
int relu_soft(hls::stream<float_axis<2,1> >& ins,
hls::stream<float_axis<2,1> >& outs){
float_axis<2,1> fpix;
do {
// user が 1になった時にフレームがスタートする
ins >> fpix;
} while(fpix.user == 0);
Loop1: for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
Loop2: for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
if (!(x==0 && y==0)) // 最初の入力はすでに入力されている
ins >> fpix; // AXI4-Stream からの入力
for(int i=0; i<NUMBER_OF_KERNEL; i++){
if (fpix.data[i] < 0.0) // データが 0 以下だったら 0 にする
fpix.data[i] = 0.0;
}
outs << fpix;
}
}
return(0);
}
// conv_layer_template.h
// 2018/05/01 : by marsee
// テンプレートを使用して汎用化した conv_layer
//
// 2018/08/27 : conv_bias のバグフィックス
//
#ifndef __CONV_LAYER_TEMPLATE_H___
#define __CONV_LAYER_TEMPLATE_H___
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include <ap_fixed.h>
#include "layer_general.h"
// TO_LITERAL stringizes its argument; PRAGMA_HLS(tok) expands to
// _Pragma("HLS tok"), which lets an HLS pragma be written where a macro
// invocation is needed instead of a #pragma line.
#define TO_LITERAL(x) #x
#define PRAGMA_HLS(tok) _Pragma(TO_LITERAL(HLS tok)) // credit: @hiyuh
// Generic streaming convolution layer.
//
// Discards input until an element with user == 1 arrives, then scans a
// (VERTICAL_PIXEL_WIDTH+2*PADDING) x (HORIZONTAL_PIXEL_WIDTH+2*PADDING) grid.
// Grid positions inside the padding border take PADDING_DATA; all others are
// read from ins. For every position the V_KERNEL_SIZE x H_KERNEL_SIZE window
// ending at that position is convolved with conv_weight for each output
// channel and conv_bias is added. Results are written to outs once the window
// is fully populated and the position matches the STRIDE grid.
// Always returns 0.
template<
    const size_t IN_W,      // input bit width
    const size_t IN_I,      // input binary point position
    const size_t MID_W,     // bit width of the intermediate accumulator
    const size_t MID_I,     // binary point position of the accumulator
    const size_t OUT_W,     // output bit width
    const size_t OUT_I,     // output binary point position
    const size_t WB_W,      // bit width of weights and biases
    const size_t WB_I,      // binary point position of weights and biases
    const size_t NUMBER_OF_IN_CHANNELS,
    const size_t NUMBER_OF_OUT_CHANNELS,
    const size_t H_KERNEL_SIZE,
    const size_t V_KERNEL_SIZE,
    const size_t PADDING,
    const size_t PADDING_DATA,
    const size_t STRIDE,
    const size_t HORIZONTAL_PIXEL_WIDTH,
    const size_t VERTICAL_PIXEL_WIDTH
>int conv_layer_template(hls::stream<ap_fixed_axis<IN_W,IN_I,NUMBER_OF_IN_CHANNELS,1> >&ins,
    hls::stream<ap_fixed_axis<OUT_W,OUT_I,NUMBER_OF_OUT_CHANNELS,1> >&outs,
    const ap_fixed<WB_W,WB_I,AP_TRN,AP_WRAP> conv_weight[NUMBER_OF_OUT_CHANNELS][NUMBER_OF_IN_CHANNELS][V_KERNEL_SIZE][H_KERNEL_SIZE],
    const ap_fixed<WB_W,WB_I,AP_TRN,AP_WRAP> conv_bias[NUMBER_OF_OUT_CHANNELS]
){
    typedef ap_fixed<IN_W,IN_I,AP_TRN,AP_WRAP> in_type;
    typedef ap_fixed<MID_W,MID_I,AP_TRN,AP_WRAP> val_type;

    ap_fixed_axis<IN_W,IN_I,NUMBER_OF_IN_CHANNELS,1> pix;
    ap_fixed_axis<OUT_W,OUT_I,NUMBER_OF_OUT_CHANNELS,1> conv_out;

    // The previous V_KERNEL_SIZE-1 rows of input, one entry per column.
    in_type line_buf[V_KERNEL_SIZE-1][HORIZONTAL_PIXEL_WIDTH+2*PADDING][NUMBER_OF_IN_CHANNELS];
    const size_t AP_FACTOR = V_KERNEL_SIZE-1;
PRAGMA_HLS(array_partition variable=line_buf block factor=AP_FACTOR dim=1)
#pragma HLS resource variable=line_buf core=RAM_2P

    // The current convolution window.
    in_type pix_mat[V_KERNEL_SIZE][H_KERNEL_SIZE][NUMBER_OF_IN_CHANNELS];
#pragma HLS array_partition variable=pix_mat complete

    val_type val;

    Loop1: do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
        // The frame starts when user becomes 1.
        ins >> pix;
    } while(pix.user == 0);

    Loop_y : for (int y=0; y<(VERTICAL_PIXEL_WIDTH+2*PADDING); y++){
        Loop_x : for (int x=0; x<(HORIZONTAL_PIXEL_WIDTH+2*PADDING); x++){
#pragma HLS PIPELINE II=1
            if (y<PADDING || y>=(VERTICAL_PIXEL_WIDTH+PADDING) ||
                x<PADDING || x>=(HORIZONTAL_PIXEL_WIDTH+PADDING)){ // inside the padding border
                // pix carries NUMBER_OF_IN_CHANNELS values, so the fill must
                // be bounded by the input channel count; bounding it by the
                // output channel count writes past pix.data[] whenever there
                // are more output than input channels and leaves channels
                // unfilled in the opposite case.
                for (int n=0; n<NUMBER_OF_IN_CHANNELS; n++){
                    pix.data[n] = PADDING_DATA;
                }
            } else if (!(x==PADDING && y==PADDING)){ // the first element was already read above
                ins >> pix;
            }

            // Shift the window one column to the left.
            Loop_pix_mat0 : for (int k=0; k<V_KERNEL_SIZE; k++){
                Loop_pix_mat1 : for (int m=0; m<H_KERNEL_SIZE-1; m++){
#pragma HLS UNROLL
                    Loop_pixmat2 : for (int n=0; n<NUMBER_OF_IN_CHANNELS; n++){
                        pix_mat[k][m][n] = pix_mat[k][m+1][n];
                    }
                }
            }

            // Fill the rightmost window column: earlier rows from line_buf ...
            Loop_pix_mat_e0 : for (int i=0; i<V_KERNEL_SIZE-1; i++){
                Loop_pix_mat_e1 : for (int n=0; n<NUMBER_OF_IN_CHANNELS; n++){
                    pix_mat[i][H_KERNEL_SIZE-1][n] = line_buf[i][x][n];
                }
            }
            // ... and the newest element in the bottom-right corner.
            Loop_pix_mat_nd : for (int n=0; n<NUMBER_OF_IN_CHANNELS; n++){
                pix_mat[V_KERNEL_SIZE-1][H_KERNEL_SIZE-1][n] = pix.data[n];
            }

            // Move column x of line_buf up by one row, then store the new element.
            Loop_lineb_exchg0 : for (int i=0; i<V_KERNEL_SIZE-2; i++){
                Loop_lineb_exchg1 : for (int n=0; n<NUMBER_OF_IN_CHANNELS; n++){
                    line_buf[i][x][n] = line_buf[i+1][x][n];
                }
            }
            Loop_lineb_exchg2 : for (int n=0; n<NUMBER_OF_IN_CHANNELS; n++){
                line_buf[V_KERNEL_SIZE-2][x][n] = pix.data[n];
            }

            // Convolution for every output channel.
            Conv_calc0 : for (int k=0; k<NUMBER_OF_OUT_CHANNELS; k++){
                val = 0.0;
                Conv_calc1 : for (int j=0; j<V_KERNEL_SIZE; j++){
                    Conv_calc2 : for (int i=0; i<H_KERNEL_SIZE; i++){
                        Conv_calc3 : for (int n=0; n<NUMBER_OF_IN_CHANNELS; n++){
                            val += (val_type)pix_mat[j][i][n] * (val_type)conv_weight[k][n][j][i];
                        }
                    }
                }
                val += (val_type)conv_bias[k];
                conv_out.data[k] = val;
            }

            // The first V_KERNEL_SIZE-1 rows and the first H_KERNEL_SIZE-1
            // columns of every row have an incomplete window: emit nothing.
            if (x<(H_KERNEL_SIZE-1) || y<(V_KERNEL_SIZE-1))
                continue;
            else { // valid data
                if (x==(H_KERNEL_SIZE-1) && y==(V_KERNEL_SIZE-1)){ // assert TUSER on the first valid result
                    conv_out.user = 1;
                } else {
                    conv_out.user = 0;
                }

                if (x > (HORIZONTAL_PIXEL_WIDTH+2*PADDING-1-STRIDE)){ // assert TLAST at the end of the row
                    conv_out.last = 1;
                } else {
                    conv_out.last = 0;
                }

                if ((x%STRIDE==STRIDE-1) && (y%STRIDE==STRIDE-1)){
                    outs << conv_out;
                }
            }
        }
    }
    return(0);
}
#endif
// conv_layer1.cpp
// 2018/05/05 by marsee
// conv layer1 by template
//
#include "conv_layer_template.h"
#include "conv1_weight.h"
#include "conv1_bias.h"
// First convolution layer: a thin wrapper that instantiates
// conv_layer_template with conv1_weight / conv1_bias and returns its result.
// Template arguments, in the template's parameter order:
//   IN_W=9, IN_I=1, MID_W=22, MID_I=6, OUT_W=16, OUT_I=6, WB_W=9, WB_I=1,
//   NUMBER_OF_IN_CHANNELS=1, NUMBER_OF_OUT_CHANNELS=2,
//   H_KERNEL_SIZE=5, V_KERNEL_SIZE=5, PADDING=0, PADDING_DATA=0, STRIDE=1,
//   HORIZONTAL_PIXEL_WIDTH=56, VERTICAL_PIXEL_WIDTH=10
int conv_layer1(hls::stream<ap_fixed_axis<9,1,1,1> >& ins,
hls::stream<ap_fixed_axis<16,6,2,1> >& outs){
// Pack the fields of each stream element into a single data word.
#pragma HLS DATA_PACK variable=outs
#pragma HLS DATA_PACK variable=ins
return(conv_layer_template<9,1,22,6,16,6,9,1,1,2,5,5,0,0,1,56,10>(ins, outs, conv1_weight, conv1_bias));
}
// conv_layer_tb.cpp
// 2018/02/13 by marsee
// 2018/04/14 : HLS ストリーム対応
// 2018/04/24 : 検証用に異なる実装のconv_layer2()と比較
// 2018/05/06 : conv_layer_template.h に対応
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "conv_layer1.h"
#include "bmp_header.h"
// Fixed-point convolution layer under test (its definition appears under the
// "conv_layer1.cpp" header comment).
int conv_layer1(hls::stream<ap_fixed_axis<9,1,1,1> >& ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >& outs);
// Floating-point reference implementation, defined after main().
int conv_layer_soft(hls::stream<ap_axiu<32,1,1,1> >& ins,
hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs);
// Second fixed-point implementation used to cross-check conv_layer1(),
// defined after main().
int conv_layer2(hls::stream<ap_axiu<32,1,1,1> >&ins,
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >&outs);
// Input image read by main().
#define BMP_FILE_NAME "straight_RED_rect0_00_rgb.bmp"
int main(){
using namespace std;
hls::stream<ap_fixed_axis<9,1,1,1> > ins;
hls::stream<ap_axiu<32,1,1,1> > ins2;
hls::stream<ap_axiu<32,1,1,1> > ins_soft;
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > outs;
hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> > outs2;
hls::stream<float_axis<NUMBER_OF_KERNEL,1> > outs_soft;
ap_axiu<32,1,1,1> pix;
ap_fixed_axis<9,1,1,1> pixf;
ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> vals;
ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> vals2;
float_axis<NUMBER_OF_KERNEL,1> vals_soft;
BITMAPFILEHEADER bmpfhr; // BMPファイルのファイルヘッダ(for Read)
BITMAPINFOHEADER bmpihr; // BMPファイルのINFOヘッダ(for Read)
FILE *fbmpr, *fbmpw, *fbmpwf;
int *rd_bmp;
int *hw_conv[NUMBER_OF_KERNEL];
int *sw_conv[NUMBER_OF_KERNEL];
float *hw_convf[NUMBER_OF_KERNEL];
float *sw_convf[NUMBER_OF_KERNEL];
int blue, green, red;
ap_uint<2> r_l;
char fhname[100];
char fsname[100];
if ((fbmpr = fopen(BMP_FILE_NAME, "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open straight_RED_rect0_00.bmp by binary read mode\n");
exit(1);
}
// bmpヘッダの読み出し
fread(&bmpfhr.bfType, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfSize, sizeof(uint32_t), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(uint32_t), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// ピクセルを入れるメモリをアロケートする
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
for(int i=0; i<NUMBER_OF_KERNEL; i++){
if ((hw_conv[i] =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate hw_conv[%d] memory\n", i);
exit(1);
}
if ((sw_conv[i] =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate sw_conv[%d] memory\n", i);
exit(1);
}
}
for(int i=0; i<NUMBER_OF_KERNEL; i++){
if ((hw_convf[i] =(float *)malloc(sizeof(float) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate hw_convf[%d] memory\n", i);
exit(1);
}
if ((sw_convf[i] =(float *)malloc(sizeof(float) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate sw_convf[%d] memory\n", i);
exit(1);
}
}
// rd_bmp にBMPのピクセルを代入。その際に、行を逆転する必要がある
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
// ins に入力データを用意する
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pixf.user = 0;
pix.data = i;
pixf.data[0] = i;
pix.last = 0;
pixf.last = 0;
ins << pixf;
ins2 << pix;
ins_soft << pix;
}
// 1 画面分のデータを ins、ins_soft に入力する
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
pix.data = (ap_uint<32>)rd_bmp[(j*bmpihr.biWidth)+i];
pixf.data[0] = (ap_fixed<9,1,AP_TRN,AP_WRAP>)((ap_ufixed<16,8,AP_TRN,AP_WRAP>)(pix.data & 0xff) / 256);
if (j==0 && i==0) { // 最初のデータの時に TUSER を 1 にする
pix.user = 1;
pixf.user = 1;
} else {
pix.user = 0;
pixf.user = 0;
}
if (i == bmpihr.biWidth-1) { // 行の最後でTLASTをアサートする
pix.last = 1;
pixf.last = 0;
} else {
pix.last = 0;
pixf.last = 0;
}
ins << pixf;
ins2 << pix;
ins_soft << pix;
}
}
// 畳み込み演算
conv_layer1(ins, outs);
conv_layer2(ins2, outs2);
conv_layer_soft(ins_soft, outs_soft);
// 画像サイズの縮小(畳み込みをすると行、列共に -4
bmpfhr.bfSize = (HORIZONTAL_PIXEL_WIDTH-4) * (VERTICAL_PIXEL_WIDTH-4) * 3 + 54;
bmpihr.biHeight = VERTICAL_PIXEL_WIDTH - 4;
bmpihr.biWidth = HORIZONTAL_PIXEL_WIDTH - 4;
// ハードウェアとソフトウェアのラプラシアン・フィルタの値のチェック
out_type val[NUMBER_OF_KERNEL];
out_type val2[NUMBER_OF_KERNEL];
float val_soft[NUMBER_OF_KERNEL];
cout << endl;
cout << "outs" << endl;
int errcnt=0;
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
outs >> vals;
outs2 >> vals2;
outs_soft >> vals_soft;
for(int k=0; k<NUMBER_OF_KERNEL; k++){
val[k] = vals.data[k];
val2[k] = vals2.data[k];
val_soft[k] = vals_soft.data[k];
int *hw_convp = hw_conv[k];
int *sw_convp = sw_conv[k];
hw_convp[(j*bmpihr.biWidth)+i] = ((int)val[k]+32)*4; // 32を足して負の符号を排除し、整数部6ビットなので、2ビット分補正する
sw_convp[(j*bmpihr.biWidth)+i] = ((int)val_soft[k]+32)*4;
float *hw_convfp = hw_convf[k];
float *sw_convfp = sw_convf[k];
hw_convfp[(j*bmpihr.biWidth)+i] = (float)val[k];
sw_convfp[(j*bmpihr.biWidth)+i] = val_soft[k];
if (val[k] != val2[k]){
printf("ERROR val and val2 results mismatch i = %d, j = %d, val[%d] = %f, val2[%d] = %f\n", i, j, k, (float)val[k], k, (float)val2[k]);
errcnt++;
//return(1);
}
printf("HW and SW results i = %d, j = %d, HW[%d] = %f, HW2[%d] = %f, SW[%d] = %f\n", i, j, k, (float)val[k], k, (float)val2[k], k, val_soft[k]);
}
}
}
cout << "Error Count = " << errcnt << endl;
cout << "Success HW and SW results match" << endl;
cout << endl;
// ハードウェアの畳み込み演算の結果を temp_conv0.bmp, temp_conv1.bmp に出力する
for (int k=0; k<NUMBER_OF_KERNEL; k++){
if (k==0){
if ((fbmpw=fopen("temp_conv0.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv0.bmp by binary write mode\n");
exit(1);
}
} else {
if ((fbmpw=fopen("temp_conv1.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv1.bmp by binary write mode\n");
exit(1);
}
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(uint16_t), 1, fbmpw);
fwrite(&bmpfhr.bfSize, sizeof(uint32_t), 1, fbmpw);
fwrite(&bmpfhr.bfReserved1, sizeof(uint16_t), 1, fbmpw);
fwrite(&bmpfhr.bfReserved2, sizeof(uint16_t), 1, fbmpw);
fwrite(&bmpfhr.bfOffBits, sizeof(uint32_t), 1, fbmpw);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpw);
// RGB データの書き込み、逆順にする
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
int *hw_convp = hw_conv[k];
blue = hw_convp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
fputc(blue, fbmpw);
fputc(green, fbmpw);
fputc(red, fbmpw);
}
}
fclose(fbmpw);
}
// ソフトウェアの畳み込み演算の結果を temp_conv_float0.bmp, temp_conv_float1.bmp に出力する
for(int k=0; k<2; k++){
if (k == 0){
if ((fbmpwf=fopen("temp_conv_float0.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv_float0.bmp by binary write mode\n");
exit(1);
}
} else {
if ((fbmpwf=fopen("temp_conv_float1.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_conv_float1.bmp by binary write mode\n");
exit(1);
}
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(uint16_t), 1, fbmpwf);
fwrite(&bmpfhr.bfSize, sizeof(uint32_t), 1, fbmpwf);
fwrite(&bmpfhr.bfReserved1, sizeof(uint16_t), 1, fbmpwf);
fwrite(&bmpfhr.bfReserved2, sizeof(uint16_t), 1, fbmpwf);
fwrite(&bmpfhr.bfOffBits, sizeof(uint32_t), 1, fbmpwf);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpwf);
// RGB データの書き込み、逆順にする
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
int *sw_convp = sw_conv[k];
blue = sw_convp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = blue;
red = blue;
fputc(blue, fbmpwf);
fputc(green, fbmpwf);
fputc(red, fbmpwf);
}
}
fclose(fbmpwf);
}
// ヘッダ出力
ofstream OH("conv_layer_output.h");
OH << "// conv_layer_output.h" << endl;
time_t now = time(0);
struct tm* localNow = localtime(&now);
OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
OH << " " << setw(2) << setfill('0') << localNow->tm_hour << ":" << localNow->tm_min << ":" << localNow->tm_sec << " by marsee" << endl;
OH << "//" << endl;
OH << endl;
OH << "#ifndef __CONV_LAYER_OUTPUT_H__" << endl;
OH << "#define __CONV_LAYER_OUTPUT_H__" << endl;
OH << endl;
OH << "const float conv_layer_fout[" << bmpihr.biHeight*bmpihr.biWidth << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
OH << " {" << fixed << setprecision(12) << sw_convf[0][bmpihr.biWidth*y+x];
for(int i=1; i<NUMBER_OF_KERNEL; i++){
OH << ", " << sw_convf[i][bmpihr.biWidth*y+x];
}
OH << "}";
if (y==bmpihr.biHeight-1 && x==bmpihr.biWidth-1)
OH << endl;
else
OH << "," << endl;
}
}
OH << "};" << endl << endl;
OH << "const ap_fixed<16, 6, AP_TRN, AP_WRAP> conv_layer_out[" << bmpihr.biHeight*bmpihr.biWidth << "][" << NUMBER_OF_KERNEL << "] = {" << endl;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
OH << " {" << hw_convf[0][bmpihr.biWidth*y+x];
for(int i=1; i<NUMBER_OF_KERNEL; i++){
OH << ", " << hw_convf[i][bmpihr.biWidth*y+x];
}
OH << "}";
if (y==bmpihr.biHeight-1 && x==bmpihr.biWidth-1)
OH << endl;
else
OH << "," << endl;
}
}
OH << "};" << endl << endl;
OH << "#endif" << endl;
free(rd_bmp);
for(int k=0; k<NUMBER_OF_KERNEL; k++){
free(hw_conv[k]);
free(sw_conv[k]);
free(hw_convf[k]);
free(sw_convf[k]);
}
return(0);
}
// Floating-point reference convolution.
// Discards input until an element with user == 1 arrives, then scans one
// VERTICAL_PIXEL_WIDTH x HORIZONTAL_PIXEL_WIDTH frame. The low 8 bits of each
// input word are scaled by 1/256, pushed through a line buffer and an
// ARRAY_SIZE x ARRAY_SIZE window, and convolved with conv1_fweight plus
// conv1_fbias for every kernel. Results are emitted only once the window is
// fully populated. Always returns 0.
int conv_layer_soft(hls::stream<ap_axiu<32,1,1,1> >& ins,
        hls::stream<float_axis<NUMBER_OF_KERNEL,1> >& outs){
    ap_axiu<32,1,1,1> in_pix;
    float_axis<NUMBER_OF_KERNEL,1> result;

    hls::LineBuffer<ARRAY_SIZE-1, HORIZONTAL_PIXEL_WIDTH, float> rows;
    hls::Window<ARRAY_SIZE, ARRAY_SIZE, float> window;

    // Synchronize on the frame start marker.
    ins >> in_pix;
    while (in_pix.user == 0)
        ins >> in_pix;

    for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
        for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
            // The very first element was already read while synchronizing.
            if (x != 0 || y != 0)
                ins >> in_pix;

            const float sample = (float)(in_pix.data & 0xff) / 256.0;

            // Slide the window left and refill its rightmost column from the
            // line buffer (older rows) and the new sample (bottom row).
            window.shift_pixels_left();
            for (int r=0; r<ARRAY_SIZE-1; r++){
                window.insert_pixel(rows.getval(r,x), r, ARRAY_SIZE-1);
            }
            window.insert_pixel(sample, ARRAY_SIZE-1, ARRAY_SIZE-1);

            // Update the line buffer column with the new sample.
            rows.shift_pixels_up(x);
            rows.insert_bottom_row(sample, x);

            // Convolution for every kernel.
            for (int k=0; k<NUMBER_OF_KERNEL; k++){
                float acc = 0.0;
                for (int row=0; row<ARRAY_SIZE; row++){
                    for (int col=0; col<ARRAY_SIZE; col++){
                        acc += window.getval(row,col) * conv1_fweight[k][0][row][col];
                    }
                }
                acc += conv1_fbias[k];
                result.data[k] = acc;
            }

            // The first ARRAY_SIZE-1 rows and columns hold an incomplete
            // window; nothing is emitted for them.
            const bool window_full = (x >= (ARRAY_SIZE-1)) && (y >= (ARRAY_SIZE-1));
            if (window_full){
                // TUSER marks the first valid result, TLAST the end of a row.
                result.user = (x==(ARRAY_SIZE-1) && y==(ARRAY_SIZE-1)) ? 1 : 0;
                result.last = (x == (HORIZONTAL_PIXEL_WIDTH-1)) ? 1 : 0;
                outs << result;
            }
        }
    }
    return(0);
}
// 検証用 conv_layer2()
// 検証用に conv_layer() とは異なる実装でコーディング
// Cross-check implementation of the convolution layer.
// Buffers one whole frame, computes every output value with plain nested
// loops over conv1_weight / conv1_bias, then streams the results out.
// Always returns 0.
int conv_layer2(hls::stream<ap_axiu<32,1,1,1> >&ins,
        hls::stream<ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> >&outs){
    ap_axiu<32,1,1,1> in_pix;
    val_type conv_val[NUMBER_OF_KERNEL][VERTICAL_PIXEL_WIDTH][HORIZONTAL_PIXEL_WIDTH];
    in_type image[VERTICAL_PIXEL_WIDTH][HORIZONTAL_PIXEL_WIDTH];
    ap_fixed_axis<W,I,NUMBER_OF_KERNEL,1> result;

    // Synchronize on the frame start marker.
    ins >> in_pix;
    while (in_pix.user == 0)
        ins >> in_pix;

    // Stage 1: capture the frame, scaling the low 8 bits of each word by 1/256.
    for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
        for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
            // The very first element was already read while synchronizing.
            if (x != 0 || y != 0)
                ins >> in_pix;

            image[y][x] = (in_type)((ap_ufixed<16, 8, AP_TRN, AP_WRAP>)(in_pix.data & 0xff) / 256);
        }
    }

    // Stage 2: direct convolution for every kernel and output position.
    for(int kern=0; kern<NUMBER_OF_KERNEL; kern++){
        for(int oy=0; oy<VERTICAL_PIXEL_WIDTH-(ARRAY_SIZE-1); oy++){
            for(int ox=0; ox<HORIZONTAL_PIXEL_WIDTH-(ARRAY_SIZE-1); ox++){
                val_type acc = 0;
                for(int m=0; m<ARRAY_SIZE; m++){
                    for(int n=0; n<ARRAY_SIZE; n++){
                        acc += (val_type)image[oy+m][ox+n] * (val_type)conv1_weight[kern][0][m][n];
                    }
                }
                acc += (val_type)conv1_bias[kern];
                conv_val[kern][oy][ox] = acc;
            }
        }
    }

    // Stage 3: stream the results out.
    for(int y=0; y<VERTICAL_PIXEL_WIDTH-(ARRAY_SIZE-1); y++){
        for(int x=0; x<HORIZONTAL_PIXEL_WIDTH-(ARRAY_SIZE-1); x++){
            for(int kern=0; kern<NUMBER_OF_KERNEL; kern++){
                result.data[kern] = conv_val[kern][y][x];
            }

            // TUSER marks the first result, TLAST the end of each row.
            result.user = (x==0 && y==0) ? 1 : 0;
            result.last = (x == (HORIZONTAL_PIXEL_WIDTH - ARRAY_SIZE)) ? 1 : 0;

            outs << result;
        }
    }

    return(0);
}
// affine_layer2.h
// 2018/05/03 by marsee
//
// Compile-time configuration and data types for the second affine layer.
#ifndef __AFFINE_LAYER2_H__
#define __AFFINE_LAYER2_H__
#include <ap_fixed.h>
// Number of values received by the layer.
static const size_t NUMBER_OF_MIDDLE_LAYER = 100;
// Number of values produced by the layer.
static const size_t NUMBER_OF_OUTPUT_LAYER = 3;
// ap_fixed template arguments of the input type (affine_type).
static const size_t IN_W = 19;
static const size_t IN_I = 7;
// ap_fixed template arguments of the output type (out_type).
static const size_t OUT_W = 12;
static const size_t OUT_I = 7;
// Fixed-point output vector.
// NOTE(review): the element type is spelled ap_fixed<12,7,...> literally; it
// equals out_type only while OUT_W/OUT_I stay at 12/7.
typedef struct {
ap_fixed<12,7,AP_TRN,AP_WRAP> data [NUMBER_OF_OUTPUT_LAYER];
} mdata_type;
// Floating-point output vector.
typedef struct {
float data [NUMBER_OF_OUTPUT_LAYER];
} fmdata_type;
typedef ap_fixed<OUT_W,OUT_I,AP_TRN,AP_WRAP> out_type;
typedef ap_fixed<IN_W,IN_I,AP_TRN,AP_WRAP> affine_type;
#endif
// affine_layer2_tb.cpp
// 2018/05/03 by marsee
// HLS Streaming
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "layer_general.h"
#include "affine_layer2.h"
#include "relu_affine1_output.h"
#include "af2_weight.h"
#include "af2_bias.h"
// Fixed-point affine layer under test (defined outside this test bench).
int affine_layer2(hls::stream<ap_fixed_axis<IN_W,IN_I,1,1> >& ins,
hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> >& outs);
// Second fixed-point implementation used to cross-check affine_layer2().
int affine_layer2_2(hls::stream<ap_fixed_axis<IN_W,IN_I,1,1> >& ins,
hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> >& outs);
// Floating-point reference implementation, defined after main().
int affine_layer2_soft(hls::stream<float_axis<1,1> >& ins,
hls::stream<float_axis<1,1> >& outs);
// Test bench for affine_layer2().
// Streams the relu_affine1 output through affine_layer2(), the cross-check
// implementation affine_layer2_2() and the floating-point reference
// affine_layer2_soft(), compares the two fixed-point results and writes the
// fixed-point and floating-point results as C arrays to
// affine_layer2_output.h.
// Always returns 0; mismatches are only counted and reported.
int main(){
    using namespace std;

    hls::stream<ap_fixed_axis<IN_W,IN_I,1,1> > ins;
    hls::stream<ap_fixed_axis<IN_W,IN_I,1,1> > ins2;
    hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> > outs;
    hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> > outs2;
    hls::stream<float_axis<1,1> > ins_soft;
    hls::stream<float_axis<1,1> > outs_soft;

    mdata_type dot;
    fmdata_type fdot;

    ap_fixed_axis<IN_W,IN_I,1,1> pix;
    float_axis<1,1> fpix;
    ap_fixed_axis<OUT_W,OUT_I,1,1> pdata;
    ap_fixed_axis<OUT_W,OUT_I,1,1> pdata2;
    float_axis<1,1> fpdata;

    // Queue five dummy elements with user == 0 ahead of the real data.
    for(int i=0; i<5; i++){
        pix.user = 0;
        pix.data[0]= (affine_type)i;
        ins << pix;
        ins2 << pix;

        fpix.user = 0;
        fpix.data[0] = (float)i;
        ins_soft << fpix;
    }

    // Queue one set of input values into ins, ins2 and ins_soft.
    for(int i=0; i < NUMBER_OF_MIDDLE_LAYER; i++){
        pix.data[0] = relu_affine1_out[i];
        fpix.data[0] = relu_affine1_fout[i];

        if (i == 0){ // assert TUSER on the first element
            pix.user = 1;
            fpix.user = 1;
        } else {
            pix.user = 0;
            fpix.user = 0;
        }

        if (i == NUMBER_OF_MIDDLE_LAYER-1){ // assert TLAST on the last element
            pix.last = 1;
            fpix.last = 1;
        } else {
            pix.last = 0;
            fpix.last = 0;
        }

        ins << pix;
        ins2 << pix;
        ins_soft << fpix;
    }

    affine_layer2(ins, outs);
    affine_layer2_2(ins2, outs2);
    affine_layer2_soft(ins_soft, outs_soft);

    // Collect the results into dot / fdot and compare the two fixed-point
    // implementations.
    int errcnt = 0;
    for(int i=0; i<NUMBER_OF_OUTPUT_LAYER; i++){
        outs >> pdata;
        outs2 >> pdata2;
        outs_soft >> fpdata;

        dot.data[i] = pdata.data[0];
        fdot.data[i] = fpdata.data[0];

        printf("i = %d, HW = %f, HW2 = %f, SW = %f\n", i, (float)dot.data[i], (float)pdata2.data[0], fdot.data[i]);
        if(pdata.data[0] != pdata2.data[0]){ // the two fixed-point implementations disagree
            // The check above compares HW with HW2, so the message says so.
            printf("ERROR HW and HW2 results mismatch i = %d, HW = %f, HW2 = %f, SW = %f\n", i, (float)dot.data[i], (float)pdata2.data[0], fdot.data[i]);
            errcnt++;
            //return(1);
        }
    }
    cout << "Error Count = " << errcnt << endl;

    // Write the affine layer 2 results to a header file.
    ofstream OH("affine_layer2_output.h");
    OH << "// affine_layer2_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
    // setw() applies to the next field only, so it is repeated for each of
    // hour, minute and second to zero-pad all three.
    OH << " " << setfill('0') << setw(2) << localNow->tm_hour << ":" << setw(2) << localNow->tm_min << ":" << setw(2) << localNow->tm_sec << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __AFFINE_LAYER2_OUTPUT_H__" << endl;
    OH << "#define __AFFINE_LAYER2_OUTPUT_H__" << endl;
    OH << endl;

    OH << "const float affine2_fout[" << NUMBER_OF_OUTPUT_LAYER << "] = {" << endl;
    for (int i=0; i<NUMBER_OF_OUTPUT_LAYER ; i++){
        OH << " " << fixed << setprecision(14) << fdot.data[i];
        if (i == NUMBER_OF_OUTPUT_LAYER-1)
            OH << endl;
        else
            OH << "," << endl;
    }
    OH << "};" << endl << endl;

    // The emitted element type is derived from OUT_W and OUT_I, the
    // parameters of the stream the values were read from.
    OH << "const ap_fixed<" << OUT_W << "," << OUT_I << ",AP_TRN,AP_WRAP> affine2_out[" << NUMBER_OF_OUTPUT_LAYER << "] = {" << endl;
    for (int i=0; i<NUMBER_OF_OUTPUT_LAYER ; i++){
        OH << " " << fixed << setprecision(14) << (float)dot.data[i];
        if (i == NUMBER_OF_OUTPUT_LAYER-1)
            OH << endl;
        else
            OH << "," << endl;
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;

    return(0);
}
// Float version of affine layer 2.
//
// Skips input samples until one arrives with a non-zero user flag, treats that
// sample as the first of NUMBER_OF_MIDDLE_LAYER inputs, accumulates
// input * af2_fweight[row][col] for every output column, adds af2_fbias[col]
// after the final input and pushes NUMBER_OF_OUTPUT_LAYER results to `outs`
// (user set on the first result, last set on the final one). Always returns 0.
int affine_layer2_soft(hls::stream<float_axis<1,1> >& ins,
        hls::stream<float_axis<1,1> >& outs){
    float_axis<1,1> sample;
    float_axis<1,1> result;
    float acc[NUMBER_OF_OUTPUT_LAYER];

    // Wait for the start of the frame: the first sample with user != 0.
    ins >> sample;
    Loop1: while (sample.user == 0){
        ins >> sample;
    }

    Loop2: for (int row = 0; row < NUMBER_OF_MIDDLE_LAYER; ++row){
        const bool first_row = (row == 0);
        const bool final_row = (row == NUMBER_OF_MIDDLE_LAYER-1);

        // The first sample was already taken by the start-of-frame search.
        if (!first_row)
            ins >> sample;

        Loop4: for (int col = 0; col < NUMBER_OF_OUTPUT_LAYER; ++col){
            if (first_row) // start every accumulator from zero
                acc[col] = 0;

            float dot_temp = sample.data[0] * af2_fweight[row][col];
            acc[col] += dot_temp;

            if (!final_row)
                continue;

            // Final input: add the bias and emit this column's result.
            acc[col] += af2_fbias[col];
            result.data[0] = acc[col];
            result.user = (col == 0) ? 1 : 0;
            result.last = (col == NUMBER_OF_OUTPUT_LAYER-1) ? 1 : 0;
            outs << result;
        }
    }
    return 0;
}
// Verification variant: affine_layer2_2()
// Written with a different structure from affine_layer2() so that the two
// results can be cross-checked in the testbench.
//
// Skips input until a sample with a non-zero user flag arrives, buffers
// NUMBER_OF_MIDDLE_LAYER inputs, then for each output computes
// sum(input[j] * af2_weight[j][i]) + af2_bias[i] in out_type and pushes it to
// `outs` (user set on the first result, last set on the final one).
// Always returns 0.
int affine_layer2_2(hls::stream<ap_fixed_axis<IN_W,IN_I,1,1> >& ins,
        hls::stream<ap_fixed_axis<OUT_W,OUT_I,1,1> >& outs){
    ap_fixed_axis<IN_W,IN_I,1,1> sample;
    ap_fixed_axis<OUT_W,OUT_I,1,1> result;
    affine_type inputs[NUMBER_OF_MIDDLE_LAYER];

    // Wait for the start of the frame: the first sample with user != 0.
    ins >> sample;
    while (sample.user == 0)
        ins >> sample;

    // That sample is input 0; read the remaining inputs of the frame.
    inputs[0] = sample.data[0];
    for (int src = 1; src < NUMBER_OF_MIDDLE_LAYER; ++src){
        ins >> sample;
        inputs[src] = sample.data[0];
    }

    // One dot product per output, accumulated in out_type.
    for (int dst = 0; dst < NUMBER_OF_OUTPUT_LAYER; ++dst){
        out_type acc = 0;
        for (int src = 0; src < NUMBER_OF_MIDDLE_LAYER; ++src){
            acc += inputs[src]*af2_weight[src][dst];
        }
        acc += af2_bias[dst];

        result.data[0] = acc;
        result.user = (dst == 0) ? 1 : 0;
        result.last = (dst == NUMBER_OF_OUTPUT_LAYER-1) ? 1 : 0;
        outs << result;
    }
    return 0;
}
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | 1 | 2 | 3 | 4 | 5 |
6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | 14 | 15 | 16 | 17 | 18 | 19 |
20 | 21 | 22 | 23 | 24 | 25 | 26 |
27 | 28 | 29 | 30 | 31 | - | - |