dog: 82%
truck: 64%
bicycle: 85%
だったので、精度は下がっているようだ。
dog: 99%
truck: 92%
bicycle: 99%
layer filters size input output
0 conv 32 3 x 3 / 1 608 x 608 x 3 -> 608 x 608 x 32 0.639 BFLOPs
1 max 2 x 2 / 2 608 x 608 x 32 -> 304 x 304 x 32
2 conv 64 3 x 3 / 1 304 x 304 x 32 -> 304 x 304 x 64 3.407 BFLOPs
3 max 2 x 2 / 2 304 x 304 x 64 -> 152 x 152 x 64
4 conv 128 3 x 3 / 1 152 x 152 x 64 -> 152 x 152 x 128 3.407 BFLOPs
5 conv 64 1 x 1 / 1 152 x 152 x 128 -> 152 x 152 x 64 0.379 BFLOPs
6 conv 128 3 x 3 / 1 152 x 152 x 64 -> 152 x 152 x 128 3.407 BFLOPs
7 max 2 x 2 / 2 152 x 152 x 128 -> 76 x 76 x 128
8 conv 256 3 x 3 / 1 76 x 76 x 128 -> 76 x 76 x 256 3.407 BFLOPs
9 conv 128 1 x 1 / 1 76 x 76 x 256 -> 76 x 76 x 128 0.379 BFLOPs
10 conv 256 3 x 3 / 1 76 x 76 x 128 -> 76 x 76 x 256 3.407 BFLOPs
11 max 2 x 2 / 2 76 x 76 x 256 -> 38 x 38 x 256
12 conv 512 3 x 3 / 1 38 x 38 x 256 -> 38 x 38 x 512 3.407 BFLOPs
13 conv 256 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 256 0.379 BFLOPs
14 conv 512 3 x 3 / 1 38 x 38 x 256 -> 38 x 38 x 512 3.407 BFLOPs
15 conv 256 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 256 0.379 BFLOPs
16 conv 512 3 x 3 / 1 38 x 38 x 256 -> 38 x 38 x 512 3.407 BFLOPs
17 max 2 x 2 / 2 38 x 38 x 512 -> 19 x 19 x 512
18 conv 1024 3 x 3 / 1 19 x 19 x 512 -> 19 x 19 x1024 3.407 BFLOPs
19 conv 512 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 512 0.379 BFLOPs
20 conv 1024 3 x 3 / 1 19 x 19 x 512 -> 19 x 19 x1024 3.407 BFLOPs
21 conv 512 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 512 0.379 BFLOPs
22 conv 1024 3 x 3 / 1 19 x 19 x 512 -> 19 x 19 x1024 3.407 BFLOPs
23 conv 1024 3 x 3 / 1 19 x 19 x1024 -> 19 x 19 x1024 6.814 BFLOPs
24 conv 1024 3 x 3 / 1 19 x 19 x1024 -> 19 x 19 x1024 6.814 BFLOPs
25 route 16
26 conv 64 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 64 0.095 BFLOPs
27 reorg / 2 38 x 38 x 64 -> 19 x 19 x 256
28 route 27 24
29 conv 1024 3 x 3 / 1 19 x 19 x1280 -> 19 x 19 x1024 8.517 BFLOPs
30 conv 425 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 425 0.314 BFLOPs
31 detection
mask_scale: Using default '1.000000'
Loading weights from yolo.weights...Done!
data/dog.jpg: Predicted in 23.070361 seconds.
dog: 82%
truck: 64%
bicycle: 85%
mkdir -p obj
mkdir -p results
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/gemm.c -o obj/gemm.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/utils.c -o obj/utils.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/cuda.c -o obj/cuda.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/deconvolutional_layer.c -o obj/deconvolutional_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/convolutional_layer.c -o obj/convolutional_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/list.c -o obj/list.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/image.c -o obj/image.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/activations.c -o obj/activations.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/im2col.c -o obj/im2col.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/col2im.c -o obj/col2im.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/blas.c -o obj/blas.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/crop_layer.c -o obj/crop_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/dropout_layer.c -o obj/dropout_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/maxpool_layer.c -o obj/maxpool_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/softmax_layer.c -o obj/softmax_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/data.c -o obj/data.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/matrix.c -o obj/matrix.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/network.c -o obj/network.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/connected_layer.c -o obj/connected_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/cost_layer.c -o obj/cost_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/parser.c -o obj/parser.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/option_list.c -o obj/option_list.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/detection_layer.c -o obj/detection_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/route_layer.c -o obj/route_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/upsample_layer.c -o obj/upsample_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/box.c -o obj/box.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/normalization_layer.c -o obj/normalization_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/avgpool_layer.c -o obj/avgpool_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/layer.c -o obj/layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/local_layer.c -o obj/local_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/shortcut_layer.c -o obj/shortcut_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/logistic_layer.c -o obj/logistic_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/activation_layer.c -o obj/activation_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/rnn_layer.c -o obj/rnn_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/gru_layer.c -o obj/gru_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/crnn_layer.c -o obj/crnn_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/demo.c -o obj/demo.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/batchnorm_layer.c -o obj/batchnorm_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/region_layer.c -o obj/region_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/reorg_layer.c -o obj/reorg_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/tree.c -o obj/tree.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/lstm_layer.c -o obj/lstm_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/l2norm_layer.c -o obj/l2norm_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./src/yolo_layer.c -o obj/yolo_layer.o
gcc -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -shared obj/gemm.o obj/utils.o obj/cuda.o obj/deconvolutional_layer.o obj/convolutional_layer.o obj/list.o obj/image.o obj/activations.o obj/im2col.o obj/col2im.o obj/blas.o obj/crop_layer.o obj/dropout_layer.o obj/maxpool_layer.o obj/softmax_layer.o obj/data.o obj/matrix.o obj/network.o obj/connected_layer.o obj/cost_layer.o obj/parser.o obj/option_list.o obj/detection_layer.o obj/route_layer.o obj/upsample_layer.o obj/box.o obj/normalization_layer.o obj/avgpool_layer.o obj/layer.o obj/local_layer.o obj/shortcut_layer.o obj/logistic_layer.o obj/activation_layer.o obj/rnn_layer.o obj/gru_layer.o obj/crnn_layer.o obj/demo.o obj/batchnorm_layer.o obj/region_layer.o obj/reorg_layer.o obj/tree.o obj/lstm_layer.o obj/l2norm_layer.o obj/yolo_layer.o -o libdarknet.so -lm -pthread
ar rcs libdarknet.a obj/gemm.o obj/utils.o obj/cuda.o obj/deconvolutional_layer.o obj/convolutional_layer.o obj/list.o obj/image.o obj/activations.o obj/im2col.o obj/col2im.o obj/blas.o obj/crop_layer.o obj/dropout_layer.o obj/maxpool_layer.o obj/softmax_layer.o obj/data.o obj/matrix.o obj/network.o obj/connected_layer.o obj/cost_layer.o obj/parser.o obj/option_list.o obj/detection_layer.o obj/route_layer.o obj/upsample_layer.o obj/box.o obj/normalization_layer.o obj/avgpool_layer.o obj/layer.o obj/local_layer.o obj/shortcut_layer.o obj/logistic_layer.o obj/activation_layer.o obj/rnn_layer.o obj/gru_layer.o obj/crnn_layer.o obj/demo.o obj/batchnorm_layer.o obj/region_layer.o obj/reorg_layer.o obj/tree.o obj/lstm_layer.o obj/l2norm_layer.o obj/yolo_layer.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/captcha.c -o obj/captcha.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/lsd.c -o obj/lsd.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/super.c -o obj/super.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/art.c -o obj/art.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/tag.c -o obj/tag.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/cifar.c -o obj/cifar.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/go.c -o obj/go.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/rnn.c -o obj/rnn.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/segmenter.c -o obj/segmenter.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/regressor.c -o obj/regressor.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/classifier.c -o obj/classifier.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/coco.c -o obj/coco.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/yolo.c -o obj/yolo.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/detector.c -o obj/detector.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/nightmare.c -o obj/nightmare.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast -c ./examples/darknet.c -o obj/darknet.o
gcc -Iinclude/ -Isrc/ -Wall -Wno-unused-result -Wno-unknown-pragmas -Wfatal-errors -fPIC -Ofast obj/captcha.o obj/lsd.o obj/super.o obj/art.o obj/tag.o obj/cifar.o obj/go.o obj/rnn.o obj/segmenter.o obj/regressor.o obj/classifier.o obj/coco.o obj/yolo.o obj/detector.o obj/nightmare.o obj/darknet.o libdarknet.a -o darknet -lm -pthread libdarknet.a
layer filters size input output
0 conv 32 3 x 3 / 1 416 x 416 x 3 -> 416 x 416 x 32 0.299 BFLOPs
1 conv 64 3 x 3 / 2 416 x 416 x 32 -> 208 x 208 x 64 1.595 BFLOPs
2 conv 32 1 x 1 / 1 208 x 208 x 64 -> 208 x 208 x 32 0.177 BFLOPs
3 conv 64 3 x 3 / 1 208 x 208 x 32 -> 208 x 208 x 64 1.595 BFLOPs
4 res 1 208 x 208 x 64 -> 208 x 208 x 64
5 conv 128 3 x 3 / 2 208 x 208 x 64 -> 104 x 104 x 128 1.595 BFLOPs
6 conv 64 1 x 1 / 1 104 x 104 x 128 -> 104 x 104 x 64 0.177 BFLOPs
7 conv 128 3 x 3 / 1 104 x 104 x 64 -> 104 x 104 x 128 1.595 BFLOPs
8 res 5 104 x 104 x 128 -> 104 x 104 x 128
9 conv 64 1 x 1 / 1 104 x 104 x 128 -> 104 x 104 x 64 0.177 BFLOPs
10 conv 128 3 x 3 / 1 104 x 104 x 64 -> 104 x 104 x 128 1.595 BFLOPs
11 res 8 104 x 104 x 128 -> 104 x 104 x 128
12 conv 256 3 x 3 / 2 104 x 104 x 128 -> 52 x 52 x 256 1.595 BFLOPs
13 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
14 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
15 res 12 52 x 52 x 256 -> 52 x 52 x 256
16 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
17 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
18 res 15 52 x 52 x 256 -> 52 x 52 x 256
19 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
20 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
21 res 18 52 x 52 x 256 -> 52 x 52 x 256
22 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
23 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
24 res 21 52 x 52 x 256 -> 52 x 52 x 256
25 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
26 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
27 res 24 52 x 52 x 256 -> 52 x 52 x 256
28 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
29 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
30 res 27 52 x 52 x 256 -> 52 x 52 x 256
31 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
32 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
33 res 30 52 x 52 x 256 -> 52 x 52 x 256
34 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
35 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
36 res 33 52 x 52 x 256 -> 52 x 52 x 256
37 conv 512 3 x 3 / 2 52 x 52 x 256 -> 26 x 26 x 512 1.595 BFLOPs
38 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
39 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
40 res 37 26 x 26 x 512 -> 26 x 26 x 512
41 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
42 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
43 res 40 26 x 26 x 512 -> 26 x 26 x 512
44 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
45 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
46 res 43 26 x 26 x 512 -> 26 x 26 x 512
47 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
48 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
49 res 46 26 x 26 x 512 -> 26 x 26 x 512
50 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
51 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
52 res 49 26 x 26 x 512 -> 26 x 26 x 512
53 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
54 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
55 res 52 26 x 26 x 512 -> 26 x 26 x 512
56 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
57 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
58 res 55 26 x 26 x 512 -> 26 x 26 x 512
59 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
60 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
61 res 58 26 x 26 x 512 -> 26 x 26 x 512
62 conv 1024 3 x 3 / 2 26 x 26 x 512 -> 13 x 13 x1024 1.595 BFLOPs
63 conv 512 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 512 0.177 BFLOPs
64 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BFLOPs
65 res 62 13 x 13 x1024 -> 13 x 13 x1024
66 conv 512 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 512 0.177 BFLOPs
67 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BFLOPs
68 res 65 13 x 13 x1024 -> 13 x 13 x1024
69 conv 512 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 512 0.177 BFLOPs
70 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BFLOPs
71 res 68 13 x 13 x1024 -> 13 x 13 x1024
72 conv 512 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 512 0.177 BFLOPs
73 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BFLOPs
74 res 71 13 x 13 x1024 -> 13 x 13 x1024
75 conv 512 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 512 0.177 BFLOPs
76 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BFLOPs
77 conv 512 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 512 0.177 BFLOPs
78 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BFLOPs
79 conv 512 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 512 0.177 BFLOPs
80 conv 1024 3 x 3 / 1 13 x 13 x 512 -> 13 x 13 x1024 1.595 BFLOPs
81 conv 255 1 x 1 / 1 13 x 13 x1024 -> 13 x 13 x 255 0.088 BFLOPs
82 detection
83 route 79
84 conv 256 1 x 1 / 1 13 x 13 x 512 -> 13 x 13 x 256 0.044 BFLOPs
85 upsample 2x 13 x 13 x 256 -> 26 x 26 x 256
86 route 85 61
87 conv 256 1 x 1 / 1 26 x 26 x 768 -> 26 x 26 x 256 0.266 BFLOPs
88 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
89 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
90 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
91 conv 256 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 256 0.177 BFLOPs
92 conv 512 3 x 3 / 1 26 x 26 x 256 -> 26 x 26 x 512 1.595 BFLOPs
93 conv 255 1 x 1 / 1 26 x 26 x 512 -> 26 x 26 x 255 0.177 BFLOPs
94 detection
95 route 91
96 conv 128 1 x 1 / 1 26 x 26 x 256 -> 26 x 26 x 128 0.044 BFLOPs
97 upsample 2x 26 x 26 x 128 -> 52 x 52 x 128
98 route 97 36
99 conv 128 1 x 1 / 1 52 x 52 x 384 -> 52 x 52 x 128 0.266 BFLOPs
100 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
101 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
102 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
103 conv 128 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 128 0.177 BFLOPs
104 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
105 conv 255 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 255 0.353 BFLOPs
106 detection
Loading weights from yolov3.weights...Done!
data/dog.jpg: Predicted in 20.733993 seconds.
dog: 99%
truck: 92%
bicycle: 99%
Configuration -> Portを選択する
Port:/dev/ttyUSB1、Baud Rate:115200を選択する
(c) Copyright 2012-2017 Xilinx, Inc. All Rights Reserved.
#-----------------------------------------------------------
# Tool version : sds++ 2017.4 SW Build 2086221 on Fri Dec 15 20:55:10 MST 2017
# Start time : Wed Mar 28 20:06:49 JST 2018
# Command line : sds++ -DHLS_NO_XIL_FPO_LIB -D__ARM_PCS_VFP -Wall -O0 -g -I../src -I/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/linux/linux/inc/include -c -fmessage-length=0 -MTsrc/xf_dense_npyr_optical_flow_accel.o -I/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/usr/include/c++/6.2.1 -I/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/usr/include/c++/6.2.1/arm-xilinx-linux-gnueabi -I/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/usr/include/c++/6.2.1/backward -I/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/usr/include -I/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/usr/include/glib-2.0 -I/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/usr/lib/glib-2.0/include -Wno-overloaded-virtual -Wno-unused-label -Wno-strict-overflow -Wno-uninitialized -Wno-unused-function -Wno-unused-variable -Wno-unknown-attributes -Wno-unused-local-typedefs -Wno-sign-compare -MMD -MP -MFsrc/xf_dense_npyr_optical_flow_accel.d -MTsrc/xf_dense_npyr_optical_flow_accel.o -o src/xf_dense_npyr_optical_flow_accel.o ../src/xf_dense_npyr_optical_flow_accel.cpp -sds-hw xf::DenseNonPyrLKOpticalFlow<25,0,2160,3840,1> xf_dense_npyr_optical_flow_accel.cpp -files /home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/linux/linux/inc/include/imgproc/xf_dense_npyr_optical_flow.hpp -clkid 0 -sds-end -sds-sys-config linux -sds-proc linux -sds-pf /home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20
# Log file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/reports/sds_xf_dense_npyr_optical_flow_accel.log
# Journal file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/reports/sds_xf_dense_npyr_optical_flow_accel.jou
# Report file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/reports/sds_xf_dense_npyr_optical_flow_accel.rpt
#-----------------------------------------------------------
High-Level Synthesis
--------------------
Vivado HLS Report : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/vhls/w0_xf_DenseNonPyrLKOpticalFlow/solution/syn/report/w0_xf_DenseNonPyrLKOpticalFlow_csynth.rpt
================================================================
== Performance Estimates
================================================================
+ Timing (ns):
* Summary:
+--------+-------+----------+------------+
| Clock | Target| Estimated| Uncertainty|
+--------+-------+----------+------------+
|ap_clk | 10.00| 8.93| 2.70|
+--------+-------+----------+------------+
+ Latency (clock cycles):
* Summary:
+-------+---------+-------+---------+---------+
| Latency | Interval | Pipeline|
| min | max | min | max | Type |
+-------+---------+-------+---------+---------+
| 99850| 8400726| 99850| 8400726| none |
+-------+---------+-------+---------+---------+
+ Detail:
* Instance:
+--------------------------+---------------+-------+---------+-------+---------+---------+
| | | Latency | Interval | Pipeline|
| Instance | Module | min | max | min | max | Type |
+--------------------------+---------------+-------+---------+-------+---------+---------+
|grp_fpga_optflow8_fu_232 |fpga_optflow8 | 99849| 8400725| 99849| 8400725| none |
+--------------------------+---------------+-------+---------+-------+---------+---------+
* Loop:
N/A
================================================================
== Utilization Estimates
================================================================
* Summary:
+-----------------+---------+-------+--------+-------+
| Name | BRAM_18K| DSP48E| FF | LUT |
+-----------------+---------+-------+--------+-------+
|DSP | -| -| -| -|
|Expression | -| -| -| -|
|FIFO | -| -| -| -|
|Instance | 180| 37| 14325| 18290|
|Memory | -| -| -| -|
|Multiplexer | -| -| -| 51|
|Register | -| -| 3| -|
+-----------------+---------+-------+--------+-------+
|Total | 180| 37| 14328| 18341|
+-----------------+---------+-------+--------+-------+
|Available | 280| 220| 106400| 53200|
+-----------------+---------+-------+--------+-------+
|Utilization (%) | 64| 16| 13| 34|
+-----------------+---------+-------+--------+-------+
+ Detail:
* Instance:
+--------------------------+---------------+---------+-------+-------+-------+
| Instance | Module | BRAM_18K| DSP48E| FF | LUT |
+--------------------------+---------------+---------+-------+-------+-------+
|grp_fpga_optflow8_fu_232 |fpga_optflow8 | 180| 37| 14325| 18290|
+--------------------------+---------------+---------+-------+-------+-------+
|Total | | 180| 37| 14325| 18290|
+--------------------------+---------------+---------+-------+-------+-------+
* DSP48:
N/A
* Memory:
N/A
* FIFO:
N/A
* Expression:
N/A
* Multiplexer:
+--------------------+----+-----------+-----+-----------+
| Name | LUT| Input Size| Bits| Total Bits|
+--------------------+----+-----------+-----+-----------+
|ap_NS_fsm | 15| 3| 1| 3|
|flowx_data_write | 9| 2| 1| 2|
|flowy_data_write | 9| 2| 1| 2|
|frame0_data_V_read | 9| 2| 1| 2|
|frame1_data_V_read | 9| 2| 1| 2|
+--------------------+----+-----------+-----+-----------+
|Total | 51| 11| 5| 11|
+--------------------+----+-----------+-----+-----------+
* Register:
+------------------------------------------+---+----+-----+-----------+
| Name | FF| LUT| Bits| Const Bits|
+------------------------------------------+---+----+-----+-----------+
|ap_CS_fsm | 2| 0| 2| 0|
|ap_reg_grp_fpga_optflow8_fu_232_ap_start | 1| 0| 1| 0|
+------------------------------------------+---+----+-----+-----------+
|Total | 3| 0| 3| 0|
+------------------------------------------+---+----+-----+-----------+
================================================================
== Interface
================================================================
* Summary:
+-----------------------+-----+-----+------------+--------------------------------+--------------+
| RTL Ports | Dir | Bits| Protocol | Source Object | C Type |
+-----------------------+-----+-----+------------+--------------------------------+--------------+
|ap_clk | in | 1| ap_ctrl_hs | w0_xf_DenseNonPyrLKOpticalFlow | return value |
|ap_rst_n | in | 1| ap_ctrl_hs | w0_xf_DenseNonPyrLKOpticalFlow | return value |
|ap_start | in | 1| ap_ctrl_hs | w0_xf_DenseNonPyrLKOpticalFlow | return value |
|ap_done | out | 1| ap_ctrl_hs | w0_xf_DenseNonPyrLKOpticalFlow | return value |
|ap_idle | out | 1| ap_ctrl_hs | w0_xf_DenseNonPyrLKOpticalFlow | return value |
|ap_ready | out | 1| ap_ctrl_hs | w0_xf_DenseNonPyrLKOpticalFlow | return value |
|frame0_allocatedFlag | in | 8| ap_none | frame0_allocatedFlag | pointer |
|frame0_rows | in | 32| ap_none | frame0_rows | pointer |
|frame0_cols | in | 32| ap_none | frame0_cols | pointer |
|frame0_size | in | 32| ap_none | frame0_size | pointer |
|frame0_data_V_dout | in | 8| ap_fifo | frame0_data_V | pointer |
|frame0_data_V_empty_n | in | 1| ap_fifo | frame0_data_V | pointer |
|frame0_data_V_read | out | 1| ap_fifo | frame0_data_V | pointer |
|frame1_allocatedFlag | in | 8| ap_none | frame1_allocatedFlag | pointer |
|frame1_rows | in | 32| ap_none | frame1_rows | pointer |
|frame1_cols | in | 32| ap_none | frame1_cols | pointer |
|frame1_size | in | 32| ap_none | frame1_size | pointer |
|frame1_data_V_dout | in | 8| ap_fifo | frame1_data_V | pointer |
|frame1_data_V_empty_n | in | 1| ap_fifo | frame1_data_V | pointer |
|frame1_data_V_read | out | 1| ap_fifo | frame1_data_V | pointer |
|flowx_allocatedFlag | in | 8| ap_none | flowx_allocatedFlag | pointer |
|flowx_rows | in | 32| ap_none | flowx_rows | pointer |
|flowx_cols | in | 32| ap_none | flowx_cols | pointer |
|flowx_size | in | 32| ap_none | flowx_size | pointer |
|flowx_data_din | out | 32| ap_fifo | flowx_data | pointer |
|flowx_data_full_n | in | 1| ap_fifo | flowx_data | pointer |
|flowx_data_write | out | 1| ap_fifo | flowx_data | pointer |
|flowy_allocatedFlag | in | 8| ap_none | flowy_allocatedFlag | pointer |
|flowy_rows | in | 32| ap_none | flowy_rows | pointer |
|flowy_cols | in | 32| ap_none | flowy_cols | pointer |
|flowy_size | in | 32| ap_none | flowy_size | pointer |
|flowy_data_din | out | 32| ap_fifo | flowy_data | pointer |
|flowy_data_full_n | in | 1| ap_fifo | flowy_data | pointer |
|flowy_data_write | out | 1| ap_fifo | flowy_data | pointer |
+-----------------------+-----+-----+------------+--------------------------------+--------------+
(c) Copyright 2012-2017 Xilinx, Inc. All Rights Reserved.
#-----------------------------------------------------------
# Tool version : sds++ 2017.4 SW Build 2086221 on Fri Dec 15 20:55:10 MST 2017
# Start time : Wed Mar 28 20:08:47 JST 2018
# Command line : sds++ --sysroot=/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot -L=/lib -L=/usr/lib -Wl,-rpath-link=/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/lib,-rpath-link=/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/usr/lib -sdcard ../data --remote_ip_cache /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/ip_cache -o dnp_of.elf ./src/xf_dense_npyr_optical_flow_accel.o ./src/xf_dense_npyr_optical_flow_tb.o -lglib-2.0 -ldrm -lv4l2subdev -lmediactl -lopencv_imgcodecs -lopencv_core -llzma -ltiff -lpng16 -lz -ljpeg -lopencv_imgproc -ldl -lrt -lwebp -lopencv_features2d -lopencv_flann -dmclkid 0 -sds-sys-config linux -sds-proc linux -sds-pf /home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20
# Log file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/reports/sds.log
# Journal file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/reports/sds.jou
# Report file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/reports/sds.rpt
#-----------------------------------------------------------
-------------------
Design Timing Check
-------------------
Partition 0
Vivado Log : file not found : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/p0/ipi/vivado.log
Timing Summary : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/p0/_vpl/ipi/imp/imp.runs/impl_1/updated_full_design_timing_summary_routed.rpt
All user specified timing constraints are met.
Timing Summary Report
Timer Settings
--------------
Enable Multi Corner Analysis : Yes
Enable Pessimism Removal : Yes
Pessimism Removal Resolution : Nearest Common Node
Enable Input Delay Default Clock : No
Enable Preset / Clear Arcs : No
Disable Flight Delays : No
Ignore I/O Paths : No
Timing Early Launch at Borrowing Latches : false
Corner Analyze Analyze
Name Max Paths Min Paths
------ --------- ---------
Slow Yes Yes
Fast Yes Yes
check_timing report
Table of Contents
1. checking no_clock
2. checking constant_clock
3. checking pulse_width_clock
4. checking unconstrained_internal_endpoints
5. checking no_input_delay
6. checking no_output_delay
7. checking multiple_clock
8. checking generated_clocks
9. checking loops
10. checking partial_input_delay
11. checking partial_output_delay
12. checking latch_loops
1. checking no_clock
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[10]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[11]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[12]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[13]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[14]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[15]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[2]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[3]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[4]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[5]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[6]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[7]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[8]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[9]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_valid_i_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[10]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[11]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[12]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[13]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[14]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[15]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[2]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[3]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[4]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[5]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[6]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[7]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[8]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[9]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_valid_i_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/w.w_pipe/m_valid_i_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/w0_xf_DenseNonPyrLKOpticalFlow_1_if/inst/adapter_i/axi_lite_if_i/axi_arready_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/w0_xf_DenseNonPyrLKOpticalFlow_1_if/inst/adapter_i/axi_lite_if_i/axi_awready_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/w0_xf_DenseNonPyrLKOpticalFlow_1_if/inst/adapter_i/axi_lite_if_i/axi_wready_reg/Q (HIGH)
2. checking constant_clock
There are 0 register/latch pins with constant_clock.
3. checking pulse_width_clock
There are 0 register/latch pins which need pulse_width check
4. checking unconstrained_internal_endpoints
There is 1 pin that is not constrained for maximum delay. (HIGH)
There are 0 pins that are not constrained for maximum delay due to constant clock.
5. checking no_input_delay
There are 19 input ports with no input delay specified. (HIGH)
There are 0 input ports with no input delay but user has a false path constraint.
6. checking no_output_delay
There are 21 ports with no output delay specified. (HIGH)
There are 0 ports with no output delay but user has a false path constraint
There are 0 ports with no output delay but with a timing clock defined on it or propagating through it
7. checking multiple_clock
There are 0 register/latch pins with multiple clocks.
8. checking generated_clocks
There are 0 generated clocks that are not connected to a clock source.
9. checking loops
There are 0 combinational loops in the design.
10. checking partial_input_delay
There are 0 input ports with partial input delay specified.
11. checking partial_output_delay
There are 0 ports with partial output delay specified.
12. checking latch_loops
There are 0 combinational latch loops in the design through latch input
Design Timing Summary
---------------------
WNS(ns) TNS(ns) TNS Failing Endpoints TNS Total Endpoints WHS(ns) THS(ns) THS Failing Endpoints THS Total Endpoints WPWS(ns) TPWS(ns) TPWS Failing Endpoints TPWS Total Endpoints
------- ------- --------------------- ------------------- ------- ------- --------------------- ------------------- -------- -------- ---------------------- --------------------
0.305 0.000 0 117480 0.002 0.000 0 117267 0.013 0.000 0 47485
All user specified timing constraints are met.
Clock Summary
-------------
Clock Waveform(ns) Period(ns) Frequency(MHz)
----- ------------ ---------- --------------
clk_fpga_0 {0.000 5.000} 10.000 100.000
I {0.000 1.000} 2.000 500.000
axi_dynclk_0_PXL_CLK_O {0.000 4.000} 10.000 100.000
mmcm_fbclk_out {0.000 5.000} 10.000 100.000
clk_fpga_1 {0.000 3.749} 7.499 133.351
clk_fpga_2 {0.000 2.500} 5.000 200.000
hdmi_in_clk_p {0.000 4.200} 8.400 119.048
CLKFBIN {0.000 4.200} 8.400 119.048
CLK_OUT_5x_hdmi_clk {0.000 0.840} 1.680 595.238
PixelClk_int {0.000 3.360} 8.400 119.048
sys_clock {0.000 4.000} 8.000 125.000
clk_out1_zybo_z7_20_clk_wiz_0_0 {0.000 41.667} 83.333 12.000
clkfbout_zybo_z7_20_clk_wiz_0_0 {0.000 4.000} 8.000 125.000
zybo_z7_20_i/util_bufg_fclk1/U0/BUFG_O[0] {0.000 3.750} 7.500 133.333
-------------------
Data Motion Network
-------------------
Data motion network report generated in /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/reports
HTML file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/reports/data_motion.html
-------------------
Design Utilization
-------------------
Partition 0
Utilization Summary : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/dnp_of/Debug/_sds/p0/_vpl/ipi/imp/imp.runs/impl_1/updated_full_design_utilization_placed.rpt
Utilization Design Information
Table of Contents
-----------------
1. Slice Logic
1.1 Summary of Registers by Type
2. Slice Logic Distribution
3. Memory
4. DSP
5. IO and GT Specific
6. Clocking
7. Specific Feature
8. Primitives
9. Black Boxes
10. Instantiated Netlists
1. Slice Logic
--------------
+----------------------------+-------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+----------------------------+-------+-------+-----------+-------+
| Slice LUTs | 29538 | 0 | 53200 | 55.52 |
| LUT as Logic | 27404 | 0 | 53200 | 51.51 |
| LUT as Memory | 2134 | 0 | 17400 | 12.26 |
| LUT as Distributed RAM | 856 | 0 | | |
| LUT as Shift Register | 1278 | 0 | | |
| Slice Registers | 43767 | 0 | 106400 | 41.13 |
| Register as Flip Flop | 43766 | 0 | 106400 | 41.13 |
| Register as Latch | 1 | 0 | 106400 | <0.01 |
| F7 Muxes | 500 | 0 | 26600 | 1.88 |
| F8 Muxes | 10 | 0 | 13300 | 0.08 |
+----------------------------+-------+-------+-----------+-------+
1.1 Summary of Registers by Type
--------------------------------
+-------+--------------+-------------+--------------+
| Total | Clock Enable | Synchronous | Asynchronous |
+-------+--------------+-------------+--------------+
| 0 | _ | - | - |
| 0 | _ | - | Set |
| 0 | _ | - | Reset |
| 0 | _ | Set | - |
| 0 | _ | Reset | - |
| 0 | Yes | - | - |
| 367 | Yes | - | Set |
| 506 | Yes | - | Reset |
| 1586 | Yes | Set | - |
| 41308 | Yes | Reset | - |
+-------+--------------+-------------+--------------+
2. Slice Logic Distribution
---------------------------
+-------------------------------------------+-------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+-------------------------------------------+-------+-------+-----------+-------+
| Slice | 11961 | 0 | 13300 | 89.93 |
| SLICEL | 8007 | 0 | | |
| SLICEM | 3954 | 0 | | |
| LUT as Logic | 27404 | 0 | 53200 | 51.51 |
| using O5 output only | 0 | | | |
| using O6 output only | 21890 | | | |
| using O5 and O6 | 5514 | | | |
| LUT as Memory | 2134 | 0 | 17400 | 12.26 |
| LUT as Distributed RAM | 856 | 0 | | |
| using O5 output only | 0 | | | |
| using O6 output only | 72 | | | |
| using O5 and O6 | 784 | | | |
| LUT as Shift Register | 1278 | 0 | | |
| using O5 output only | 146 | | | |
| using O6 output only | 680 | | | |
| using O5 and O6 | 452 | | | |
| LUT Flip Flop Pairs | 16354 | 0 | 53200 | 30.74 |
| fully used LUT-FF pairs | 3782 | | | |
| LUT-FF pairs with one unused LUT output | 11965 | | | |
| LUT-FF pairs with one unused Flip Flop | 10614 | | | |
| Unique Control Sets | 1717 | | | |
+-------------------------------------------+-------+-------+-----------+-------+
* Note: Review the Control Sets Report for more information regarding control sets.
3. Memory
---------
+-------------------+------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+-------------------+------+-------+-----------+-------+
| Block RAM Tile | 128 | 0 | 140 | 91.43 |
| RAMB36/FIFO* | 119 | 0 | 140 | 85.00 |
| FIFO36E1 only | 6 | | | |
| RAMB36E1 only | 113 | | | |
| RAMB18 | 18 | 0 | 280 | 6.43 |
| RAMB18E1 only | 18 | | | |
+-------------------+------+-------+-----------+-------+
* Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E1 or one FIFO18E1. However, if a FIFO18E1 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E1
4. DSP
------
+----------------+------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+----------------+------+-------+-----------+-------+
| DSPs | 37 | 0 | 220 | 16.82 |
| DSP48E1 only | 37 | | | |
+----------------+------+-------+-----------+-------+
5. IO and GT Specific
---------------------
+-----------------------------+------+-------+-----------+--------+
| Site Type | Used | Fixed | Available | Util% |
+-----------------------------+------+-------+-----------+--------+
| Bonded IOB | 58 | 58 | 125 | 46.40 |
| IOB Master Pads | 29 | | | |
| IOB Slave Pads | 26 | | | |
| Bonded IPADs | 2 | 2 | 2 | 100.00 |
| Bonded IOPADs | 130 | 130 | 130 | 100.00 |
| PHY_CONTROL | 0 | 0 | 4 | 0.00 |
| PHASER_REF | 0 | 0 | 4 | 0.00 |
| OUT_FIFO | 0 | 0 | 16 | 0.00 |
| IN_FIFO | 0 | 0 | 16 | 0.00 |
| IDELAYCTRL | 1 | 0 | 4 | 25.00 |
| IBUFDS | 4 | 4 | 121 | 3.31 |
| PHASER_OUT/PHASER_OUT_PHY | 0 | 0 | 16 | 0.00 |
| PHASER_IN/PHASER_IN_PHY | 0 | 0 | 16 | 0.00 |
| IDELAYE2/IDELAYE2_FINEDELAY | 3 | 3 | 200 | 1.50 |
| IDELAYE2 only | 3 | 3 | | |
| ILOGIC | 6 | 6 | 125 | 4.80 |
| ISERDES | 6 | 6 | | |
| OLOGIC | 8 | 8 | 125 | 6.40 |
| OSERDES | 8 | 8 | | |
+-----------------------------+------+-------+-----------+--------+
6. Clocking
-----------
+--------------+------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+--------------+------+-------+-----------+-------+
| BUFGCTRL | 6 | 0 | 32 | 18.75 |
| BUFIO | 2 | 0 | 16 | 12.50 |
| BUFIO only | 2 | 0 | | |
| MMCME2_ADV | 2 | 0 | 4 | 50.00 |
| PLLE2_ADV | 1 | 0 | 4 | 25.00 |
| BUFMRCE | 0 | 0 | 8 | 0.00 |
| BUFHCE | 0 | 0 | 72 | 0.00 |
| BUFR | 2 | 0 | 16 | 12.50 |
+--------------+------+-------+-----------+-------+
7. Specific Feature
-------------------
+-------------+------+-------+-----------+--------+
| Site Type | Used | Fixed | Available | Util% |
+-------------+------+-------+-----------+--------+
| BSCANE2 | 0 | 0 | 4 | 0.00 |
| CAPTUREE2 | 0 | 0 | 1 | 0.00 |
| DNA_PORT | 0 | 0 | 1 | 0.00 |
| EFUSE_USR | 0 | 0 | 1 | 0.00 |
| FRAME_ECCE2 | 0 | 0 | 1 | 0.00 |
| ICAPE2 | 0 | 0 | 2 | 0.00 |
| STARTUPE2 | 0 | 0 | 1 | 0.00 |
| XADC | 1 | 1 | 1 | 100.00 |
+-------------+------+-------+-----------+--------+
8. Primitives
-------------
+------------+-------+----------------------+
| Ref Name | Used | Functional Category |
+------------+-------+----------------------+
| FDRE | 41308 | Flop & Latch |
| LUT3 | 9718 | LUT |
| LUT6 | 7369 | LUT |
| LUT4 | 5355 | LUT |
| LUT2 | 5113 | LUT |
| LUT5 | 4451 | LUT |
| CARRY4 | 1924 | CarryLogic |
| FDSE | 1586 | Flop & Latch |
| SRL16E | 1270 | Distributed Memory |
| RAMD32 | 1184 | Distributed Memory |
| LUT1 | 912 | LUT |
| FDCE | 505 | Flop & Latch |
| MUXF7 | 500 | MuxFx |
| SRLC32E | 460 | Distributed Memory |
| RAMS32 | 392 | Distributed Memory |
| FDPE | 367 | Flop & Latch |
| BIBUF | 130 | IO |
| RAMB36E1 | 113 | Block Memory |
| RAMD64E | 64 | Distributed Memory |
| DSP48E1 | 37 | Block Arithmetic |
| IBUF | 31 | IO |
| RAMB18E1 | 18 | Block Memory |
| OBUF | 13 | IO |
| OBUFT | 11 | IO |
| MUXF8 | 10 | MuxFx |
| OSERDESE2 | 8 | IO |
| ISERDESE2 | 6 | IO |
| FIFO36E1 | 6 | Block Memory |
| BUFG | 6 | Clock |
| OBUFDS | 4 | IO |
| IBUFDS | 4 | IO |
| IDELAYE2 | 3 | IO |
| MMCME2_ADV | 2 | Clock |
| BUFR | 2 | Clock |
| BUFIO | 2 | Clock |
| XADC | 1 | Others |
| PS7 | 1 | Specialized Resource |
| PLLE2_ADV | 1 | Clock |
| LDCE | 1 | Flop & Latch |
| IDELAYCTRL | 1 | IO |
+------------+-------+----------------------+
9. Black Boxes
--------------
+----------+------+
| Ref Name | Used |
+----------+------+
10. Instantiated Netlists
-------------------------
+--------------------------------------------------+------+
| Ref Name | Used |
+--------------------------------------------------+------+
| zybo_z7_20_xlconstant_1_0 | 1 |
| zybo_z7_20_xlconcat_0_0 | 1 |
| zybo_z7_20_xbar_4 | 1 |
| zybo_z7_20_xbar_3 | 1 |
| zybo_z7_20_xbar_2 | 1 |
| zybo_z7_20_xbar_1 | 1 |
| zybo_z7_20_xbar_0 | 1 |
| zybo_z7_20_xadc_wiz_0_0 | 1 |
| zybo_z7_20_w0_xf_DenseNonPyrLKOpticalFlow_1_if_0 | 1 |
| zybo_z7_20_w0_xf_DenseNonPyrLKOpticalFlow_1_0 | 1 |
| zybo_z7_20_v_vid_in_axi4s_0_0 | 1 |
| zybo_z7_20_v_tc_out_0 | 1 |
| zybo_z7_20_v_tc_in_0 | 1 |
| zybo_z7_20_v_axi4s_vid_out_0_0 | 1 |
| zybo_z7_20_util_bufg_fclk1_0 | 1 |
| zybo_z7_20_sgdma2axis_dm_1_0 | 1 |
| zybo_z7_20_sgdma2axis_dm_0_0 | 1 |
| zybo_z7_20_sds_irq_const_0 | 1 |
| zybo_z7_20_s02_regslice_1 | 1 |
| zybo_z7_20_s02_regslice_0 | 1 |
| zybo_z7_20_s01_regslice_2 | 1 |
| zybo_z7_20_s01_regslice_1 | 1 |
| zybo_z7_20_s01_regslice_0 | 1 |
| zybo_z7_20_s01_data_fifo_1 | 1 |
| zybo_z7_20_s01_data_fifo_0 | 1 |
| zybo_z7_20_s00_regslice_5 | 1 |
| zybo_z7_20_s00_regslice_4 | 1 |
| zybo_z7_20_s00_regslice_3 | 1 |
| zybo_z7_20_s00_regslice_2 | 1 |
| zybo_z7_20_s00_regslice_1 | 1 |
| zybo_z7_20_s00_regslice_0 | 1 |
| zybo_z7_20_s00_data_fifo_2 | 1 |
| zybo_z7_20_s00_data_fifo_1 | 1 |
| zybo_z7_20_s00_data_fifo_0 | 1 |
| zybo_z7_20_rgb2dvi_1_0 | 1 |
| zybo_z7_20_pwm_rgb_0 | 1 |
| zybo_z7_20_psr_fclk1_0 | 1 |
| zybo_z7_20_psr_fclk0_0 | 1 |
| zybo_z7_20_processing_system7_0_0 | 1 |
| zybo_z7_20_proc_sys_reset_0_0 | 1 |
| zybo_z7_20_m02_regslice_0 | 1 |
| zybo_z7_20_m01_regslice_0 | 1 |
| zybo_z7_20_m00_regslice_3 | 1 |
| zybo_z7_20_m00_regslice_2 | 1 |
| zybo_z7_20_m00_regslice_1 | 1 |
| zybo_z7_20_m00_regslice_0 | 1 |
| zybo_z7_20_m00_data_fifo_1 | 1 |
| zybo_z7_20_m00_data_fifo_0 | 1 |
| zybo_z7_20_dvi2rgb_1_0 | 1 |
| zybo_z7_20_dm_1_0 | 1 |
| zybo_z7_20_dm_0_0 | 1 |
| zybo_z7_20_clk_wiz_0_0 | 1 |
| zybo_z7_20_axis_subset_converter_out_0 | 1 |
| zybo_z7_20_axis_subset_converter_in_0 | 1 |
| zybo_z7_20_axis2sgdma_dm_1_0 | 1 |
| zybo_z7_20_axis2sgdma_dm_0_0 | 1 |
| zybo_z7_20_axi_vdma_1_0 | 1 |
| zybo_z7_20_axi_vdma_0_0 | 1 |
| zybo_z7_20_axi_i2s_adi_0_0 | 1 |
| zybo_z7_20_axi_gpio_video_0 | 1 |
| zybo_z7_20_axi_gpio_sw_btn_0 | 1 |
| zybo_z7_20_axi_gpio_led_0 | 1 |
| zybo_z7_20_axi_gpio_eth_0 | 1 |
| zybo_z7_20_axi_dynclk_0_0 | 1 |
| zybo_z7_20_auto_us_df_1 | 1 |
| zybo_z7_20_auto_us_df_0 | 1 |
| zybo_z7_20_auto_ss_slid_1 | 1 |
| zybo_z7_20_auto_ss_slid_0 | 1 |
| zybo_z7_20_auto_pc_4 | 1 |
| zybo_z7_20_auto_pc_3 | 1 |
| zybo_z7_20_auto_pc_2 | 1 |
| zybo_z7_20_auto_pc_1 | 1 |
| zybo_z7_20_auto_pc_0 | 1 |
+--------------------------------------------------+------+
Configuration -> Portを選択する
Port:/dev/ttyUSB1、Baud Rate:115200を選択する
(c) Copyright 2012-2017 Xilinx, Inc. All Rights Reserved.
#-----------------------------------------------------------
# Tool version : sds++ 2017.4 SW Build 2086221 on Fri Dec 15 20:55:10 MST 2017
# Start time : Sun Mar 25 04:22:41 JST 2018
# Command line : sds++ --sysroot=/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot -L=/lib -L=/usr/lib -Wl,-rpath-link=/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/lib,-rpath-link=/home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20/sw/sysroot/usr/lib -sdcard ../data --remote_ip_cache /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/ip_cache -o bilateral_ex.elf ./src/xf_bilateral_filter_accel.o ./src/xf_bilateral_filter_tb.o -lglib-2.0 -ldrm -lv4l2subdev -lmediactl -lopencv_imgcodecs -lopencv_core -llzma -ltiff -lpng16 -lz -ljpeg -lopencv_imgproc -ldl -lrt -lwebp -lopencv_features2d -lopencv_flann -lopencv_calib3d -dmclkid 0 -sds-sys-config linux -sds-proc linux -sds-pf /home/masaaki/reVISION-Zybo-Z7-20/sdsoc/zybo_z7_20/export/zybo_z7_20
# Log file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/bilateral_ex/Debug/_sds/reports/sds.log
# Journal file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/bilateral_ex/Debug/_sds/reports/sds.jou
# Report file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/bilateral_ex/Debug/_sds/reports/sds.rpt
#-----------------------------------------------------------
-------------------
Design Timing Check
-------------------
Partition 0
Vivado Log : file not found : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/bilateral_ex/Debug/_sds/p0/ipi/vivado.log
Timing Summary : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/bilateral_ex/Debug/_sds/p0/_vpl/ipi/imp/imp.runs/impl_1/updated_full_design_timing_summary_routed.rpt
All user specified timing constraints are met.
Timing Summary Report
Timer Settings
--------------
Enable Multi Corner Analysis : Yes
Enable Pessimism Removal : Yes
Pessimism Removal Resolution : Nearest Common Node
Enable Input Delay Default Clock : No
Enable Preset / Clear Arcs : No
Disable Flight Delays : No
Ignore I/O Paths : No
Timing Early Launch at Borrowing Latches : false
Corner Analyze Analyze
Name Max Paths Min Paths
------ --------- ---------
Slow Yes Yes
Fast Yes Yes
check_timing report
Table of Contents
1. checking no_clock
2. checking constant_clock
3. checking pulse_width_clock
4. checking unconstrained_internal_endpoints
5. checking no_input_delay
6. checking no_output_delay
7. checking multiple_clock
8. checking generated_clocks
9. checking loops
10. checking partial_input_delay
11. checking partial_output_delay
12. checking latch_loops
1. checking no_clock
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[10]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[11]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[12]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[13]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[14]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[15]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[2]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[3]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[4]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[5]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[6]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[7]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[8]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_payload_i_reg[9]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/ar.ar_pipe/m_valid_i_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[10]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[11]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[12]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[13]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[14]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[15]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[2]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[3]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[4]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[5]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[6]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[7]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[8]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_payload_i_reg[9]/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/aw.aw_pipe/m_valid_i_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/axi_ic_processing_system7_0_M_AXI_GP1/m00_couplers/m00_regslice/inst/w.w_pipe/m_valid_i_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/w0_xf_bilateralFilter_1_if/inst/adapter_i/axi_lite_if_i/axi_arready_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/w0_xf_bilateralFilter_1_if/inst/adapter_i/axi_lite_if_i/axi_awready_reg/Q (HIGH)
There is 1 register/latch pin with no clock driven by root clock pin: zybo_z7_20_i/w0_xf_bilateralFilter_1_if/inst/adapter_i/axi_lite_if_i/axi_wready_reg/Q (HIGH)
2. checking constant_clock
There are 0 register/latch pins with constant_clock.
3. checking pulse_width_clock
There are 0 register/latch pins which need pulse_width check
4. checking unconstrained_internal_endpoints
There is 1 pin that is not constrained for maximum delay. (HIGH)
There are 0 pins that are not constrained for maximum delay due to constant clock.
5. checking no_input_delay
There are 19 input ports with no input delay specified. (HIGH)
There are 0 input ports with no input delay but user has a false path constraint.
6. checking no_output_delay
There are 21 ports with no output delay specified. (HIGH)
There are 0 ports with no output delay but user has a false path constraint
There are 0 ports with no output delay but with a timing clock defined on it or propagating through it
7. checking multiple_clock
There are 0 register/latch pins with multiple clocks.
8. checking generated_clocks
There are 0 generated clocks that are not connected to a clock source.
9. checking loops
There are 0 combinational loops in the design.
10. checking partial_input_delay
There are 0 input ports with partial input delay specified.
11. checking partial_output_delay
There are 0 ports with partial output delay specified.
12. checking latch_loops
There are 0 combinational latch loops in the design through latch input
Design Timing Summary
---------------------
WNS(ns) TNS(ns) TNS Failing Endpoints TNS Total Endpoints WHS(ns) THS(ns) THS Failing Endpoints THS Total Endpoints WPWS(ns) TPWS(ns) TPWS Failing Endpoints TPWS Total Endpoints
------- ------- --------------------- ------------------- ------- ------- --------------------- ------------------- -------- -------- ---------------------- --------------------
0.423 0.000 0 87494 0.051 0.000 0 87281 0.013 0.000 0 36432
All user specified timing constraints are met.
Clock Summary
-------------
Clock Waveform(ns) Period(ns) Frequency(MHz)
----- ------------ ---------- --------------
clk_fpga_0 {0.000 5.000} 10.000 100.000
I {0.000 1.000} 2.000 500.000
axi_dynclk_0_PXL_CLK_O {0.000 4.000} 10.000 100.000
mmcm_fbclk_out {0.000 5.000} 10.000 100.000
clk_fpga_1 {0.000 3.749} 7.499 133.351
clk_fpga_2 {0.000 2.500} 5.000 200.000
hdmi_in_clk_p {0.000 4.200} 8.400 119.048
CLKFBIN {0.000 4.200} 8.400 119.048
CLK_OUT_5x_hdmi_clk {0.000 0.840} 1.680 595.238
PixelClk_int {0.000 3.360} 8.400 119.048
sys_clock {0.000 4.000} 8.000 125.000
clk_out1_zybo_z7_20_clk_wiz_0_0 {0.000 41.667} 83.333 12.000
clkfbout_zybo_z7_20_clk_wiz_0_0 {0.000 4.000} 8.000 125.000
zybo_z7_20_i/util_bufg_fclk1/U0/BUFG_O[0] {0.000 3.750} 7.500 133.333
-------------------
Data Motion Network
-------------------
Data motion network report generated in /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/bilateral_ex/Debug/_sds/reports
HTML file : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/bilateral_ex/Debug/_sds/reports/data_motion.html
-------------------
Design Utilization
-------------------
Partition 0
Utilization Summary : /home/masaaki/sdx_workspaces/revisio_zybo_z7_20_ws/bilateral_ex/Debug/_sds/p0/_vpl/ipi/imp/imp.runs/impl_1/updated_full_design_utilization_placed.rpt
Utilization Design Information
Table of Contents
-----------------
1. Slice Logic
1.1 Summary of Registers by Type
2. Slice Logic Distribution
3. Memory
4. DSP
5. IO and GT Specific
6. Clocking
7. Specific Feature
8. Primitives
9. Black Boxes
10. Instantiated Netlists
1. Slice Logic
--------------
+----------------------------+-------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+----------------------------+-------+-------+-----------+-------+
| Slice LUTs | 21009 | 0 | 53200 | 39.49 |
| LUT as Logic | 19537 | 0 | 53200 | 36.72 |
| LUT as Memory | 1472 | 0 | 17400 | 8.46 |
| LUT as Distributed RAM | 832 | 0 | | |
| LUT as Shift Register | 640 | 0 | | |
| Slice Registers | 33711 | 0 | 106400 | 31.68 |
| Register as Flip Flop | 33710 | 0 | 106400 | 31.68 |
| Register as Latch | 1 | 0 | 106400 | <0.01 |
| F7 Muxes | 415 | 0 | 26600 | 1.56 |
| F8 Muxes | 9 | 0 | 13300 | 0.07 |
+----------------------------+-------+-------+-----------+-------+
1.1 Summary of Registers by Type
--------------------------------
+-------+--------------+-------------+--------------+
| Total | Clock Enable | Synchronous | Asynchronous |
+-------+--------------+-------------+--------------+
| 0 | _ | - | - |
| 0 | _ | - | Set |
| 0 | _ | - | Reset |
| 0 | _ | Set | - |
| 0 | _ | Reset | - |
| 0 | Yes | - | - |
| 324 | Yes | - | Set |
| 455 | Yes | - | Reset |
| 1014 | Yes | Set | - |
| 31918 | Yes | Reset | - |
+-------+--------------+-------------+--------------+
2. Slice Logic Distribution
---------------------------
+-------------------------------------------+-------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+-------------------------------------------+-------+-------+-----------+-------+
| Slice | 9828 | 0 | 13300 | 73.89 |
| SLICEL | 6608 | 0 | | |
| SLICEM | 3220 | 0 | | |
| LUT as Logic | 19537 | 0 | 53200 | 36.72 |
| using O5 output only | 2 | | | |
| using O6 output only | 15265 | | | |
| using O5 and O6 | 4270 | | | |
| LUT as Memory | 1472 | 0 | 17400 | 8.46 |
| LUT as Distributed RAM | 832 | 0 | | |
| using O5 output only | 0 | | | |
| using O6 output only | 72 | | | |
| using O5 and O6 | 760 | | | |
| LUT as Shift Register | 640 | 0 | | |
| using O5 output only | 63 | | | |
| using O6 output only | 243 | | | |
| using O5 and O6 | 334 | | | |
| LUT Flip Flop Pairs | 11470 | 0 | 53200 | 21.56 |
| fully used LUT-FF pairs | 3028 | | | |
| LUT-FF pairs with one unused LUT output | 7936 | | | |
| LUT-FF pairs with one unused Flip Flop | 7105 | | | |
| Unique Control Sets | 1406 | | | |
+-------------------------------------------+-------+-------+-----------+-------+
* Note: Review the Control Sets Report for more information regarding control sets.
3. Memory
---------
+-------------------+------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+-------------------+------+-------+-----------+-------+
| Block RAM Tile | 34.5 | 0 | 140 | 24.64 |
| RAMB36/FIFO* | 24 | 0 | 140 | 17.14 |
| FIFO36E1 only | 6 | | | |
| RAMB36E1 only | 18 | | | |
| RAMB18 | 21 | 0 | 280 | 7.50 |
| RAMB18E1 only | 21 | | | |
+-------------------+------+-------+-----------+-------+
* Note: Each Block RAM Tile only has one FIFO logic available and therefore can accommodate only one FIFO36E1 or one FIFO18E1. However, if a FIFO18E1 occupies a Block RAM Tile, that tile can still accommodate a RAMB18E1
4. DSP
------
+----------------+------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+----------------+------+-------+-----------+-------+
| DSPs | 19 | 0 | 220 | 8.64 |
| DSP48E1 only | 19 | | | |
+----------------+------+-------+-----------+-------+
5. IO and GT Specific
---------------------
+-----------------------------+------+-------+-----------+--------+
| Site Type | Used | Fixed | Available | Util% |
+-----------------------------+------+-------+-----------+--------+
| Bonded IOB | 58 | 58 | 125 | 46.40 |
| IOB Master Pads | 29 | | | |
| IOB Slave Pads | 26 | | | |
| Bonded IPADs | 2 | 2 | 2 | 100.00 |
| Bonded IOPADs | 130 | 130 | 130 | 100.00 |
| PHY_CONTROL | 0 | 0 | 4 | 0.00 |
| PHASER_REF | 0 | 0 | 4 | 0.00 |
| OUT_FIFO | 0 | 0 | 16 | 0.00 |
| IN_FIFO | 0 | 0 | 16 | 0.00 |
| IDELAYCTRL | 1 | 0 | 4 | 25.00 |
| IBUFDS | 4 | 4 | 121 | 3.31 |
| PHASER_OUT/PHASER_OUT_PHY | 0 | 0 | 16 | 0.00 |
| PHASER_IN/PHASER_IN_PHY | 0 | 0 | 16 | 0.00 |
| IDELAYE2/IDELAYE2_FINEDELAY | 3 | 3 | 200 | 1.50 |
| IDELAYE2 only | 3 | 3 | | |
| ILOGIC | 6 | 6 | 125 | 4.80 |
| ISERDES | 6 | 6 | | |
| OLOGIC | 8 | 8 | 125 | 6.40 |
| OSERDES | 8 | 8 | | |
+-----------------------------+------+-------+-----------+--------+
6. Clocking
-----------
+--------------+------+-------+-----------+-------+
| Site Type | Used | Fixed | Available | Util% |
+--------------+------+-------+-----------+-------+
| BUFGCTRL | 6 | 0 | 32 | 18.75 |
| BUFIO | 2 | 0 | 16 | 12.50 |
| BUFIO only | 2 | 0 | | |
| MMCME2_ADV | 2 | 0 | 4 | 50.00 |
| PLLE2_ADV | 1 | 0 | 4 | 25.00 |
| BUFMRCE | 0 | 0 | 8 | 0.00 |
| BUFHCE | 0 | 0 | 72 | 0.00 |
| BUFR | 2 | 0 | 16 | 12.50 |
+--------------+------+-------+-----------+-------+
7. Specific Feature
-------------------
+-------------+------+-------+-----------+--------+
| Site Type | Used | Fixed | Available | Util% |
+-------------+------+-------+-----------+--------+
| BSCANE2 | 0 | 0 | 4 | 0.00 |
| CAPTUREE2 | 0 | 0 | 1 | 0.00 |
| DNA_PORT | 0 | 0 | 1 | 0.00 |
| EFUSE_USR | 0 | 0 | 1 | 0.00 |
| FRAME_ECCE2 | 0 | 0 | 1 | 0.00 |
| ICAPE2 | 0 | 0 | 2 | 0.00 |
| STARTUPE2 | 0 | 0 | 1 | 0.00 |
| XADC | 1 | 1 | 1 | 100.00 |
+-------------+------+-------+-----------+--------+
8. Primitives
-------------
+------------+-------+----------------------+
| Ref Name | Used | Functional Category |
+------------+-------+----------------------+
| FDRE | 31918 | Flop & Latch |
| LUT3 | 6113 | LUT |
| LUT6 | 5724 | LUT |
| LUT5 | 3828 | LUT |
| LUT4 | 3735 | LUT |
| LUT2 | 3520 | LUT |
| RAMD32 | 1148 | Distributed Memory |
| CARRY4 | 1122 | CarryLogic |
| FDSE | 1014 | Flop & Latch |
| LUT1 | 887 | LUT |
| SRL16E | 879 | Distributed Memory |
| FDCE | 454 | Flop & Latch |
| MUXF7 | 415 | MuxFx |
| RAMS32 | 380 | Distributed Memory |
| FDPE | 324 | Flop & Latch |
| BIBUF | 130 | IO |
| SRLC32E | 95 | Distributed Memory |
| RAMD64E | 64 | Distributed Memory |
| IBUF | 31 | IO |
| RAMB18E1 | 21 | Block Memory |
| DSP48E1 | 19 | Block Arithmetic |
| RAMB36E1 | 18 | Block Memory |
| OBUF | 13 | IO |
| OBUFT | 11 | IO |
| MUXF8 | 9 | MuxFx |
| OSERDESE2 | 8 | IO |
| ISERDESE2 | 6 | IO |
| FIFO36E1 | 6 | Block Memory |
| BUFG | 6 | Clock |
| OBUFDS | 4 | IO |
| IBUFDS | 4 | IO |
| IDELAYE2 | 3 | IO |
| MMCME2_ADV | 2 | Clock |
| BUFR | 2 | Clock |
| BUFIO | 2 | Clock |
| XADC | 1 | Others |
| PS7 | 1 | Specialized Resource |
| PLLE2_ADV | 1 | Clock |
| LDCE | 1 | Flop & Latch |
| IDELAYCTRL | 1 | IO |
+------------+-------+----------------------+
9. Black Boxes
--------------
+----------+------+
| Ref Name | Used |
+----------+------+
10. Instantiated Netlists
-------------------------
+-----------------------------------------+------+
| Ref Name | Used |
+-----------------------------------------+------+
| zybo_z7_20_xlconstant_1_0 | 1 |
| zybo_z7_20_xlconcat_0_0 | 1 |
| zybo_z7_20_xbar_4 | 1 |
| zybo_z7_20_xbar_3 | 1 |
| zybo_z7_20_xbar_2 | 1 |
| zybo_z7_20_xbar_1 | 1 |
| zybo_z7_20_xbar_0 | 1 |
| zybo_z7_20_xadc_wiz_0_0 | 1 |
| zybo_z7_20_w0_xf_bilateralFilter_1_if_0 | 1 |
| zybo_z7_20_w0_xf_bilateralFilter_1_0 | 1 |
| zybo_z7_20_v_vid_in_axi4s_0_0 | 1 |
| zybo_z7_20_v_tc_out_0 | 1 |
| zybo_z7_20_v_tc_in_0 | 1 |
| zybo_z7_20_v_axi4s_vid_out_0_0 | 1 |
| zybo_z7_20_util_bufg_fclk1_0 | 1 |
| zybo_z7_20_sgdma2axis_dm_0_0 | 1 |
| zybo_z7_20_sds_irq_const_0 | 1 |
| zybo_z7_20_s01_regslice_2 | 1 |
| zybo_z7_20_s01_regslice_1 | 1 |
| zybo_z7_20_s01_regslice_0 | 1 |
| zybo_z7_20_s00_regslice_4 | 1 |
| zybo_z7_20_s00_regslice_3 | 1 |
| zybo_z7_20_s00_regslice_2 | 1 |
| zybo_z7_20_s00_regslice_1 | 1 |
| zybo_z7_20_s00_regslice_0 | 1 |
| zybo_z7_20_s00_data_fifo_2 | 1 |
| zybo_z7_20_s00_data_fifo_1 | 1 |
| zybo_z7_20_s00_data_fifo_0 | 1 |
| zybo_z7_20_rgb2dvi_1_0 | 1 |
| zybo_z7_20_pwm_rgb_0 | 1 |
| zybo_z7_20_psr_fclk1_0 | 1 |
| zybo_z7_20_psr_fclk0_0 | 1 |
| zybo_z7_20_processing_system7_0_0 | 1 |
| zybo_z7_20_proc_sys_reset_0_0 | 1 |
| zybo_z7_20_m02_regslice_0 | 1 |
| zybo_z7_20_m01_regslice_0 | 1 |
| zybo_z7_20_m00_regslice_3 | 1 |
| zybo_z7_20_m00_regslice_2 | 1 |
| zybo_z7_20_m00_regslice_1 | 1 |
| zybo_z7_20_m00_regslice_0 | 1 |
| zybo_z7_20_m00_data_fifo_1 | 1 |
| zybo_z7_20_m00_data_fifo_0 | 1 |
| zybo_z7_20_dvi2rgb_1_0 | 1 |
| zybo_z7_20_dm_1_0 | 1 |
| zybo_z7_20_dm_0_0 | 1 |
| zybo_z7_20_clk_wiz_0_0 | 1 |
| zybo_z7_20_axis_subset_converter_out_0 | 1 |
| zybo_z7_20_axis_subset_converter_in_0 | 1 |
| zybo_z7_20_axis2sgdma_dm_1_0 | 1 |
| zybo_z7_20_axi_vdma_1_0 | 1 |
| zybo_z7_20_axi_vdma_0_0 | 1 |
| zybo_z7_20_axi_i2s_adi_0_0 | 1 |
| zybo_z7_20_axi_gpio_video_0 | 1 |
| zybo_z7_20_axi_gpio_sw_btn_0 | 1 |
| zybo_z7_20_axi_gpio_led_0 | 1 |
| zybo_z7_20_axi_gpio_eth_0 | 1 |
| zybo_z7_20_axi_dynclk_0_0 | 1 |
| zybo_z7_20_auto_us_df_1 | 1 |
| zybo_z7_20_auto_us_df_0 | 1 |
| zybo_z7_20_auto_ss_slid_0 | 1 |
| zybo_z7_20_auto_pc_4 | 1 |
| zybo_z7_20_auto_pc_3 | 1 |
| zybo_z7_20_auto_pc_2 | 1 |
| zybo_z7_20_auto_pc_1 | 1 |
| zybo_z7_20_auto_pc_0 | 1 |
+-----------------------------------------+------+
[INFO] sourcing bitbake
[INFO] generating plnxtool conf
[INFO] generating meta-plnx-generated layer
~/reVISION-Zybo-Z7-20/linux/Zybo-Z7-20/build/misc/plnx-generated ~/reVISION-Zybo-Z7-20/linux/Zybo-Z7-20
~/reVISION-Zybo-Z7-20/linux/Zybo-Z7-20
[INFO] generating machine configuration
[INFO] generating bbappends for project . This may take time !
~/reVISION-Zybo-Z7-20/linux/Zybo-Z7-20/build/misc/plnx-generated ~/reVISION-Zybo-Z7-20/linux/Zybo-Z7-20
~/reVISION-Zybo-Z7-20/linux/Zybo-Z7-20
[INFO] generating u-boot configuration files
[INFO] generating kernel configuration files
[INFO] generating kconfig for Rootfs
Generate rootfs kconfig
[INFO] oldconfig rootfs
[INFO] generating petalinux-user-image.bb
[INFO] building project
[INFO] sourcing bitbake
INFO: bitbake petalinux-user-image
Parsing recipes: 100% |##########################################| Time: 0:01:20
Parsing of 2473 .bb files complete (0 cached, 2473 parsed). 3266 targets, 226 skipped, 0 masked, 0 errors.
NOTE: Resolving any missing task queue dependencies
Initialising tasks: 100% |#######################################| Time: 0:00:09
Checking sstate mirror object availability: 100% |###############| Time: 0:01:21
NOTE: Executing SetScene Tasks
NOTE: Executing RunQueue Tasks
fsbl-2017.4+gitAUTOINC+77448ae629-r0 do_compile: NOTE: fsbl: compiling from external source tree /opt/pkg/petalinux/tools/hsm/data/embeddedsw
NOTE: Tasks Summary: Attempted 4788 tasks of which 3798 didn't need to be rerun and all succeeded.
INFO: Copying Images from deploy to images
INFO: Creating images/linux directory
NOTE: Failed to copy built images to tftp dir: /var/lib/tftpboot
[INFO] successfully built project
Xilinx Zynq UltraScale+ MPSoC ZU3EG SBVA484
Micron 2 GB (512M x32) LPDDR4 Memory
Delkin 16 GB MicroSD card + adapter
Pre-loaded with PetaLinux environment
Wi-Fi / Bluetooth
Mini DisplayPort (MiniDP or mDP)
1x USB 3.0 Type Micro-B upstream port
2x USB 3.0, 1x USB 2.0 Type A downstream ports
40-pin 96Boards Low-speed expansion header
60-pin 96Boards High speed expansion header
85mm x 54mm form factor
Linaro 96Boards Consumer Edition compatible
だけを生かしてC シミュレーションを行った。"curve_data_0_100.h"
hw_err_cnt = 12 sw_err_cnt = 20
hw_err_cnt = 5 sw_err_cnt = 20
を生かして、C シミュレーションを行った。#include "curve_data_2500_2600.h"
hw_err_cnt = 29 sw_err_cnt = 11
hw_err_cnt = 16 sw_err_cnt = 11
を生かして、C シミュレーションを行った。#include "curve_data_5000_5100.h"
hw_err_cnt = 34 sw_err_cnt = 15
hw_err_cnt = 44 sw_err_cnt = 15
だけを生かしてC シミュレーションを行った。#include "curve_data_0_100.h"
hw_err_cnt = 17 sw_err_cnt = 20
hw_err_cnt = 10 sw_err_cnt = 20
を生かして、C シミュレーションを行った。#include "curve_data_2500_2600.h"
hw_err_cnt = 33 sw_err_cnt = 11
hw_err_cnt = 16 sw_err_cnt = 11
を生かして、C シミュレーションを行った。#include "curve_data_5000_5100.h"
hw_err_cnt = 29 sw_err_cnt = 15
hw_err_cnt = 42 sw_err_cnt = 15
つまり、0 番目から 300 番目の白線画像でやってみた。結果はエラーになった。#include "curve_data_0_100.h"
なので、Linux のVivado HLS 2017.3 でやってみることにした。cc1plus.exe: out of memory allocating 16008 bytes
の内の hw_err_cnt がハードウェア化関数でのエラーの数を表す。sw_err_cnt が float で実装したソフトウェアのエラーの数を示す。今回はハードウェアのエラーが 8 個で、ソフトウェアのエラーが 20 個だった。思いがけなくソフトウェアのエラー数が多かった。hw_err_cnt = 8 sw_err_cnt = 20
の 2500 番目から 2800 番目の白線画像でやってみた。結果を示す。#include "curve_data_2500_2600.h"
hw_err_cnt = 29 sw_err_cnt = 11
で、ハードウェアが 14 個、ソフトウェアが 11 個間違った。hw_err_cnt = 14 sw_err_cnt = 11
で C シミュレーションを行った。結果を示す。#include "curve_data_5000_5100.h"
hw_err_cnt = 37 sw_err_cnt = 15
で、エラー数はハードウェアが 37 個、ソフトウェアが 15 個だった。やはり、ハードウェアのエラー数が多くなっている。hw_err_cnt = 46 sw_err_cnt = 15
をコメントアウトして、#define NUM_ITERATIONS 300
を生かした。#define NUM_ITERATIONS 2
ERROR: [XFORM 203-733] An internal stream 'outs_conv_layer.V.data.data0.V' (all_layers/all_layers.cpp:42) with default size is used in a non-dataflow region, which may result in deadlock. Please consider to resize the stream using the directive 'set_directive_stream' or the 'HLS stream' pragma.
// all_layers.h
// 2018/03/13 by marsee
//
#ifndef __ALL_LAYER_H__
#define __ALL_LAYER_H__
#include <ap_fixed.h>
// AXI4-Stream style beat carrying one fixed-point sample plus side-band fields.
// Template parameters:
//   W  - total bit width of the sample
//   I  - number of integer bits of the sample
//   U  - bit width of the user field
//   TI - bit width of the id field
//   TD - bit width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed1_axis{
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0; // the single payload sample
} data;
ap_uint<(W+7)/8> keep; // W rounded up to whole bytes, one bit per byte
ap_uint<(W+7)/8> strb; // same width as keep
ap_uint<U> user; // the layer functions wait for user != 0 to start a frame
ap_uint<1> last; // producers in this file set it on the final sample of a line/packet
ap_uint<TI> id;
ap_uint<TD> dest;
};
// AXI4-Stream style beat carrying two fixed-point samples (data0, data1)
// plus side-band fields.
// Template parameters:
//   W  - total bit width of each sample
//   I  - number of integer bits of each sample
//   U  - bit width of the user field
//   TI - bit width of the id field
//   TD - bit width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed2_axis{
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0; // first payload sample
ap_fixed<W,I,AP_TRN,AP_WRAP> data1; // second payload sample
} data;
// NOTE(review): keep/strb are sized from a single W-bit sample although the
// payload holds two of them - confirm this is intended.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
ap_uint<U> user; // the layer functions wait for user != 0 to start a frame
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of ap_fixed2_axis: one beat with two float samples.
// Template parameters: U / TI / TD are the bit widths of user / id / dest.
template<int U, int TI, int TD>
struct float2_axis{
struct data {
float data0; // first payload sample
float data1; // second payload sample
} data;
ap_uint<1> keep; // fixed 1-bit width, independent of the payload size
ap_uint<1> strb; // fixed 1-bit width, independent of the payload size
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of ap_fixed1_axis: one beat with a single float sample.
// Template parameters: U / TI / TD are the bit widths of user / id / dest.
template<int U, int TI, int TD>
struct float1_axis{
struct data {
float data0; // the single payload sample
} data;
ap_uint<1> keep; // fixed 1-bit width, independent of the payload size
ap_uint<1> strb; // fixed 1-bit width, independent of the payload size
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Number of values produced by the final affine layer (one per class).
#define NUMBER_OF_OUTPUT_LAYER 3
// Index of the winning class; 2 bits cover the indices 0..2.
typedef ap_uint<2> output_type;
// Fixed-point format of the affine layer 2 outputs: 12 bits total, 7 integer bits.
typedef ap_fixed<12,7,AP_TRN,AP_WRAP> out_affine_type;
#endif
// all_layers.cpp
// 2018/03/12 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "all_layers.h"
// Prototypes of the per-layer functions chained by all_layers().
// Each one reads from `ins` and writes to `outs` (or to output/dot2 for the
// last stage); the stream element types show how the data format changes
// from stage to stage.
int conv_layer(hls::stream<ap_axiu<32,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs);
int relu(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs);
int max_pooling(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
hls::stream<ap_fixed2_axis<16,6,1,1,1> >& outs);
int affine_layer1(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
hls::stream<ap_fixed1_axis<19,7,1,1,1> >& outs);
int relu_affine1(hls::stream<ap_fixed1_axis<19,7,1,1,1> >& ins,
hls::stream<ap_fixed1_axis<19,7,1,1,1> >& outs);
int affine_layer2(hls::stream<ap_fixed1_axis<19,7,1,1,1> >& ins,
hls::stream<ap_fixed1_axis<12,7,1,1,1> >& outs);
int output_layer(hls::stream<ap_fixed1_axis<12,7,1,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]);
// Top-level function: runs every network layer in sequence, connecting
// consecutive layers through function-local streams.
//   ins    - input stream (axis interface)
//   output - result written by output_layer() (s_axilite)
//   dot2   - NUMBER_OF_OUTPUT_LAYER values written by output_layer() (s_axilite)
// The return values of the individual layers are ignored; always returns 0.
int all_layers(hls::stream<ap_axiu<32,1,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]){
#pragma HLS INTERFACE s_axilite port=output
#pragma HLS INTERFACE s_axilite port=dot2
#pragma HLS ARRAY_PARTITION variable=dot2 complete dim=1
#pragma HLS DATAFLOW
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=ins
// One stream per layer boundary. The explicit STREAM depth pragmas below are
// left disabled, so every stream keeps the tool's default depth.
hls::stream<ap_fixed2_axis<16,6,1,1,1> > outs_conv_layer;
//#pragma HLS STREAM variable=outs_conv_layer depth=312 dim=1
hls::stream<ap_fixed2_axis<16,6,1,1,1> > outs_relu;
//#pragma HLS STREAM variable=outs_relu depth=312 dim=1
hls::stream<ap_fixed2_axis<16,6,1,1,1> > outs_max_pooling;
//#pragma HLS STREAM variable=outs_max_pooling depth=78 dim=1
hls::stream<ap_fixed1_axis<19,7,1,1,1> > outs_affine_layer1;
//#pragma HLS STREAM variable=outs_affine_layer1 depth=100 dim=1
hls::stream<ap_fixed1_axis<19,7,1,1,1> > outs_relu_affine1;
//#pragma HLS STREAM variable=outs_relu_affine1 depth=100 dim=1
hls::stream<ap_fixed1_axis<12,7,1,1,1> > outs_affine_layer2;
//#pragma HLS STREAM variable=outs_affine_layer2 depth=3 dim=1
// Layer chain: each call consumes the stream produced by the previous call.
conv_layer(ins, outs_conv_layer);
relu(outs_conv_layer, outs_relu);
max_pooling(outs_relu, outs_max_pooling);
affine_layer1(outs_max_pooling, outs_affine_layer1);
relu_affine1(outs_affine_layer1, outs_relu_affine1);
affine_layer2(outs_relu_affine1, outs_affine_layer2);
output_layer(outs_affine_layer2, output, dot2);
return(0);
}
// all_layers_tb.cpp
// 2018/03/14 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "all_layers.h"
#include "curve_data_0_100.h"
//#include "curve_data_2500_2600.h"
//#include "curve_data_5000_5100.h"
// Test-bench configuration. main() below uses COULMN_PIXELS, ROW_PIXELS and
// NUM_ITERATIONS; the other macros are not referenced in the visible part of
// this test bench.
#define ALL_DATA_NUM 300
#define NUM_OF_KERNELS 2
#define COULMN_PIXELS 56 // samples per image row (identifier spelling kept as-is: main() uses it)
#define ROW_PIXELS 10 // rows per image
#define ALL_PIXELS 560 // equals COULMN_PIXELS * ROW_PIXELS
#define NUM_OF_OUTPUT 3
// Number of images pushed through the network by main(); pick one of the two.
#define NUM_ITERATIONS 300 // C Simulation
//#define NUM_ITERATIONS 1 // C/RTL CoSimulation
// Fixed-point implementation under test.
int all_layers(hls::stream<ap_axiu<32,1,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]);
// Reference implementation that reports its affine outputs as float.
int all_layers_soft(hls::stream<ap_axiu<32,1,1,1> >& ins, output_type& output,
float dot2[NUMBER_OF_OUTPUT_LAYER]);
// Test bench for all_layers(): feeds NUM_ITERATIONS images to both the
// fixed-point implementation and the float reference, compares each predicted
// class with the label from t_test, and prints the number of misclassified
// images for each implementation. Always returns 0.
int main(){
    using namespace std;

    hls::stream<ap_axiu<32,1,1,1> > ins;
    hls::stream<ap_axiu<32,1,1,1> > ins_soft;
    ap_axiu<32,1,1,1> pix;

    output_type output, output_soft;
    out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER];
    float dot2_soft[NUMBER_OF_OUTPUT_LAYER];

    int hw_err_cnt = 0, sw_err_cnt = 0;

    for (int frame = 0; frame < NUM_ITERATIONS; ++frame) {
        // Five dummy beats with user == 0 precede the frame (hardware stream only).
        for (int d = 0; d < 5; ++d) {
            pix.user = 0;
            pix.data = ap_uint<32>(d);
            ins << pix;
        }

        // One full image, sent to both streams in raster order.
        for (int idx = 0; idx < ROW_PIXELS * COULMN_PIXELS; ++idx) {
            const int x = idx % COULMN_PIXELS;

            pix.data = ap_uint<32>(t_train_256[frame][idx]);
            pix.user = (idx == 0) ? 1 : 0;               // TUSER marks the first sample
            pix.last = (x == COULMN_PIXELS-1) ? 1 : 0;   // TLAST marks the end of each row

            ins << pix;
            ins_soft << pix;
        }

        all_layers(ins, output, dot2);
        all_layers_soft(ins_soft, output_soft, dot2_soft);

        // The label is the index of the first entry equal to 1.0f (0 if none is found).
        int expected = 0;
        for (int k = 0; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
            if (t_test[frame][k] == 1.0f) {
                expected = k;
                break;
            }
        }

        // Compare both predictions with the label.
        /* cout << "output" << " = " << int(output) << " output_soft = " << int(output_soft) << endl; for(int j=0; j<NUMBER_OF_OUTPUT_LAYER; j++){ cout << "dot2[" << j << "] = " << float(dot2[j]) << " dot2_soft[" << j << "] = " << dot2_soft[j] << endl; } */
        const bool hw_wrong = (int(output) != expected);
        const bool sw_wrong = (int(output_soft) != expected);

        if (hw_wrong) {
            cout << "hw_error: i = " << frame << " output = " << int(output) << " t_test_num = " << expected << endl;
            ++hw_err_cnt;
            //return(1);
        }
        if (sw_wrong) {
            cout << "sw_error: i = "<< frame << " output_soft = " << int(output_soft) << " t_test_num" " = " << expected << endl;
            ++sw_err_cnt;
            //return(1);
        }

        // On any misclassification, dump the raw scores of both implementations.
        if (hw_wrong || sw_wrong) {
            for (int k = 0; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
                cout << "dot2[" << k << "] = " << fixed << setprecision(8) << float(dot2[k]) << " dot2_soft[" << k << "] = " << dot2_soft[k] << endl;
            }
            cout << endl;
        }
    }

    cout << "hw_err_cnt = " << hw_err_cnt << " sw_err_cnt = " << sw_err_cnt << endl;
    return(0);
}
// output_layer.h
// 2018/03/11 by marsee
// 2018/03/12 : 出力フォーマットを変更
//
#ifndef __OUTPUT_LAYER_H__
#define __OUTPUT_LAYER_H__
#include <ap_fixed.h>
// AXI4-Stream style beat carrying one fixed-point sample plus side-band fields.
// Template parameters:
//   W  - total bit width of the sample
//   I  - number of integer bits of the sample
//   U  - bit width of the user field
//   TI - bit width of the id field
//   TD - bit width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed1_axis{
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0; // the single payload sample
} data;
ap_uint<(W+7)/8> keep; // W rounded up to whole bytes, one bit per byte
ap_uint<(W+7)/8> strb; // same width as keep
ap_uint<U> user; // output_layer() waits for user != 0 to start a frame
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of ap_fixed1_axis, used by the software model:
// one beat with a single float sample.
// Template parameters: U / TI / TD are the bit widths of user / id / dest.
template<int U, int TI, int TD>
struct float1_axis{
struct data {
float data0; // the single payload sample
} data;
ap_uint<1> keep; // fixed 1-bit width, independent of the payload size
ap_uint<1> strb; // fixed 1-bit width, independent of the payload size
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Number of values received from affine layer 2 (one per class).
#define NUMBER_OF_OUTPUT_LAYER 3
// Buffer for one frame of fixed-point scores.
typedef struct {
ap_fixed<12,7,AP_TRN,AP_WRAP> data [NUMBER_OF_OUTPUT_LAYER];
} mdata_type;
// Buffer for one frame of float scores (software model).
typedef struct {
float data [NUMBER_OF_OUTPUT_LAYER];
} fmdata_type;
// Fixed-point format of a single score: 12 bits total, 7 integer bits.
typedef ap_fixed<12,7,AP_TRN,AP_WRAP> out_affine_type;
// Index of the winning class; 2 bits cover the indices 0..2.
typedef ap_uint<2> output_type;
#endif
// output_layer.cpp
// 2018/03/11 by marsee
// 2018/03/12 : 出力フォーマットを変更
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "output_layer.h"
// Output layer: reads NUMBER_OF_OUTPUT_LAYER scores from the stream, copies
// them to dot2 and reports the index of the largest one.
//   ins    - input stream; samples are discarded until one has user != 0
//   output - receives the index of the maximum score (the lowest index wins ties)
//   dot2   - receives the raw scores in arrival order
// Always returns 0.
int output_layer(hls::stream<ap_fixed1_axis<12,7,1,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]){
#pragma HLS ARRAY_PARTITION variable=dot2 complete dim=1
#pragma HLS INTERFACE s_axilite port=output
#pragma HLS INTERFACE s_axilite port=dot2
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=ins
ap_fixed1_axis<12,7,1,1,1> stdata;
mdata_type af2;
int max_num;
out_affine_type max_val;
Loop1: do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// A frame starts when user becomes 1
ins >> stdata;
} while(stdata.user == 0);
Loop2: for(int i=0; i<NUMBER_OF_OUTPUT_LAYER; i++){
#pragma HLS PIPELINE II=1
if(i != 0) // the first sample was already read by Loop1
ins >> stdata; // read from the AXI4-Stream input
af2.data[i] = stdata.data.data0;
}
max_val = 0; // overwritten at i == 0 of Loop3 before it is ever compared
Loop3: for(int i=0; i<NUMBER_OF_OUTPUT_LAYER; i++){
#pragma HLS UNROLL
dot2[i] = af2.data[i];
if(i == 0){
// seed the running maximum with the first score
max_val = af2.data[0];
max_num = 0;
} else if (max_val < af2.data[i]){
// strict '<' keeps the earliest index when scores are equal
max_val = af2.data[i];
max_num = i;
}
}
output = output_type(max_num);
return(0);
}
// output_layer_tb.cpp
// 2018/03/12 by marsee
// 2018/03/12 : 出力フォーマットを変更
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "output_layer.h"
#include "affine_layer2_output.h"
// Fixed-point implementation under test.
int output_layer(hls::stream<ap_fixed1_axis<12,7,1,1,1> >& ins, output_type& output,
out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER]);
// Float reference model, defined at the end of this test bench.
int output_layer_soft(hls::stream<float1_axis<1,1,1> >& ins, output_type& output,
float dot2[NUMBER_OF_OUTPUT_LAYER]);
// Test bench for output_layer(): sends five dummy beats followed by one frame
// of affine layer 2 outputs to the fixed-point implementation and to the float
// model, prints both results and reports a mismatch of the selected class.
// Always returns 0.
int main(){
    using namespace std;

    hls::stream<ap_fixed1_axis<12,7,1,1,1> > ins;
    hls::stream<float1_axis<1,1,1> > ins_soft;
    ap_fixed1_axis<12,7,1,1,1> pix;
    float1_axis<1,1,1> fpix;

    output_type out, out_soft;
    out_affine_type dot2[NUMBER_OF_OUTPUT_LAYER];
    float fdot2[NUMBER_OF_OUTPUT_LAYER];

    // Dummy beats with user == 0; both layers must skip them.
    for (int d = 0; d < 5; ++d) {
        pix.user = 0;
        pix.data.data0 = (out_affine_type)d;
        ins << pix;

        fpix.user = 0;
        fpix.data.data0 = (float)d;
        ins_soft << fpix;
    }

    // One frame of real data for both streams.
    for (int k = 0; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        const bool is_first = (k == 0);
        const bool is_last = (k == NUMBER_OF_OUTPUT_LAYER-1);

        pix.data.data0 = affine2_out[k];
        fpix.data.data0 = affine2_fout[k];

        // TUSER is asserted on the first sample only.
        pix.user = is_first ? 1 : 0;
        fpix.user = is_first ? 1 : 0;

        // TLAST is asserted on the final sample only.
        pix.last = is_last ? 1 : 0;
        fpix.last = is_last ? 1 : 0;

        ins << pix;
        ins_soft << fpix;
    }

    output_layer(ins, out, dot2);
    output_layer_soft(ins_soft, out_soft, fdot2);

    // Compare out with out_soft.
    cout << "out" << " = " << int(out) << " out_soft" " = " << int(out_soft) << endl;
    for (int k = 0; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        cout << "dot2[" << k << "] = " << float(dot2[k]) << " fdot2[" << k << "] = " << fdot2[k] << endl;
    }
    if (out != out_soft) {
        cout << "error: out" << " = " << int(out) << " out_soft" " = " << int(out_soft) << endl;
        //return(1);
    }
    return(0);
}
// Float reference model of output_layer().
//   ins    - input stream; samples are discarded until one has user != 0
//   output - receives the index of the largest score (the lowest index wins ties)
//   dot2   - receives the NUMBER_OF_OUTPUT_LAYER raw scores in arrival order
// Always returns 0.
int output_layer_soft(hls::stream<float1_axis<1,1,1> >& ins, output_type& output,
        float dot2[NUMBER_OF_OUTPUT_LAYER]){
    float1_axis<1,1,1> beat;

    // Skip everything up to and including the start-of-frame beat.
    bool frame_started = false;
    while (!frame_started) {
        ins >> beat;
        frame_started = !(beat.user == 0);
    }

    // The start-of-frame beat already holds score 0; read the remaining ones.
    fmdata_type scores;
    scores.data[0] = beat.data.data0;
    for (int k = 1; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        ins >> beat;
        scores.data[k] = beat.data.data0;
    }

    // Copy the scores out and track the first maximum.
    int best_idx = 0;
    float best_val = scores.data[0];
    dot2[0] = scores.data[0];
    for (int k = 1; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        dot2[k] = scores.data[k];
        if (best_val < scores.data[k]) {
            best_val = scores.data[k];
            best_idx = k;
        }
    }

    output = output_type(best_idx);
    return(0);
}
// output_layer.h
// 2018/03/11 by marsee
//
#ifndef __OUTPUT_LAYER_H__
#define __OUTPUT_LAYER_H__
#include <ap_fixed.h>
// AXI4-Stream style beat carrying one fixed-point sample plus side-band fields.
// Template parameters:
//   W  - total bit width of the sample
//   I  - number of integer bits of the sample
//   U  - bit width of the user field
//   TI - bit width of the id field
//   TD - bit width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed1_axis{
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0; // the single payload sample
} data;
ap_uint<(W+7)/8> keep; // W rounded up to whole bytes, one bit per byte
ap_uint<(W+7)/8> strb; // same width as keep
ap_uint<U> user; // output_layer() waits for user != 0 to start a frame
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of ap_fixed1_axis, used by the software model:
// one beat with a single float sample.
// Template parameters: U / TI / TD are the bit widths of user / id / dest.
template<int U, int TI, int TD>
struct float1_axis{
struct data {
float data0; // the single payload sample
} data;
ap_uint<1> keep; // fixed 1-bit width, independent of the payload size
ap_uint<1> strb; // fixed 1-bit width, independent of the payload size
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Number of values received from affine layer 2 (one per class).
#define NUMBER_OF_OUTPUT_LAYER 3
// Buffer for one frame of fixed-point scores.
typedef struct {
ap_fixed<12,7,AP_TRN,AP_WRAP> data [NUMBER_OF_OUTPUT_LAYER];
} mdata_type;
// Buffer for one frame of float scores (software model).
typedef struct {
float data [NUMBER_OF_OUTPUT_LAYER];
} fmdata_type;
// Result of the output layer: one flag per class, set to 1 only for the winner.
typedef struct {
ap_uint<1> data [NUMBER_OF_OUTPUT_LAYER];
} out_data_type;
// Fixed-point format of a single score: 12 bits total, 7 integer bits.
typedef ap_fixed<12,7,AP_TRN,AP_WRAP> out_affine_type;
// Type of a single flag inside out_data_type.
typedef ap_uint<1> output_type;
#endif
// output_layer.cpp
// 2018/03/11 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "output_layer.h"
// Output layer (one-hot variant): reads NUMBER_OF_OUTPUT_LAYER scores from the
// stream and sets output.data[i] to 1 for the largest score and 0 elsewhere.
//   ins    - input stream; samples are discarded until one has user != 0
//   output - one flag per class (the lowest index wins ties)
// Always returns 0.
int output_layer(hls::stream<ap_fixed1_axis<12,7,1,1,1> >& ins,
out_data_type& output){
#pragma HLS DATA_PACK variable=output
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=ins
ap_fixed1_axis<12,7,1,1,1> stdata;
mdata_type af2;
int max_num;
out_affine_type max_val;
Loop1: do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// A frame starts when user becomes 1
ins >> stdata;
} while(stdata.user == 0);
Loop2: for(int i=0; i<NUMBER_OF_OUTPUT_LAYER; i++){
#pragma HLS PIPELINE II=1
if(i != 0) // the first sample was already read by Loop1
ins >> stdata; // read from the AXI4-Stream input
af2.data[i] = stdata.data.data0;
}
max_val = 0; // overwritten at i == 0 of Loop3 before it is ever compared
Loop3: for(int i=0; i<NUMBER_OF_OUTPUT_LAYER; i++){
#pragma HLS UNROLL
if(i == 0){
// seed the running maximum with the first score
max_val = af2.data[0];
max_num = 0;
} else if (max_val < af2.data[i]){
// strict '<' keeps the earliest index when scores are equal
max_val = af2.data[i];
max_num = i;
}
}
// Encode the winning index as a one-hot vector.
Loop4: for(int i=0; i<NUMBER_OF_OUTPUT_LAYER; i++){
#pragma HLS UNROLL
if(i == max_num)
output.data[i] = output_type(1);
else
output.data[i] = output_type(0);
}
return(0);
}
// output_layer_tb.cpp
// 2018/03/12 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "output_layer.h"
#include "affine_layer2_output.h"
// Fixed-point implementation under test.
int output_layer(hls::stream<ap_fixed1_axis<12,7,1,1,1> >& ins,
out_data_type& output);
// Float reference model, defined at the end of this test bench.
int output_layer_soft(hls::stream<float1_axis<1,1,1> >& ins,
out_data_type& output);
// Test bench for the one-hot output_layer(): sends five dummy beats followed by
// one frame of affine layer 2 outputs to the fixed-point implementation and to
// the float model, then prints and compares the per-class flags.
// Always returns 0.
int main(){
    using namespace std;

    hls::stream<ap_fixed1_axis<12,7,1,1,1> > ins;
    hls::stream<float1_axis<1,1,1> > ins_soft;
    ap_fixed1_axis<12,7,1,1,1> pix;
    float1_axis<1,1,1> fpix;
    out_data_type out, out_soft;

    // Dummy beats with user == 0; both layers must skip them.
    for (int d = 0; d < 5; ++d) {
        pix.user = 0;
        pix.data.data0 = (out_affine_type)d;
        ins << pix;

        fpix.user = 0;
        fpix.data.data0 = (float)d;
        ins_soft << fpix;
    }

    // One frame of real data for both streams.
    for (int k = 0; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        const bool is_first = (k == 0);
        const bool is_last = (k == NUMBER_OF_OUTPUT_LAYER-1);

        pix.data.data0 = affine2_out[k];
        fpix.data.data0 = affine2_fout[k];

        // TUSER is asserted on the first sample only.
        pix.user = is_first ? 1 : 0;
        fpix.user = is_first ? 1 : 0;

        // TLAST is asserted on the final sample only.
        pix.last = is_last ? 1 : 0;
        fpix.last = is_last ? 1 : 0;

        ins << pix;
        ins_soft << fpix;
    }

    output_layer(ins, out);
    output_layer_soft(ins_soft, out_soft);

    // Compare out with out_soft, flag by flag.
    for (int k = 0; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        cout << "out" << "[" << k << "] = " << int(out.data[k]) << " out_soft" << "[" << k << "] = " << int(out_soft.data[k]) << endl;
        if (out.data[k] != out_soft.data[k]) {
            cerr << "Error: out" << "[" << k << "] = " << int(out.data[k]) << " out_soft" << "[" << k << "] = " << int(out_soft.data[k]) << endl;
            // return(1);
        }
    }
    return(0);
}
// Float reference model of the one-hot output_layer().
//   ins    - input stream; samples are discarded until one has user != 0
//   output - output.data[i] becomes 1 for the largest score and 0 elsewhere
//            (the lowest index wins ties)
// Always returns 0.
int output_layer_soft(hls::stream<float1_axis<1,1,1> >& ins,
        out_data_type& output){
    float1_axis<1,1,1> beat;

    // Skip everything up to and including the start-of-frame beat.
    bool frame_started = false;
    while (!frame_started) {
        ins >> beat;
        frame_started = !(beat.user == 0);
    }

    // The start-of-frame beat already holds score 0; read the remaining ones.
    fmdata_type scores;
    scores.data[0] = beat.data.data0;
    for (int k = 1; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        ins >> beat;
        scores.data[k] = beat.data.data0;
    }

    // Locate the first maximum.
    int winner = 0;
    float top = scores.data[0];
    for (int k = 1; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        if (top < scores.data[k]) {
            top = scores.data[k];
            winner = k;
        }
    }

    // One-hot encode the winner.
    for (int k = 0; k < NUMBER_OF_OUTPUT_LAYER; ++k) {
        output.data[k] = (k == winner) ? output_type(1) : output_type(0);
    }
    return(0);
}
// affine_layer2_output.h
// 2018/4/25 21:25:42 by marsee
//
#ifndef __AFFINE_LAYER2_OUTPUT_H__
#define __AFFINE_LAYER2_OUTPUT_H__
// Outputs of the float model of affine layer 2 (this header is generated by
// affine_layer2_tb.cpp from its fdot values).
const float affine2_fout[3] = {
-1.87331771850586,
0.88871324062347,
-1.42323863506317
};
// Outputs of the fixed-point affine layer 2 for the same input (generated from
// the test bench's dot values). The including file must already provide ap_fixed.
const ap_fixed<12,7,AP_TRN,AP_WRAP> affine2_out[3] = {
-3.87500000000000,
1.21875000000000,
-2.71875000000000
};
#endif
// affine_layer2_tb.cpp
// 2018/03/09 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "affine_layer2.h"
#include "relu_affine1_output.h"
#include "af2_weight.h"
#include "af2_bias.h"
// Fixed-point implementation under test.
int affine_layer2(hls::stream<ap_fixed1_axis<19,7,1,1,1> >& ins,
hls::stream<ap_fixed1_axis<12,7,1,1,1> >& outs);
// Float reference model, defined at the end of this test bench.
int affine_layer2_soft(hls::stream<float1_axis<1,1,1> >& ins,
hls::stream<float1_axis<1,1,1> >& outs);
// Test bench for affine_layer2().
// Sends five dummy beats followed by one frame of relu_affine1 outputs to the
// fixed-point implementation and to the float model, prints both results,
// reports any output whose squared difference exceeds 4, and finally writes
// both result sets to affine_layer2_output.h for the next stage's test bench.
// Returns 0 on completion (mismatches are only reported), or 1 when the output
// header cannot be opened for writing.
int main(){
    using namespace std;
    hls::stream<ap_fixed1_axis<19,7,1,1,1> > ins;
    hls::stream<ap_fixed1_axis<12,7,1,1,1> > outs;
    hls::stream<float1_axis<1,1,1> > ins_soft;
    hls::stream<float1_axis<1,1,1> > outs_soft;
    mdata_type dot;
    fmdata_type fdot;
    ap_fixed1_axis<19,7,1,1,1> pix;
    float1_axis<1,1,1> fpix;
    ap_fixed1_axis<12,7,1,1,1> pdata;
    float1_axis<1,1,1> fpdata;

    // Prepare the input: dummy beats with user == 0 that both layers must skip.
    for(int i=0; i<5; i++){ // dummy data
        pix.user = 0;
        pix.data.data0 = (affine_type)i;
        ins << pix;
        fpix.user = 0;
        fpix.data.data0 = (float)i;
        ins_soft << fpix;
    }

    // Feed one frame of data into ins and ins_soft.
    for(int i=0; i < NUMBER_OF_MIDDLE_LAYER; i++){
        pix.data.data0 = relu_affine1_out[i];
        fpix.data.data0 = relu_affine1_fout[i];
        if (i == 0){ // assert TUSER on the first sample
            pix.user = 1;
            fpix.user = 1;
        } else {
            pix.user = 0;
            fpix.user = 0;
        }
        if (i == NUMBER_OF_MIDDLE_LAYER-1){ // assert TLAST on the final sample
            pix.last = 1;
            fpix.last = 1;
        } else {
            pix.last = 0;
            fpix.last = 0;
        }
        ins << pix;
        ins_soft << fpix;
    }

    affine_layer2(ins, outs);
    affine_layer2_soft(ins_soft, outs_soft);

    // Drain outs / outs_soft into dot[] / fdot[] and compare them.
    for(int i=0; i<NUMBER_OF_OUTPUT_LAYER; i++){
        outs >> pdata;
        outs_soft >> fpdata;
        dot.data[i] = pdata.data.data0;
        fdot.data[i] = fpdata.data.data0;
        printf("i = %d, HW = %f, SW = %f\n", i, (float)dot.data[i], fdot.data[i]);
        if((double)pow((double)dot.data[i]-(double)fdot.data[i], (double)2) > 4){ // squared error larger than 4
            printf("ERROR HW and SW results mismatch i = %d, HW = %f, SW = %f\n", i, (float)dot.data[i], fdot.data[i]);
            //return(1);
        }
    }

    // Write the affine layer 2 results to a header file.
    ofstream OH("affine_layer2_output.h");
    if (!OH) {
        // Without this check a failed open would silently produce no header.
        cerr << "ERROR: cannot open affine_layer2_output.h for writing" << endl;
        return(1);
    }
    OH << "// affine_layer2_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
    // setw() only affects the next inserted value, so it is repeated for every
    // field; otherwise minutes and seconds below 10 lose their leading zero.
    OH << " " << setfill('0')
       << setw(2) << localNow->tm_hour << ":"
       << setw(2) << localNow->tm_min << ":"
       << setw(2) << localNow->tm_sec
       << setfill(' ') << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __AFFINE_LAYER2_OUTPUT_H__" << endl;
    OH << "#define __AFFINE_LAYER2_OUTPUT_H__" << endl;
    OH << endl;

    // Float model results.
    OH << "const float affine2_fout[" << NUMBER_OF_OUTPUT_LAYER << "] = {" << endl;
    for (int i=0; i<NUMBER_OF_OUTPUT_LAYER ; i++){
        OH << " " << fixed << setprecision(14) << fdot.data[i];
        if (i == NUMBER_OF_OUTPUT_LAYER-1)
            OH << endl;
        else
            OH << "," << endl;
    }
    OH << "};" << endl << endl;

    // Fixed-point results, written as decimal literals.
    OH << "const ap_fixed<12,7,AP_TRN,AP_WRAP> affine2_out[" << NUMBER_OF_OUTPUT_LAYER << "] = {" << endl;
    for (int i=0; i<NUMBER_OF_OUTPUT_LAYER ; i++){
        OH << " " << fixed << setprecision(14) << (float)dot.data[i];
        if (i == NUMBER_OF_OUTPUT_LAYER-1)
            OH << endl;
        else
            OH << "," << endl;
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;
    return(0);
}
// Float reference model of affine_layer2().
// Accumulates, for every output column, the sum over all
// NUMBER_OF_MIDDLE_LAYER inputs of input * af2_fweight[row][col], adds
// af2_fbias[col] after the last input, and emits the NUMBER_OF_OUTPUT_LAYER
// results with user set on the first and last set on the final one.
//   ins  - input stream; samples are discarded until one has user != 0
//   outs - output stream receiving the results
// Always returns 0.
int affine_layer2_soft(hls::stream<float1_axis<1,1,1> >& ins,
        hls::stream<float1_axis<1,1,1> >& outs){
    float1_axis<1,1,1> in_beat;
    float1_axis<1,1,1> out_beat;
    float acc[NUMBER_OF_OUTPUT_LAYER];

    // Skip everything up to and including the start-of-frame beat.
    bool frame_started = false;
    while (!frame_started) {
        ins >> in_beat;
        frame_started = !(in_beat.user == 0);
    }

    // Every accumulator starts from zero.
    for (int col = 0; col < NUMBER_OF_OUTPUT_LAYER; ++col)
        acc[col] = 0;

    for (int row = 0; row < NUMBER_OF_MIDDLE_LAYER; ++row) {
        // The start-of-frame beat is already in in_beat for row 0.
        if (row > 0)
            ins >> in_beat;

        const bool final_row = (row == NUMBER_OF_MIDDLE_LAYER-1);

        for (int col = 0; col < NUMBER_OF_OUTPUT_LAYER; ++col) {
            const float product = in_beat.data.data0 * af2_fweight[row][col];
            acc[col] += product;

            if (!final_row)
                continue;

            // After the last input: add the bias and emit the result.
            acc[col] += af2_fbias[col];
            out_beat.data.data0 = acc[col];
            out_beat.user = (col == 0) ? 1 : 0;
            out_beat.last = (col == NUMBER_OF_OUTPUT_LAYER-1) ? 1 : 0;
            outs << out_beat;
        }
    }
    return(0);
}
i = 0, HW = -5.937500, SW = -6.891395
i = 1, HW = 2.625000, SW = 1.106318
i = 2, HW = -4.031250, SW = 1.138750
ERROR HW and SW results mismatch i = 2, HW = -4.031250, SW = 1.138750
i = 0, HW = -3.875000, SW = -1.873318
ERROR HW and SW results mismatch i = 0, HW = -3.875000, SW = -1.873318
i = 1, HW = 1.218750, SW = 0.888713
i = 2, HW = -2.718750, SW = -1.423239
INFO: [SIM 1] CSim done with 0 errors.
INFO: [SIM 3] *************** CSIM finish ***************
が終了しない。INFO: [COSIM 212-302] Starting C TB testing ...
// affine_layer2.h
// 2018/03/08 by marsee
//
#ifndef __AFFINE_LAYER2_H__
#define __AFFINE_LAYER2_H__
#include <ap_fixed.h>
// AXI4-Stream style beat carrying one fixed-point sample plus side-band fields.
// Template parameters:
//   W  - total bit width of the sample
//   I  - number of integer bits of the sample
//   U  - bit width of the user field
//   TI - bit width of the id field
//   TD - bit width of the dest field
template<int W, int I, int U, int TI, int TD>
struct ap_fixed1_axis{
struct data {
ap_fixed<W,I,AP_TRN,AP_WRAP> data0; // the single payload sample
} data;
ap_uint<(W+7)/8> keep; // W rounded up to whole bytes, one bit per byte
ap_uint<(W+7)/8> strb; // same width as keep
ap_uint<U> user; // affine_layer2() waits for user != 0 to start a frame
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of ap_fixed1_axis, used by the software model:
// one beat with a single float sample.
// Template parameters: U / TI / TD are the bit widths of user / id / dest.
template<int U, int TI, int TD>
struct float1_axis{
struct data {
float data0; // the single payload sample
} data;
ap_uint<1> keep; // fixed 1-bit width, independent of the payload size
ap_uint<1> strb; // fixed 1-bit width, independent of the payload size
ap_uint<U> user;
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Number of input values the second affine layer consumes per frame.
#define NUMBER_OF_MIDDLE_LAYER 100
// Number of output values the second affine layer produces per frame.
#define NUMBER_OF_OUTPUT_LAYER 3
// One frame of fixed-point layer outputs.
typedef struct {
ap_fixed<12,7,AP_TRN,AP_WRAP> data [NUMBER_OF_OUTPUT_LAYER];
} mdata_type;
// One frame of floating-point (reference) layer outputs.
typedef struct {
float data [NUMBER_OF_OUTPUT_LAYER];
} fmdata_type;
// Fixed-point output type: 12 bits total, 7 integer bits, truncate, wrap on overflow.
typedef ap_fixed<12,7,AP_TRN,AP_WRAP> out_type;
#endif
// affine_layer2.cpp
// 2018/03/08 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "affine_layer2.h"
#include "af2_weight.h"
#include "af2_bias.h"
// Fixed-point second affine (fully connected) layer.
//
// Discards incoming beats until one arrives with user != 0, then consumes
// NUMBER_OF_MIDDLE_LAYER values (the beat carrying user is the first one).
// Every value is multiplied by af2_weight[input][output] and accumulated per
// output.  After the last input, af2_bias[output] is added and the
// NUMBER_OF_OUTPUT_LAYER results are written to outs as out_type, with
// user = 1 on the first result and last = 1 on the final one.
// keep/strb/id/dest of the written beats are never assigned here.
// Always returns 0.
//
// Precision: products and partial sums are kept with the input's 12
// fractional bits and are quantized to out_type (5 fractional bits) only once,
// when a result is emitted.  Truncating every product to out_type before
// accumulating (AP_TRN) adds up to one output LSB of error per term, i.e. 100
// truncation errors per output.  The accumulator keeps the same 7 integer
// bits as out_type, so AP_WRAP overflow behaviour of the result is unchanged.
int affine_layer2(hls::stream<ap_fixed1_axis<19,7,1,1,1> >& ins,
        hls::stream<ap_fixed1_axis<12,7,1,1,1> >& outs){
#pragma HLS INTERFACE axis register both port=outs
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=ins
    // Accumulator type: integer width of out_type, fractional width of the input.
    typedef ap_fixed<19,7,AP_TRN,AP_WRAP> acc_type;

    ap_fixed1_axis<19,7,1,1,1> stdata;
    acc_type dot[NUMBER_OF_OUTPUT_LAYER];
    ap_fixed1_axis<12,7,1,1,1> outd;

    Loop1: do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
        // A frame starts when user becomes 1.
        ins >> stdata;
    } while(stdata.user == 0);

    Loop2: for (int i=0; i<NUMBER_OF_MIDDLE_LAYER; i++){
#pragma HLS PIPELINE II=3
        if (i != 0) // the first value was already read by Loop1
            ins >> stdata;  // input from AXI4-Stream

        // NOTE(review): Loop2 is pipelined, which normally unrolls Loop4, so
        // the PIPELINE pragma below is presumably redundant - confirm in the
        // synthesis report before removing it.
        Loop4: for (int col=0; col<NUMBER_OF_OUTPUT_LAYER; col++){
#pragma HLS PIPELINE II=1
            if (i == 0) // clear the accumulator at the start of a frame
                dot[col] = 0;

            acc_type dot_temp = stdata.data.data0 * af2_weight[i][col];
            dot[col] += dot_temp;

            if (i == NUMBER_OF_MIDDLE_LAYER-1){ // last input: add the bias and emit
                dot[col] += af2_bias[col];
                outd.data.data0 = dot[col]; // single quantization to out_type

                if(col == 0)
                    outd.user = 1;
                else
                    outd.user = 0;

                if(col == NUMBER_OF_OUTPUT_LAYER-1)
                    outd.last = 1;
                else
                    outd.last = 0;

                outs << outd;
            }
        }
    }
    return(0);
}
// relu_affine1.h
// 2018/03/06 by marsee
//
#ifndef __RELU_AFFINE1_H__
#define __RELU_AFFINE1_H__
#include <ap_fixed.h>
// One stream beat carrying a single fixed-point value plus sideband fields.
// The field names mirror the AXI4-Stream side-channel signals.
//   W  : total bit width of data0
//   I  : integer bit width of data0
//   U  : bit width of user
//   TI : bit width of id
//   TD : bit width of dest
template<int W, int I, int U, int TI, int TD>
struct ap_fixed1_axis{
struct data {
// Payload; quantization is truncation (AP_TRN), overflow wraps (AP_WRAP).
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
} data;
// One bit per payload byte (W rounded up to whole bytes).
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
// relu_affine1 treats user != 0 as "first value of a frame".
ap_uint<U> user;
// The test bench sets last = 1 on the final value of a frame.
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of ap_fixed1_axis, used by the software
// reference function relu_affine1_soft: one float value plus sideband fields.
//   U  : bit width of user
//   TI : bit width of id
//   TD : bit width of dest
template<int U, int TI, int TD>
struct float1_axis{
struct data {
float data0;
} data;
// NOTE(review): keep/strb are 1 bit wide here although data0 is a float;
// ap_fixed1_axis sizes them per payload byte - confirm this is intentional.
ap_uint<1> keep;
ap_uint<1> strb;
// user != 0 marks the first value of a frame.
ap_uint<U> user;
// The test bench sets last = 1 on the final value of a frame.
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Number of values per frame processed by the ReLU stage.
#define NUMBER_OF_MIDDLE_LAYER 100
// One frame of fixed-point values.
typedef struct {
ap_fixed<19,7,AP_TRN,AP_WRAP> data [NUMBER_OF_MIDDLE_LAYER];
} mdata_type;
// One frame of floating-point (reference) values.
typedef struct {
float data [NUMBER_OF_MIDDLE_LAYER];
} fmdata_type;
// Fixed-point data type: 19 bits total, 7 integer bits, truncate, wrap on overflow.
typedef ap_fixed<19,7,AP_TRN,AP_WRAP> affine_type;
#endif
// relu_affine1.cpp
// 2018/03/06 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "relu_affine1.h"
// Fixed-point ReLU stage.
//
// Discards incoming beats until one arrives with user != 0, then forwards
// NUMBER_OF_MIDDLE_LAYER beats (the beat carrying user is the first one) to
// outs.  Negative payloads are replaced by zero; every other field of a beat
// is forwarded exactly as it was received.  Always returns 0.
int relu_affine1(hls::stream<ap_fixed1_axis<19,7,1,1,1> >& ins,
        hls::stream<ap_fixed1_axis<19,7,1,1,1> >& outs){
#pragma HLS INTERFACE axis port=ins
#pragma HLS INTERFACE axis port=outs
#pragma HLS INTERFACE s_axilite port=return
    const affine_type zero = affine_type(0.0);
    ap_fixed1_axis<19,7,1,1,1> beat;

    // Wait for the start-of-frame marker.
    do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
        ins.read(beat);
    } while(beat.user == 0);

    Loop1: for (int n = 0; n < NUMBER_OF_MIDDLE_LAYER; ++n){
#pragma HLS PIPELINE II=1
        // The first beat was already fetched by the wait loop above.
        if (n > 0)
            ins.read(beat);

        // ReLU: clamp negative values to zero.
        if (beat.data.data0 < zero)
            beat.data.data0 = zero;

        outs.write(beat);
    }
    return(0);
}
// relu_affine1_tb.cpp
// 2018/03/07 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "relu_affine1.h"
#include "affine_layer1_output.h"
int relu_affine1(hls::stream<ap_fixed1_axis<19,7,1,1,1> >& ins,
hls::stream<ap_fixed1_axis<19,7,1,1,1> >& outs);
int relu_affine1_soft(hls::stream<float1_axis<1,1,1> >& ins,
hls::stream<float1_axis<1,1,1> >& outs);
int main(){
using namespace std;
hls::stream<ap_fixed1_axis<19,7,1,1,1> > ins;
hls::stream<float1_axis<1,1,1> > ins_soft;
hls::stream<ap_fixed1_axis<19,7,1,1,1> > outs;
hls::stream<float1_axis<1,1,1> > outs_soft;
float relu_fout[100];
affine_type relu_out[100];
ap_fixed1_axis<19,7,1,1,1> pix;
float1_axis<1,1,1> fpix;
// ins に入力データを用意する
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pix.data.data0 = (affine_type)i;
ins << pix;
fpix.user = 0;
fpix.data.data0 = (float)i;
ins_soft << fpix;
}
// 1 画面分のデータを ins、ins_soft に入力する
for(int i=0; i < NUMBER_OF_MIDDLE_LAYER; i++){
pix.data.data0 = affine1_out[i];
fpix.data.data0 = affine1_fout[i];
if (i == 0){ // 最初のデータの時に TUSER を 1 にする
pix.user = 1;
fpix.user = 1;
} else {
pix.user = 0;
fpix.user = 0;
}
if (i == NUMBER_OF_MIDDLE_LAYER-1){ // 行の最後でTLASTをアサートする
pix.last = 1;
fpix.last = 1;
} else {
pix.last = 0;
fpix.last = 0;
}
ins << pix;
ins_soft << fpix;
}
relu_affine1(ins, outs);
relu_affine1_soft(ins_soft, outs_soft);
// outs, outs_soft を relu_out[][], relu_fout[][] に出力する
for(int i=0; i < NUMBER_OF_MIDDLE_LAYER; i++){
outs >> pix;
outs_soft >> fpix;
relu_out[i] = pix.data.data0;
relu_fout[i] = fpix.data.data0;
printf("i = %d, HW = %f, SW = %f\n", i, (float)pix.data.data0, fpix.data.data0);
if ((double)pow((double)pix.data.data0-(double)fpix.data.data0,(double)2) > 4){ // 2乗誤差が4よりも大きい
printf("ERROR HW and SW results mismatch i = %d, HW = %f, SW = %f\n", i, (float)pix.data.data0, fpix.data.data0);
//return(1);
}
}
cout << "Success HW and SW results match" << endl;
cout << endl;
// ReLU の結果をヘッダファイルに出力
ofstream OH("relu_affine1_output.h");
OH << "// relu_affine1_output.h" << endl;
time_t now = time(0);
struct tm* localNow = localtime(&now);
OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
OH << " " << setw(2) << setfill('0') << localNow->tm_hour << ":" << localNow->tm_min << ":" << localNow->tm_sec << " by marsee" << endl;
OH << "//" << endl;
OH << endl;
OH << "#ifndef __RELU_AFFINE1_OUTPUT_H__" << endl;
OH << "#define __RELU_AFFINE1_OUTPUT_H__" << endl;
OH << endl;
OH << "const float relu_affine1_fout[" << NUMBER_OF_MIDDLE_LAYER << "] = {" << endl;
for (int i=0; i<NUMBER_OF_MIDDLE_LAYER ; i++){
OH << " " << fixed << setprecision(14) << relu_fout[i];
if (i == NUMBER_OF_MIDDLE_LAYER-1)
OH << endl;
else
OH << "," << endl;
}
OH << "};" << endl << endl;
OH << "const ap_fixed<19, 7, AP_TRN, AP_WRAP> relu_affine1_out[" << NUMBER_OF_MIDDLE_LAYER << "] = {" << endl;
for (int i=0; i<NUMBER_OF_MIDDLE_LAYER ; i++){
OH << " " << fixed << setprecision(14) << (float)relu_out[i];
if (i == NUMBER_OF_MIDDLE_LAYER-1)
OH << endl;
else
OH << "," << endl;
}
OH << "};" << endl << endl;
OH << "#endif" << endl;
return(0);
}
// Floating-point reference model of relu_affine1().
//
// Discards incoming beats until one arrives with user != 0, then forwards
// NUMBER_OF_MIDDLE_LAYER beats (the beat carrying user is the first one) to
// outs, replacing negative payloads by 0.  All other fields of a beat are
// forwarded as received.  Always returns 0.
int relu_affine1_soft(hls::stream<float1_axis<1,1,1> >& ins,
        hls::stream<float1_axis<1,1,1> >& outs){
    float1_axis<1,1,1> beat;

    // Wait for the start-of-frame marker.
    do {
        ins.read(beat);
    } while(beat.user == 0);

    for (int n = 0; n < NUMBER_OF_MIDDLE_LAYER; ++n){
        // The first beat was already fetched by the wait loop above.
        if (n > 0)
            ins.read(beat);

        // ReLU: clamp negative values to zero.
        if (beat.data.data0 < 0.0f)
            beat.data.data0 = 0.0f;

        outs.write(beat);
    }
    return(0);
}
// affine_layer1_output.h
// 2018/4/25 20:40:49 by marsee
//
#ifndef __AFFINE_LAYER1_OUTPUT_H__
#define __AFFINE_LAYER1_OUTPUT_H__
// Floating-point reference output of the first affine layer (100 values).
// This table is generated by the affine_layer1 test bench and is used as the
// floating-point input frame of the ReLU test bench.
const float affine1_fout[100] = {
-1.07044434547424,
2.02270197868347,
-1.05823743343353,
6.61475515365601,
-0.41641759872437,
-0.43186429142952,
-0.59285295009613,
1.61030292510986,
-0.86417144536972,
-1.58257913589478,
-1.58896744251251,
-1.18942570686340,
-1.29751002788544,
-0.29641613364220,
0.99740755558014,
-1.56636476516724,
1.15804195404053,
-0.32042244076729,
-1.29172587394714,
-1.32565450668335,
0.52028149366379,
-0.77872759103775,
-1.42185449600220,
-1.11953997612000,
-0.52264106273651,
-9.46636104583740,
1.24318540096283,
1.48950290679932,
-0.81046527624130,
-0.13629606366158,
-0.97734153270721,
-1.32893562316895,
-0.01425859332085,
1.30608248710632,
0.66073369979858,
-1.30899047851562,
1.64479529857635,
-8.37205600738525,
-0.93879777193069,
-0.23133431375027,
-0.70119810104370,
14.23405647277832,
-1.07003355026245,
-0.59557068347931,
-1.27445268630981,
-0.49398189783096,
13.72099971771240,
1.28452575206757,
0.00934629142284,
-0.96886688470840,
-0.59863859415054,
-1.30126535892487,
2.04380607604980,
-0.86902260780334,
3.03090858459473,
-1.28547608852386,
2.02088475227356,
-0.50959372520447,
2.04253935813904,
-0.87841814756393,
1.49199974536896,
3.59934496879578,
-0.50308769941330,
3.50621104240417,
-1.47892177104950,
-0.40191367268562,
-1.44002544879913,
2.27853059768677,
-1.31562995910645,
-2.41246962547302,
0.74652028083801,
3.59772849082947,
-0.15781980752945,
-0.73974257707596,
-1.69656479358673,
-0.83578699827194,
1.48410618305206,
-0.45701009035110,
-1.51373767852783,
-1.64361524581909,
-1.52793812751770,
0.05682089924812,
-0.87240159511566,
0.13656859099865,
0.39985269308090,
-1.70221209526062,
-1.72491621971130,
-0.40194845199585,
-0.78374582529068,
-0.47359892725945,
-1.49441266059875,
-0.55138188600540,
1.75030672550201,
2.03031945228577,
-0.60934787988663,
-0.40171664953232,
1.81010317802429,
1.34133112430573,
-0.33451518416405,
-0.36992445588112
};
// Fixed-point output of the first affine layer (100 values), stored as
// ap_fixed<19,7> with truncation and wrap-around.
// This table is generated by the affine_layer1 test bench and is used as the
// fixed-point input frame of the ReLU test bench.
const ap_fixed<19,7,AP_TRN,AP_WRAP> affine1_out[100] = {
-1.22485351562500,
2.79467773437500,
-1.23950195312500,
9.45532226562500,
-0.39111328125000,
-0.36645507812500,
-0.56665039062500,
2.11010742187500,
-0.92529296875000,
-1.86206054687500,
-1.90454101562500,
-1.38159179687500,
-1.51977539062500,
0.04516601562500,
1.44580078125000,
-1.82397460937500,
1.67358398437500,
0.02636718750000,
-1.50488281250000,
-1.54272460937500,
0.99658203125000,
-0.83862304687500,
-1.65795898437500,
-1.28344726562500,
-0.54541015625000,
-12.42919921875000,
2.00537109375000,
2.03759765625000,
-0.84692382812500,
0.27978515625000,
-1.04248046875000,
-1.56860351562500,
0.30810546875000,
1.99609375000000,
1.28930664062500,
-1.57373046875000,
2.31689453125000,
-10.94824218750000,
-1.01269531250000,
-0.13574218750000,
-0.75708007812500,
19.37792968750000,
-1.22167968750000,
-0.62182617187500,
-1.48803710937500,
-0.42578125000000,
18.65820312500000,
1.73413085937500,
0.23950195312500,
-1.07177734375000,
-0.56103515625000,
-1.52734375000000,
2.82861328125000,
-0.97363281250000,
4.35302734375000,
-1.52099609375000,
2.71801757812500,
-0.51782226562500,
2.75024414062500,
-0.91528320312500,
2.07055664062500,
5.08178710937500,
-0.45678710937500,
5.00292968750000,
-1.72045898437500,
-0.33129882812500,
-1.07739257812500,
3.32397460937500,
-1.51245117187500,
-2.93920898437500,
1.01025390625000,
5.14819335937500,
0.18505859375000,
-0.82324218750000,
-2.02026367187500,
-0.84716796875000,
2.06396484375000,
-0.40454101562500,
-1.78906250000000,
-1.94873046875000,
-1.20434570312500,
0.45068359375000,
-0.93212890625000,
0.40722656250000,
0.64453125000000,
-2.00488281250000,
-2.01147460937500,
-0.34863281250000,
-0.85351562500000,
-0.40234375000000,
-1.76367187500000,
-0.55029296875000,
2.29492187500000,
2.72192382812500,
-0.56152343750000,
-0.37280273437500,
2.56713867187500,
1.82861328125000,
-0.26318359375000,
-0.34667968750000
};
#endif
// affine_layer1_tb.cpp
// 2018/02/26 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "affine_layer1.h"
#include "max_pooling_output.h"
#include "af1_weight.h"
#include "af1_bias.h"
int affine_layer1(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
hls::stream<ap_fixed1_axis<19,7,1,1,1> >& outs);
int affine_layer1_soft(hls::stream<float2_axis<1,1,1> >& ins,
hls::stream<float1_axis<1,1,1> >& outs);
// Test bench for affine_layer1().
//
// Feeds five dummy beats (user = 0) followed by one frame taken from
// mp_out / mp_fout (V_PRE_LAYER_HIGHT x H_PRE_LAYER_WIDTH beats, two values
// per beat) into the fixed-point function and the floating-point reference,
// prints every HW/SW output pair, reports pairs whose squared difference
// exceeds 4, and writes both result sets to affine_layer1_output.h.
//
// Returns 0 even when mismatches were reported (the early return is
// deliberately left disabled so the output header is always generated).
int main(){
    using namespace std;

    hls::stream<ap_fixed2_axis<16,6,1,1,1> > ins;
    hls::stream<ap_fixed1_axis<19,7,1,1,1> > outs;
    hls::stream<float2_axis<1,1,1> > ins_soft;
    hls::stream<float1_axis<1,1,1> > outs_soft;

    mdata_type dot;
    fmdata_type fdot;

    ap_fixed2_axis<16,6,1,1,1> pix;
    float2_axis<1,1,1> fpix;
    ap_fixed1_axis<19,7,1,1,1> pdata;
    float1_axis<1,1,1> fpdata;

    // Dummy beats ahead of the frame; the functions under test must skip them.
    for(int i=0; i<5; i++){
        pix.user = 0;
        pix.data.data0 = (affine_type)i;
        pix.data.data1 = (affine_type)i;
        ins << pix;
        fpix.user = 0;
        fpix.data.data0 = (float)i;
        fpix.data.data1 = (float)i;
        ins_soft << fpix;
    }

    // Push one frame of data into ins and ins_soft.
    for(int j=0; j < V_PRE_LAYER_HIGHT; j++){
        for(int i=0; i < H_PRE_LAYER_WIDTH; i++){
            pix.data.data0 = mp_out[j*H_PRE_LAYER_WIDTH+i][0];
            pix.data.data1 = mp_out[j*H_PRE_LAYER_WIDTH+i][1];
            fpix.data.data0 = mp_fout[j*H_PRE_LAYER_WIDTH+i][0];
            fpix.data.data1 = mp_fout[j*H_PRE_LAYER_WIDTH+i][1];

            if (j==0 && i==0){  // assert TUSER on the first beat
                pix.user = 1;
                fpix.user = 1;
            } else {
                pix.user = 0;
                fpix.user = 0;
            }

            if (i == H_PRE_LAYER_WIDTH-1){  // assert TLAST at the end of every row
                pix.last = 1;
                fpix.last = 1;
            } else {
                pix.last = 0;
                fpix.last = 0;
            }

            ins << pix;
            ins_soft << fpix;
        }
    }

    affine_layer1(ins, outs);
    affine_layer1_soft(ins_soft, outs_soft);

    // Collect outs / outs_soft into dot / fdot and compare them.
    for(int i=0; i<NUMBER_OF_MIDDLE_LAYER; i++){
        outs >> pdata;
        outs_soft >> fpdata;

        dot.data[i] = pdata.data.data0;
        fdot.data[i] = fpdata.data.data0;

        printf("i = %d, HW = %f, SW = %f\n", i, (float)dot.data[i], fdot.data[i]);
        if((double)pow((double)dot.data[i]-(double)fdot.data[i], (double)2) > 4){ // squared error larger than 4
            printf("ERROR HW and SW results mismatch i = %d, HW = %f, SW = %f\n", i, (float)dot.data[i], fdot.data[i]);
            //return(1);
        }
    }

    // Write the affine_layer1 results to a header file.
    ofstream OH("affine_layer1_output.h");
    OH << "// affine_layer1_output.h" << endl;
    time_t now = time(0);
    struct tm* localNow = localtime(&now);
    OH << "// " << localNow->tm_year+1900 << "/" << localNow->tm_mon+1 << "/" << localNow->tm_mday;
    // setw() applies to the next output only, so it is repeated for every field.
    OH << " " << setfill('0') << setw(2) << localNow->tm_hour << ":" << setw(2) << localNow->tm_min << ":" << setw(2) << localNow->tm_sec << " by marsee" << endl;
    OH << "//" << endl;
    OH << endl;
    OH << "#ifndef __AFFINE_LAYER1_OUTPUT_H__" << endl;
    OH << "#define __AFFINE_LAYER1_OUTPUT_H__" << endl;
    OH << endl;

    OH << "const float affine1_fout[" << NUMBER_OF_MIDDLE_LAYER << "] = {" << endl;
    for (int i=0; i<NUMBER_OF_MIDDLE_LAYER ; i++){
        OH << " " << fixed << setprecision(14) << fdot.data[i];
        if (i == NUMBER_OF_MIDDLE_LAYER-1)
            OH << endl;
        else
            OH << "," << endl;
    }
    OH << "};" << endl << endl;

    OH << "const ap_fixed<19,7,AP_TRN,AP_WRAP> affine1_out[" << NUMBER_OF_MIDDLE_LAYER << "] = {" << endl;
    for (int i=0; i<NUMBER_OF_MIDDLE_LAYER ; i++){
        OH << " " << fixed << setprecision(14) << (float)dot.data[i];
        if (i == NUMBER_OF_MIDDLE_LAYER-1)
            OH << endl;
        else
            OH << "," << endl;
    }
    OH << "};" << endl << endl;
    OH << "#endif" << endl;

    return(0);
}
// Floating-point reference model of the first affine layer.
//
// Discards incoming beats until one arrives with user != 0, then consumes
// V_PRE_LAYER_HIGHT x H_PRE_LAYER_WIDTH beats (the beat carrying user is the
// first one).  Each beat carries two values: data0 is weighted with
// af1_fweight[y*H_PRE_LAYER_WIDTH+x][col] and data1 with the row offset by
// V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH.  After the last beat, af1_fbias[col]
// is added and the NUMBER_OF_MIDDLE_LAYER sums are written to outs, with
// user = 1 on the first result and last = 1 on the final one.
// Always returns 0.
//
// The accumulator size and the column loop use NUMBER_OF_MIDDLE_LAYER instead
// of a literal 100 so they cannot drift apart from the `last` condition.
int affine_layer1_soft(hls::stream<float2_axis<1,1,1> >& ins,
        hls::stream<float1_axis<1,1,1> >& outs){
    float2_axis<1,1,1> stdata;
    float dot[NUMBER_OF_MIDDLE_LAYER];
    float1_axis<1,1,1> outd;

    Loop1: do {
        // A frame starts when user becomes 1.
        ins >> stdata;
    } while(stdata.user == 0);

    Loop2: for (int y=0; y<V_PRE_LAYER_HIGHT; y++){
        Loop3: for (int x=0; x<H_PRE_LAYER_WIDTH; x++){
            if (!(x==0 && y==0))    // the first beat was already read by Loop1
                ins >> stdata;      // input from AXI4-Stream

            Loop4: for (int col=0; col<NUMBER_OF_MIDDLE_LAYER; col++){
                if (x==0 && y==0)   // clear the accumulator at the start of a frame
                    dot[col] = 0;

                dot[col] += stdata.data.data0 * af1_fweight[y*H_PRE_LAYER_WIDTH+x][col];
                dot[col] += stdata.data.data1 * af1_fweight[V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH+y*H_PRE_LAYER_WIDTH+x][col];

                if (y==V_PRE_LAYER_HIGHT-1 && x==H_PRE_LAYER_WIDTH-1){ // last beat: add the bias and emit
                    dot[col] += af1_fbias[col];
                    outd.data.data0 = dot[col];

                    if(col == 0)
                        outd.user = 1;
                    else
                        outd.user = 0;

                    if(col == NUMBER_OF_MIDDLE_LAYER-1)
                        outd.last = 1;
                    else
                        outd.last = 0;

                    outs << outd;
                }
            }
        }
    }
    return(0);
}
次の 2 行を、dot_temp を使用してその下のように書き直した。
dot[col] += stdata.data.data0 * af1_fweight[y*H_PRE_LAYER_WIDTH+x][col];
dot[col] += stdata.data.data1 * af1_fweight[V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH+y*H_PRE_LAYER_WIDTH+x][col];
affine_type dot_temp = stdata.data.data0 * af1_weight[y*H_PRE_LAYER_WIDTH+x][col] +
stdata.data.data1 * af1_weight[V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH+y*H_PRE_LAYER_WIDTH+x][col];
dot[col] += dot_temp;
INFO: [SCHED 204-61] Pipelining result : Target II = 1, Final II = 1, Depth = 6.
WARNING: [SCHED 204-21] Estimated clock period (10.3415ns) exceeds the target (target clock period: 10ns, clock uncertainty: 1.25ns, effective delay budget: 8.75ns).
WARNING: [SCHED 204-21] The critical path consists of the following:
'mul' operation ('__Val2__', affine_layer1/affine_layer1.cpp:44) (3.36 ns)
'add' operation ('__Val2__', affine_layer1/affine_layer1.cpp:44) (3.02 ns)
'add' operation ('tmp_21', affine_layer1/affine_layer1.cpp:44) (0 ns)
'add' operation ('p_Val2_8_cast', affine_layer1/affine_layer1.cpp:49) (3.96 ns)
// affine_layer1.h
// 2018/02/25 by marsee
// 2018/03/04 : ap_fixed1_axis と float1_axis を追加
//
#ifndef __AFFINE_LAYER1_H__
#define __AFFINE_LAYER1_H__
#include <ap_fixed.h>
// One stream beat carrying two fixed-point values plus sideband fields.
// The field names mirror the AXI4-Stream side-channel signals.
//   W  : total bit width of each data value
//   I  : integer bit width of each data value
//   U  : bit width of user
//   TI : bit width of id
//   TD : bit width of dest
template<int W, int I, int U, int TI, int TD>
struct ap_fixed2_axis{
struct data {
// Payload; quantization is truncation (AP_TRN), overflow wraps (AP_WRAP).
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
ap_fixed<W,I,AP_TRN,AP_WRAP> data1;
} data;
// NOTE(review): sized for a single W-bit value although the payload holds
// two of them - confirm this is intentional.
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
// affine_layer1 treats user != 0 as "first beat of a frame".
ap_uint<U> user;
// The test bench sets last = 1 on the final beat of every row.
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// One stream beat carrying a single fixed-point value plus sideband fields.
// The field names mirror the AXI4-Stream side-channel signals.
//   W  : total bit width of data0
//   I  : integer bit width of data0
//   U  : bit width of user
//   TI : bit width of id
//   TD : bit width of dest
template<int W, int I, int U, int TI, int TD>
struct ap_fixed1_axis{
struct data {
// Payload; quantization is truncation (AP_TRN), overflow wraps (AP_WRAP).
ap_fixed<W,I,AP_TRN,AP_WRAP> data0;
} data;
// One bit per payload byte (W rounded up to whole bytes).
ap_uint<(W+7)/8> keep;
ap_uint<(W+7)/8> strb;
// affine_layer1 sets user = 1 on the first result of a frame.
ap_uint<U> user;
// affine_layer1 sets last = 1 on the final result of a frame.
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of ap_fixed2_axis, used by the software
// reference function affine_layer1_soft: two float values plus sideband fields.
//   U  : bit width of user
//   TI : bit width of id
//   TD : bit width of dest
template<int U, int TI, int TD>
struct float2_axis{
struct data {
float data0;
float data1;
} data;
// NOTE(review): keep/strb are 1 bit wide here although the payload is two
// floats; the fixed-point structs size them per payload byte - confirm.
ap_uint<1> keep;
ap_uint<1> strb;
// user != 0 marks the first beat of a frame.
ap_uint<U> user;
// The test bench sets last = 1 on the final beat of every row.
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Floating-point counterpart of ap_fixed1_axis, used for the output of the
// software reference function affine_layer1_soft: one float plus sideband fields.
//   U  : bit width of user
//   TI : bit width of id
//   TD : bit width of dest
template<int U, int TI, int TD>
struct float1_axis{
struct data {
float data0;
} data;
// NOTE(review): keep/strb are 1 bit wide here although data0 is a float;
// ap_fixed1_axis sizes them per payload byte - confirm this is intentional.
ap_uint<1> keep;
ap_uint<1> strb;
// affine_layer1_soft sets user = 1 on the first result of a frame.
ap_uint<U> user;
// affine_layer1_soft sets last = 1 on the final result of a frame.
ap_uint<1> last;
ap_uint<TI> id;
ap_uint<TD> dest;
};
// Number of output values the first affine layer produces per frame.
#define NUMBER_OF_MIDDLE_LAYER 100
// One frame of fixed-point layer outputs.
typedef struct {
ap_fixed<19,7,AP_TRN,AP_WRAP> data [NUMBER_OF_MIDDLE_LAYER];
} mdata_type;
// One frame of floating-point (reference) layer outputs.
typedef struct {
float data [NUMBER_OF_MIDDLE_LAYER];
} fmdata_type;
// Fixed-point accumulator/output type: 19 bits total, 7 integer bits, truncate, wrap on overflow.
typedef ap_fixed<19,7,AP_TRN,AP_WRAP> affine_type;
// Input frame size in beats: rows (V) and beats per row (H).  Every beat
// carries two values, and the weight tables are indexed by y*H_PRE_LAYER_WIDTH+x.
#define V_PRE_LAYER_HIGHT 3
#define H_PRE_LAYER_WIDTH 26
#endif
// affine_layer1.cpp
// 2018/02/26 by marsee
//
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "affine_layer1.h"
#include "af1_weight.h"
#include "af1_bias.h"
// Fixed-point first affine (fully connected) layer.
//
// Discards incoming beats until one arrives with user != 0, then consumes
// V_PRE_LAYER_HIGHT x H_PRE_LAYER_WIDTH beats (the beat carrying user is the
// first one).  Each beat carries two values: data0 is weighted with
// af1_weight[y*H_PRE_LAYER_WIDTH+x][col] and data1 with the row offset by
// V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH.  After the last beat, af1_bias[col] is
// added and the NUMBER_OF_MIDDLE_LAYER results are written to outs, with
// user = 1 on the first result and last = 1 on the final one.
// keep/strb/id/dest of the written beats are never assigned here.
// Always returns 0.
//
// The accumulator size and the column loop use NUMBER_OF_MIDDLE_LAYER instead
// of a literal 100 so they cannot drift apart from the `last` condition.
int affine_layer1(hls::stream<ap_fixed2_axis<16,6,1,1,1> >& ins,
        hls::stream<ap_fixed1_axis<19,7,1,1,1> >& outs){
//#pragma HLS ARRAY_PARTITION variable=af1_weight complete dim=1
#pragma HLS INTERFACE axis register both port=outs
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE axis register both port=ins
    ap_fixed2_axis<16,6,1,1,1> stdata;
    affine_type dot[NUMBER_OF_MIDDLE_LAYER];
//#pragma HLS ARRAY_PARTITION variable=dot complete dim=1
    ap_fixed1_axis<19,7,1,1,1> outd;

    Loop1: do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
        // A frame starts when user becomes 1.
        ins >> stdata;
    } while(stdata.user == 0);

    Loop2: for (int y=0; y<V_PRE_LAYER_HIGHT; y++){
        Loop3: for (int x=0; x<H_PRE_LAYER_WIDTH; x++){
//#pragma HLS PIPELINE II=1
            if (!(x==0 && y==0))    // the first beat was already read by Loop1
                ins >> stdata;      // input from AXI4-Stream

            Loop4: for (int col=0; col<NUMBER_OF_MIDDLE_LAYER; col++){
#pragma HLS PIPELINE II=1
                if (x==0 && y==0)   // clear the accumulator at the start of a frame
                    dot[col] = 0;

                // Both products are summed into a temporary first and then
                // added to the accumulator in a separate statement.
                affine_type dot_temp = stdata.data.data0 * af1_weight[y*H_PRE_LAYER_WIDTH+x][col] +
                        stdata.data.data1 * af1_weight[V_PRE_LAYER_HIGHT*H_PRE_LAYER_WIDTH+y*H_PRE_LAYER_WIDTH+x][col];
                dot[col] += dot_temp;

                if (y==V_PRE_LAYER_HIGHT-1 && x==H_PRE_LAYER_WIDTH-1){ // last beat: add the bias and emit
                    dot[col] += af1_bias[col];
                    outd.data.data0 = dot[col];

                    if(col == 0)
                        outd.user = 1;
                    else
                        outd.user = 0;

                    if(col == NUMBER_OF_MIDDLE_LAYER-1)
                        outd.last = 1;
                    else
                        outd.last = 0;

                    outs << outd;
                }
            }
        }
    }
    return(0);
}
INFO: [SCHED 204-61] Pipelining result : Target II = 1, Final II = 1, Depth = 3.
WARNING: [SCHED 204-21] Estimated clock period (17.2156ns) exceeds the target (target clock period: 10ns, clock uncertainty: 1.25ns, effective delay budget: 8.75ns).
WARNING: [SCHED 204-21] The critical path consists of the following:
'load' operation ('dot_99_V_load_1') on local variable 'dot[99].V' (0 ns)
multiplexor before 'phi' operation ('dot_41_V_3') with incoming values : ('dot_99_V_145_load') (1.77 ns)
'phi' operation ('dot_41_V_3') with incoming values : ('dot_99_V_145_load') (0 ns)
'mux' operation ('__Val2__', affine_layer1/affine_layer1.cpp:38) (4.1 ns)
'add' operation ('__Val2__', affine_layer1/affine_layer1.cpp:43) (3.02 ns)
'add' operation ('__Val2__', affine_layer1/affine_layer1.cpp:44) (3.02 ns)
'add' operation ('p_Val2_8', affine_layer1/affine_layer1.cpp:47) (2.17 ns)
'select' operation ('dot[0].V', affine_layer1/affine_layer1.cpp:48) (1.37 ns)
multiplexor before 'phi' operation ('dot_99_V_5', affine_layer1/affine_layer1.cpp:48) with incoming values : ('dot_99_V_1_load') ('dot[0].V', affine_layer1/affine_layer1.cpp:44) ('dot_0_V_3_cast', affine_layer1/affine_layer1.cpp:48) (1.77 ns)
I
#pragma HLS ARRAY_PARTITION variable=dot complete dim=1
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | - | - | 1 | 2 | 3 |
4 | 5 | 6 | 7 | 8 | 9 | 10 |
11 | 12 | 13 | 14 | 15 | 16 | 17 |
18 | 19 | 20 | 21 | 22 | 23 | 24 |
25 | 26 | 27 | 28 | 29 | 30 | 31 |