enscript --highlight=cpp --color -C --tabsize=4 -w html -o output.html $1
Usage: enscript [OPTION]... [FILE]...
Mandatory arguments to long options are mandatory for short options too.
-# an alias for option -n, --copies
-1 same as --columns=1
-2 same as --columns=2
--columns=NUM specify the number of columns per page
-a, --pages=PAGES specify which pages are printed
-A, --file-align=ALIGN align separate input files to ALIGN
-b, --header=HEADER set page header
-B, --no-header no page headers
-c, --truncate-lines cut long lines (default is to wrap)
-C[START], --line-numbers[=START]
precede each line with its line number
-d an alias for option --printer
-D, --setpagedevice=KEY[:VALUE]
pass a page device definition to output
-e[CHAR], --escapes[=CHAR] enable special escape interpretation
-E[LANG], --highlight[=LANG] highlight source code
-f, --font=NAME use font NAME for body text
-F, --header-font=NAME use font NAME for header texts
-g, --print-anyway nothing (compatibility option)
-G same as --fancy-header
--fancy-header[=NAME] select fancy page header
-h, --no-job-header suppress the job header page
-H[NUM], --highlight-bars[=NUM] specify how high highlight bars are
-i, --indent=NUM set line indent to NUM characters
-I, --filter=CMD read input files through input filter CMD
-j, --borders print borders around columns
-J, an alias for option --title
-k, --page-prefeed enable page prefeed
-K, --no-page-prefeed disable page prefeed
-l, --lineprinter simulate lineprinter, this is an alias for:
--lines-per-page=66, --no-header, --portrait,
--columns=1
-L, --lines-per-page=NUM specify how many lines are printed on each page
-m, --mail send mail upon completion
-M, --media=NAME use output media NAME
-n, --copies=NUM print NUM copies of each page
-N, --newline=NL select the newline character. Possible
values for NL are: n (`\n') and r (`\r').
-o an alias for option --output
-O, --missing-characters list missing characters
-p, --output=FILE leave output to file FILE. If FILE is `-',
leave output to stdout.
-P, --printer=NAME print output to printer NAME
-q, --quiet, --silent be really quiet
-r, --landscape print in landscape mode
-R, --portrait print in portrait mode
-s, --baselineskip=NUM set baselineskip to NUM
-S, --statusdict=KEY[:VALUE]
pass a statusdict definition to the output
-t, --title=TITLE set banner page's job title to TITLE. Option
sets also the name of the input file stdin.
-T, --tabsize=NUM set tabulator size to NUM
-u[TEXT], --underlay[=TEXT] print TEXT under every page
-U, --nup=NUM print NUM logical pages on each output page
-v, --verbose tell what we are doing
-V, --version print version number
-w, --language=LANG set output language to LANG
-W, --options=APP,OPTION pass option OPTION to helper application APP
-X, --encoding=NAME use input encoding NAME
-z, --no-formfeed do not interpret form feed characters
-Z, --pass-through pass through PostScript and PCL files
without any modifications
Long-only options:
--color[=bool] create color outputs with states
--continuous-page-numbers count page numbers across input files. Don't
restart numbering at beginning of each file.
--download-font=NAME download font NAME
--extended-return-values enable extended return values
--filter-stdin=NAME specify how stdin is shown to the input filter
--footer=FOOTER set page footer
--h-column-height=HEIGHT set the horizontal column height to HEIGHT
--help print this help and exit
--help-highlight describe all supported --highlight languages
and file formats
--highlight-bar-gray=NUM print highlight bars with gray NUM (0 - 1)
--list-media list names of all known media
--margins=LEFT:RIGHT:TOP:BOTTOM
adjust page marginals
--mark-wrapped-lines[STYLE]
mark wrapped lines in the output with STYLE
--non-printable-format=FMT specify how non-printable chars are printed
--nup-columnwise layout pages in the N-up printing columnwise
--nup-xpad=NUM set the page x-padding of N-up printing to NUM
--nup-ypad=NUM set the page y-padding of N-up printing to NUM
--page-label-format=FMT set page label format to FMT
--ps-level=LEVEL set the PostScript language level that enscript
should use
--printer-options=OPTIONS pass extra options to the printer command
--rotate-even-pages rotate even-numbered pages 180 degrees
--slice=NUM print vertical slice NUM
--style=STYLE use highlight style STYLE
--swap-even-page-margins swap left and right side margins for each even
numbered page
--toc print table of contents
--ul-angle=ANGLE set underlay text's angle to ANGLE
--ul-font=NAME print underlays with font NAME
--ul-gray=NUM print underlays with gray value NUM
--ul-position=POS set underlay's starting position to POS
--ul-style=STYLE print underlays with style STYLE
--word-wrap wrap long lines from word boundaries
Report bugs to <bug-enscript@gnu.org>.
// straight_dataset_bmp.h
// 2017/07/24 by marsee
//
// Shared constants and image types for the straight/left-turn/right-turn
// BMP data-set generation test bench.
//
// The include guard avoids the leading/trailing double underscore of the
// previous guard name, because identifiers containing "__" are reserved for
// the C++ implementation.
#ifndef STRAIGHT_DATASET_BMP_H
#define STRAIGHT_DATASET_BMP_H

#include "hls_video.h"

// Size of the source bitmaps in pixels.
#define BMP_HEIGHT 600
#define BMP_WIDTH 800

// Scale factor applied to the source bitmaps:
// 800 x 0.075 = 60 and 600 x 0.075 = 45, i.e. 1/13.3333... -> 60x45.
#define REDUCTION_RATIO 0.075 // 1/13.3333... 60x45

// Size of one data-set sample.
// NOTE(review): how 56x10 relates to the 60x45 reduced image is not visible
// in this header - confirm against the code that cuts out the samples.
#define DATASET_HEIGHT 56
#define DATASET_WIDTH 10

// Base names of the input bitmap files. These expand to bare tokens, not
// string literals, so they must be stringified before being used as text.
#define STRAIGHT_BMP_FILE_NAME straight
#define LEFT_TURN_BMP_FILE_NAME left_turn
#define RIGHT_TURN_BMP_FILE_NAME right_turn

// Number of input bitmaps per class.
#define STRAIGHT_NUM_OF_IMAGE 10
#define LEFT_TURN_NUM_OF_IMAGE 10
#define RIGHT_TURN_NUM_OF_IMAGE 10

// Legacy misspelled names ("TRUN", and the missing '_' in
// RIGHT_TRUNNUM_OF_IMAGE), kept as aliases so existing users still compile.
// Prefer the *_TURN_* names above in new code.
#define LEFT_TRUN_BMP_FILE_NAME LEFT_TURN_BMP_FILE_NAME
#define RIGHT_TRUN_BMP_FILE_NAME RIGHT_TURN_BMP_FILE_NAME
#define LEFT_TRUN_NUM_OF_IMAGE LEFT_TURN_NUM_OF_IMAGE
#define RIGHT_TRUNNUM_OF_IMAGE RIGHT_TURN_NUM_OF_IMAGE

// Pixel and full-size image types (3-channel and 1-channel, 8 bits each).
typedef hls::Scalar<3, unsigned char> RGB_PIXEL;
typedef hls::Mat<BMP_HEIGHT, BMP_WIDTH, HLS_8UC3> RGB_IMAGE;
typedef hls::Mat<BMP_HEIGHT, BMP_WIDTH, HLS_8UC1> GRAY_IMAGE;

#endif // STRAIGHT_DATASET_BMP_H
// straight_dataset_bmp.cpp
// 2017/07/24 by marsee
//
#include <iostream>
#include "hls_opencv.h"
#include "straight_dataset_bmp.h"
// Reads the bitmap "straight<i>.bmp", shrinks it to 7.5 % of its size with
// bicubic interpolation, converts the reduced image to grayscale and writes
// the result to "straight_RED<i>.bmp".
//
// Only image 0 is processed for now; the loop over all images is still
// commented out.
//
// Returns 0 on success, -1 when the input bitmap cannot be read or the
// output bitmap cannot be written.
int main(){
    const char straight_fn[] = "straight";  // base name of the input bitmaps
    const double reduction_ratio = 0.075;   // same value as REDUCTION_RATIO in straight_dataset_bmp.h
    char bmp_file[256];
    int i=0;

    // referred to http://opencv.jp/cookbook/opencv_img.html
    // straight
    //for(int i=0; i<STRAIGHT_NUM_OF_IMAGE; i++){
        // "%s%d" consumes both arguments; the previous "straight%d.bmp"
        // format fed the char* base name to %d, which is undefined behaviour.
        snprintf(bmp_file, sizeof(bmp_file), "%s%d.bmp", straight_fn, i);
        cv::Mat straight_img = cv::imread(bmp_file,1);  // 1: load as 3-channel colour
        if(straight_img.empty())
            return(-1);

        cv::Mat reduct_img(static_cast<int>(straight_img.rows*reduction_ratio),
                           static_cast<int>(straight_img.cols*reduction_ratio),
                           straight_img.type());
        // The 4th and 5th parameters of cv::resize are the scale factors
        // fx/fy; they are unused (0) because the destination size is given.
        // The interpolation flag must be the 6th argument to take effect.
        cv::resize(straight_img, reduct_img, reduct_img.size(), 0, 0, cv::INTER_CUBIC);

        // Convert the reduced image (not the full-size original), so the
        // file written below really contains the reduced picture.
        cv::Mat gray_img;
        cv::cvtColor(reduct_img, gray_img, CV_BGR2GRAY);

        snprintf(bmp_file, sizeof(bmp_file), "%s_RED%d.bmp", straight_fn, i);
        if(!cv::imwrite(bmp_file, gray_img))
            return(-1);
    //}

    return(0);
}
Starting C simulation ...
/opt/Xilinx/Vivado_HLS/2017.2/bin/vivado_hls /home/masaaki/Vivado_HLS/straight_dataset_bmp/solution1/csim.tcl
INFO: [HLS 200-10] Running '/opt/Xilinx/Vivado_HLS/2017.2/bin/unwrapped/lnx64.o/vivado_hls'
INFO: [HLS 200-10] For user 'masaaki' on host 'masaaki-H110M4-M01' (Linux_x86_64 version 4.4.0-87-generic) on Fri Jul 28 05:06:19 JST 2017
INFO: [HLS 200-10] On os Ubuntu 16.04.2 LTS
INFO: [HLS 200-10] In directory '/home/masaaki/Vivado_HLS'
WARNING: [HLS 200-40] Environment variable 'C_INCLUDE_PATH' is set to :/usr/local/cuda/include.
INFO: [HLS 200-10] Opening project '/home/masaaki/Vivado_HLS/straight_dataset_bmp'.
INFO: [HLS 200-10] Opening solution '/home/masaaki/Vivado_HLS/straight_dataset_bmp/solution1'.
INFO: [SYN 201-201] Setting up clock 'default' with a period of 10ns.
INFO: [HLS 200-10] Setting target device to 'xc7z020clg400-1'
INFO: [SIM 211-2] *************** CSIM start ***************
INFO: [SIM 211-4] CSIM will launch GCC as the compiler.
Compiling ../../../straight_dataset_bmp.cpp in debug mode
Generating csim.exe
Makefile.rules:349: ターゲット 'csim.exe' のレシピで失敗しました
/usr/bin/ld: warning: libjpeg.so.62, needed by /opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so, not found (try using -rpath or -rpath-link)
/usr/bin/ld: warning: libtiff.so.3, needed by /opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so, not found (try using -rpath or -rpath-link)
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFReadEncodedStrip' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_read_scanlines' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_alloc_huff_table' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_set_defaults' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_write_scanlines' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_finish_compress' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFReadEncodedTile' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFGetField' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFReadRGBATile' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFScanlineSize' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFSetField' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_set_quality' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFSetWarningHandler' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_resync_to_restart' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFIsTiled' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFWriteScanline' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_destroy_decompress' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFRGBAImageOK' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFOpen' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_CreateDecompress' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_read_header' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFReadRGBAStrip' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFSetErrorHandler' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_stdio_src' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_destroy_compress' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_stdio_dest' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `TIFFClose' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_finish_decompress' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_start_decompress' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_start_compress' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_CreateCompress' に対する定義されていない参照です
/opt/Xilinx/Vivado_HLS/2017.2/lnx64/tools/opencv/libopencv_highgui.so: `jpeg_std_error' に対する定義されていない参照です
collect2: ld returned 1 exit status
make: *** [csim.exe] エラー 1
ERROR: [SIM 211-100] CSim file generation failed: compilation error(s).
INFO: [SIM 211-3] *************** CSIM finish ***************
5
while executing
"source /home/masaaki/Vivado_HLS/straight_dataset_bmp/solution1/csim.tcl"
invoked from within
"hls::main /home/masaaki/Vivado_HLS/straight_dataset_bmp/solution1/csim.tcl"
("uplevel" body line 1)
invoked from within
"uplevel 1 hls::main {*}$args"
(procedure "hls_proc" line 5)
invoked from within
"hls_proc $argv"
Finished C simulation.
Libtiff is now configured for x86_64-unknown-linux-gnu
Installation directory: /usr/local
Documentation directory: ${prefix}/share/doc/tiff-3.9.7
C compiler: gcc -g -O2 -Wall -W
C++ compiler: g++ -g -O2
Enable runtime linker paths: no
Enable linker symbol versioning: no
Support Microsoft Document Imaging: yes
Support for internal codecs:
CCITT Group 3 & 4 algorithms: yes
Macintosh PackBits algorithm: yes
LZW algorithm: yes
ThunderScan 4-bit RLE algorithm: yes
NeXT 2-bit RLE algorithm: yes
LogLuv high dynamic range encoding: yes
Support for external codecs:
ZLIB support: yes
Pixar log-format algorithm: yes
JPEG support: yes
Old JPEG support: yes
ISO JBIG support: yes
C++ support: yes
OpenGL support: yes
-- General configuration for OpenCV 3.3.0-rc =====================================
-- Version control: 3.3.0-rc-159-g06407b4
--
-- Extra modules:
-- Location (extra): /home/masaaki/OpenCV/opencv_contrib/modules
-- Version control (extra): 3.3.0-rc-7-g067b0a6
--
-- Platform:
-- Timestamp: 2017-07-27T05:33:25Z
-- Host: Linux 4.4.0-87-generic x86_64
-- CMake: 3.3.2
-- CMake generator: Unix Makefiles
-- CMake build tool: /usr/bin/make
-- Configuration: RELEASE
--
-- CPU/HW features:
-- Baseline: SSE SSE2 SSE3
-- requested: SSE3
-- Dispatched code generation: SSE4_1 SSE4_2 FP16 AVX AVX2
-- requested: SSE4_1 SSE4_2 AVX FP16 AVX2
-- SSE4_1 (2 files): + SSSE3 SSE4_1
-- SSE4_2 (1 files): + SSSE3 SSE4_1 POPCNT SSE4_2
-- FP16 (1 files): + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 AVX
-- AVX (5 files): + SSSE3 SSE4_1 POPCNT SSE4_2 AVX
-- AVX2 (7 files): + SSSE3 SSE4_1 POPCNT SSE4_2 FP16 FMA3 AVX AVX2
--
-- C/C++:
-- Built as dynamic libs?: YES
-- C++ Compiler: /usr/bin/c++ (ver 5.4.0)
-- C++ flags (Release): -fsigned-char -W -Wall -Werror=return-type -Werror=non-virtual-dtor -Werror=address -Werror=sequence-point -Wformat -Werror=format-security -Wmissing-declarations -Wundef -Winit-self -Wpointer-arith -Wshadow -Wsign-promo -Wuninitialized -Winit-self -Wno-narrowing -Wno-delete-non-virtual-dtor -Wno-comment -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -msse -msse2 -msse3 -fvisibility=hidden -fvisibility-inlines-hidden -O3 -DNDEBUG -DNDEBUG
-- C++ flags (Debug): -fsigned-char -W -Wall -Werror=return-type -Werror=non-virtual-dtor -Werror=address -Werror=sequence-point -Wformat -Werror=format-security -Wmissing-declarations -Wundef -Winit-self -Wpointer-arith -Wshadow -Wsign-promo -Wuninitialized -Winit-self -Wno-narrowing -Wno-delete-non-virtual-dtor -Wno-comment -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -msse -msse2 -msse3 -fvisibility=hidden -fvisibility-inlines-hidden -g -O0 -DDEBUG -D_DEBUG
-- C Compiler: /usr/bin/cc
-- C flags (Release): -fsigned-char -W -Wall -Werror=return-type -Werror=non-virtual-dtor -Werror=address -Werror=sequence-point -Wformat -Werror=format-security -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wundef -Winit-self -Wpointer-arith -Wshadow -Wuninitialized -Winit-self -Wno-narrowing -Wno-comment -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -msse -msse2 -msse3 -fvisibility=hidden -O3 -DNDEBUG -DNDEBUG
-- C flags (Debug): -fsigned-char -W -Wall -Werror=return-type -Werror=non-virtual-dtor -Werror=address -Werror=sequence-point -Wformat -Werror=format-security -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wundef -Winit-self -Wpointer-arith -Wshadow -Wuninitialized -Winit-self -Wno-narrowing -Wno-comment -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -ffunction-sections -msse -msse2 -msse3 -fvisibility=hidden -g -O0 -DDEBUG -D_DEBUG
-- Linker flags (Release):
-- Linker flags (Debug):
-- ccache: NO
-- Precompiled headers: YES
-- Extra dependencies: gtk-3 gdk-3 pangocairo-1.0 pango-1.0 atk-1.0 cairo-gobject cairo gdk_pixbuf-2.0 gio-2.0 gthread-2.0 /usr/lib/x86_64-linux-gnu/libpng.so /usr/lib/x86_64-linux-gnu/libz.so /usr/lib/x86_64-linux-gnu/libtiff.so /usr/lib/x86_64-linux-gnu/libjasper.so /usr/lib/x86_64-linux-gnu/libjpeg.so /usr/lib/x86_64-linux-gnu/libImath.so /usr/lib/x86_64-linux-gnu/libIlmImf.so /usr/lib/x86_64-linux-gnu/libIex.so /usr/lib/x86_64-linux-gnu/libHalf.so /usr/lib/x86_64-linux-gnu/libIlmThread.so gstbase-1.0 gstreamer-1.0 gobject-2.0 glib-2.0 gstvideo-1.0 gstapp-1.0 gstriff-1.0 gstpbutils-1.0 dc1394 avcodec-ffmpeg avformat-ffmpeg avutil-ffmpeg swscale-ffmpeg freetype harfbuzz dl m pthread rt cudart nppc nppi npps cufft -L/usr/local/cuda/lib64
-- 3rdparty dependencies:
--
-- OpenCV modules:
-- To be built: cudev core cudaarithm flann imgproc ml objdetect phase_unwrapping plot reg surface_matching video xphoto bgsegm cudabgsegm cudafilters cudaimgproc cudawarping dnn face freetype fuzzy img_hash imgcodecs photo shape videoio xobjdetect cudacodec highgui ts bioinspired dpm features2d line_descriptor saliency text calib3d ccalib cudafeatures2d cudalegacy cudaobjdetect cudaoptflow cudastereo datasets rgbd stereo structured_light superres tracking videostab xfeatures2d ximgproc aruco optflow stitching python2
-- Disabled: world contrib_world
-- Disabled by dependency: -
-- Unavailable: java python3 viz cnn_3dobj cvv dnn_modern hdf matlab sfm
--
-- GUI:
-- QT: NO
-- GTK+ 3.x: YES (ver 3.18.9)
-- GThread : YES (ver 2.48.2)
-- GtkGlExt: NO
-- OpenGL support: NO
-- VTK support: NO
--
-- Media I/O:
-- ZLib: /usr/lib/x86_64-linux-gnu/libz.so (ver 1.2.8)
-- JPEG: /usr/lib/x86_64-linux-gnu/libjpeg.so (ver )
-- WEBP: build (ver encoder: 0x020e)
-- PNG: /usr/lib/x86_64-linux-gnu/libpng.so (ver 1.2.54)
-- TIFF: /usr/lib/x86_64-linux-gnu/libtiff.so (ver 42 - 4.0.6)
-- JPEG 2000: /usr/lib/x86_64-linux-gnu/libjasper.so (ver 1.900.1)
-- OpenEXR: /usr/lib/x86_64-linux-gnu/libImath.so /usr/lib/x86_64-linux-gnu/libIlmImf.so /usr/lib/x86_64-linux-gnu/libIex.so /usr/lib/x86_64-linux-gnu/libHalf.so /usr/lib/x86_64-linux-gnu/libIlmThread.so (ver 2.2.0)
-- GDAL: NO
-- GDCM: NO
--
-- Video I/O:
-- DC1394 1.x: NO
-- DC1394 2.x: YES (ver 2.2.4)
-- FFMPEG: YES
-- avcodec: YES (ver 56.60.100)
-- avformat: YES (ver 56.40.101)
-- avutil: YES (ver 54.31.100)
-- swscale: YES (ver 3.1.101)
-- avresample: NO
-- GStreamer:
-- base: YES (ver 1.8.3)
-- video: YES (ver 1.8.3)
-- app: YES (ver 1.8.3)
-- riff: YES (ver 1.8.3)
-- pbutils: YES (ver 1.8.3)
-- OpenNI: NO
-- OpenNI PrimeSensor Modules: NO
-- OpenNI2: NO
-- PvAPI: NO
-- GigEVisionSDK: NO
-- Aravis SDK: NO
-- UniCap: NO
-- UniCap ucil: NO
-- V4L/V4L2: NO/YES
-- XIMEA: NO
-- Xine: NO
-- Intel Media SDK: NO
-- gPhoto2: NO
--
-- Parallel framework: pthreads
--
-- Trace: YES (with Intel ITT)
--
-- Other third-party libraries:
-- Use Intel IPP: 2017.0.2 [2017.0.2]
-- at: /home/masaaki/OpenCV/opencv/build/3rdparty/ippicv/ippicv_lnx
-- Use Intel IPP IW: prebuilt binaries (2017.0.2)
-- Use Intel IPP Async: NO
-- Use VA: NO
-- Use Intel VA-API/OpenCL: NO
-- Use Lapack: NO
-- Use Eigen: NO
-- Use Cuda: YES (ver 8.0)
-- Use OpenCL: YES
-- Use OpenVX: NO
-- Use custom HAL: NO
--
-- NVIDIA CUDA
-- Use CUFFT: YES
-- Use CUBLAS: NO
-- USE NVCUVID: NO
-- NVIDIA GPU arch: 20 30 35 37 50 52 60 61
-- NVIDIA PTX archs:
-- Use fast math: NO
--
-- OpenCL: <Dynamic loading of OpenCL library>
-- Include path: /home/masaaki/OpenCV/opencv/3rdparty/include/opencl/1.2
-- Use AMDFFT: NO
-- Use AMDBLAS: NO
--
-- Python 2:
-- Interpreter: /usr/bin/python2.7 (ver 2.7.12)
-- Libraries: /usr/lib/x86_64-linux-gnu/libpython2.7.so (ver 2.7.12)
-- numpy: /home/masaaki/.local/lib/python2.7/site-packages/numpy/core/include (ver 1.11.0)
-- packages path: lib/python2.7/dist-packages
--
-- Python 3:
-- Interpreter: /usr/bin/python3 (ver 3.5.2)
--
-- Python (for build): /usr/bin/python2.7
--
-- Java:
-- ant: NO
-- JNI: NO
-- Java wrappers: NO
-- Java tests: NO
--
-- Matlab: Matlab not found or implicitly disabled
--
-- Documentation:
-- Doxygen: NO
--
-- Tests and samples:
-- Tests: YES
-- Performance tests: YES
-- C/C++ Examples: NO
--
-- Install path: /usr/local
--
-- cvconfig.h is in: /home/masaaki/OpenCV/opencv/build
-- -----------------------------------------------------------------
--
-- Configuring done
-- Generating done
-- Build files have been written to: /home/masaaki/OpenCV/opencv/build
-- General configuration for OpenCV 3.1.0 =====================================
-- Version control: unknown
--
-- Platform:
-- Host: Linux 4.4.0-87-generic x86_64
-- CMake: 3.3.2
-- CMake generator: Unix Makefiles
-- CMake build tool: /usr/bin/make
-- Configuration: RELEASE
--
-- C/C++:
-- Built as dynamic libs?: YES
-- C++ Compiler: /usr/bin/c++ (ver 5.4.0)
-- C++ flags (Release): -fsigned-char -W -Wall -Werror=return-type -Werror=non-virtual-dtor -Werror=address -Werror=sequence-point -Wformat -Werror=format-security -Wmissing-declarations -Wundef -Winit-self -Wpointer-arith -Wshadow -Wsign-promo -Wno-narrowing -Wno-delete-non-virtual-dtor -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -msse -msse2 -mno-avx -msse3 -mno-ssse3 -mno-sse4.1 -mno-sse4.2 -ffunction-sections -fvisibility=hidden -fvisibility-inlines-hidden -O3 -DNDEBUG -DNDEBUG
-- C++ flags (Debug): -fsigned-char -W -Wall -Werror=return-type -Werror=non-virtual-dtor -Werror=address -Werror=sequence-point -Wformat -Werror=format-security -Wmissing-declarations -Wundef -Winit-self -Wpointer-arith -Wshadow -Wsign-promo -Wno-narrowing -Wno-delete-non-virtual-dtor -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -msse -msse2 -mno-avx -msse3 -mno-ssse3 -mno-sse4.1 -mno-sse4.2 -ffunction-sections -fvisibility=hidden -fvisibility-inlines-hidden -g -O0 -DDEBUG -D_DEBUG
-- C Compiler: /usr/bin/cc
-- C flags (Release): -fsigned-char -W -Wall -Werror=return-type -Werror=non-virtual-dtor -Werror=address -Werror=sequence-point -Wformat -Werror=format-security -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wundef -Winit-self -Wpointer-arith -Wshadow -Wno-narrowing -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -msse -msse2 -mno-avx -msse3 -mno-ssse3 -mno-sse4.1 -mno-sse4.2 -ffunction-sections -fvisibility=hidden -O3 -DNDEBUG -DNDEBUG
-- C flags (Debug): -fsigned-char -W -Wall -Werror=return-type -Werror=non-virtual-dtor -Werror=address -Werror=sequence-point -Wformat -Werror=format-security -Wmissing-declarations -Wmissing-prototypes -Wstrict-prototypes -Wundef -Winit-self -Wpointer-arith -Wshadow -Wno-narrowing -fdiagnostics-show-option -Wno-long-long -pthread -fomit-frame-pointer -msse -msse2 -mno-avx -msse3 -mno-ssse3 -mno-sse4.1 -mno-sse4.2 -ffunction-sections -fvisibility=hidden -g -O0 -DDEBUG -D_DEBUG
-- Linker flags (Release):
-- Linker flags (Debug):
-- Precompiled headers: YES
-- Extra dependencies: Qt5::Core Qt5::Gui Qt5::Widgets Qt5::Test Qt5::Concurrent Qt5::OpenGL /usr/lib/x86_64-linux-gnu/libpng.so /usr/lib/x86_64-linux-gnu/libz.so /usr/lib/x86_64-linux-gnu/libtiff.so /usr/lib/x86_64-linux-gnu/libjasper.so /usr/lib/x86_64-linux-gnu/libjpeg.so /usr/lib/x86_64-linux-gnu/libImath.so /usr/lib/x86_64-linux-gnu/libIlmImf.so /usr/lib/x86_64-linux-gnu/libIex.so /usr/lib/x86_64-linux-gnu/libHalf.so /usr/lib/x86_64-linux-gnu/libIlmThread.so gstvideo-1.0 gstapp-1.0 gstbase-1.0 gstriff-1.0 gstpbutils-1.0 gstreamer-1.0 gobject-2.0 glib-2.0 dc1394 v4l1 v4l2 avcodec-ffmpeg avformat-ffmpeg avutil-ffmpeg swscale-ffmpeg dl m pthread rt /usr/lib/x86_64-linux-gnu/libGLU.so /usr/lib/x86_64-linux-gnu/libGL.so tbb atomic cudart nppc nppi npps cufft -L/usr/local/cuda/lib64
-- 3rdparty dependencies: libwebp
--
-- OpenCV modules:
-- To be built: cudev core cudaarithm flann imgproc ml video cudabgsegm cudafilters cudaimgproc cudawarping imgcodecs photo shape videoio cudacodec highgui objdetect ts features2d calib3d cudafeatures2d cudalegacy cudaobjdetect cudaoptflow cudastereo stitching superres videostab python2
-- Disabled: world
-- Disabled by dependency: -
-- Unavailable: java python3 viz
--
-- GUI:
-- QT 5.x: YES (ver 5.5.1)
-- QT OpenGL support: YES (Qt5::OpenGL 5.5.1)
-- OpenGL support: YES (/usr/lib/x86_64-linux-gnu/libGLU.so /usr/lib/x86_64-linux-gnu/libGL.so)
-- VTK support: NO
--
-- Media I/O:
-- ZLib: /usr/lib/x86_64-linux-gnu/libz.so (ver 1.2.8)
-- JPEG: /usr/lib/x86_64-linux-gnu/libjpeg.so (ver )
-- WEBP: build (ver 0.3.1)
-- PNG: /usr/lib/x86_64-linux-gnu/libpng.so (ver 1.2.54)
-- TIFF: /usr/lib/x86_64-linux-gnu/libtiff.so (ver 42 - 4.0.6)
-- JPEG 2000: /usr/lib/x86_64-linux-gnu/libjasper.so (ver 1.900.1)
-- OpenEXR: /usr/lib/x86_64-linux-gnu/libImath.so /usr/lib/x86_64-linux-gnu/libIlmImf.so /usr/lib/x86_64-linux-gnu/libIex.so /usr/lib/x86_64-linux-gnu/libHalf.so /usr/lib/x86_64-linux-gnu/libIlmThread.so (ver 2.2.0)
-- GDAL: NO
--
-- Video I/O:
-- DC1394 1.x: NO
-- DC1394 2.x: YES (ver 2.2.4)
-- FFMPEG: YES
-- codec: YES (ver 56.60.100)
-- format: YES (ver 56.40.101)
-- util: YES (ver 54.31.100)
-- swscale: YES (ver 3.1.101)
-- resample: NO
-- gentoo-style: YES
-- GStreamer:
-- base: YES (ver 1.8.3)
-- video: YES (ver 1.8.3)
-- app: YES (ver 1.8.3)
-- riff: YES (ver 1.8.3)
-- pbutils: YES (ver 1.8.3)
-- OpenNI: NO
-- OpenNI PrimeSensor Modules: NO
-- OpenNI2: NO
-- PvAPI: NO
-- GigEVisionSDK: NO
-- UniCap: NO
-- UniCap ucil: NO
-- V4L/V4L2: Using libv4l1 (ver 1.10.0) / libv4l2 (ver 1.10.0)
-- XIMEA: NO
-- Xine: NO
-- gPhoto2: NO
--
-- Parallel framework: TBB (ver 4.4 interface 9002)
--
-- Other third-party libraries:
-- Use IPP: 9.0.1 [9.0.1]
-- at: /home/masaaki/opencv-3.1.0/3rdparty/ippicv/unpack/ippicv_lnx
-- Use IPP Async: NO
-- Use VA: NO
-- Use Intel VA-API/OpenCL: NO
-- Use Eigen: NO
-- Use Cuda: YES (ver 8.0)
-- Use OpenCL: YES
-- Use custom HAL: NO
--
-- NVIDIA CUDA
-- Use CUFFT: YES
-- Use CUBLAS: NO
-- USE NVCUVID: NO
-- NVIDIA GPU arch: 20 21 30 35
-- NVIDIA PTX archs: 30
-- Use fast math: NO
--
-- OpenCL:
-- Version: dynamic
-- Include path: /home/masaaki/opencv-3.1.0/3rdparty/include/opencl/1.2
-- Use AMDFFT: NO
-- Use AMDBLAS: NO
--
-- Python 2:
-- Interpreter: /usr/bin/python2.7 (ver 2.7.12)
-- Libraries: /usr/lib/x86_64-linux-gnu/libpython2.7.so (ver 2.7.12)
-- numpy: /home/masaaki/.local/lib/python2.7/site-packages/numpy/core/include (ver 1.11.0)
-- packages path: lib/python2.7/dist-packages
--
-- Python 3:
-- Interpreter: /usr/bin/python3 (ver 3.5.2)
--
-- Python (for build): /usr/bin/python2.7
--
-- Java:
-- ant: NO
-- JNI: NO
-- Java wrappers: NO
-- Java tests: NO
--
-- Matlab: Matlab not found or implicitly disabled
--
-- Documentation:
-- Doxygen: NO
-- PlantUML: NO
--
-- Tests and samples:
-- Tests: YES
-- Performance tests: YES
-- C/C++ Examples: NO
--
-- Install path: /usr/local
--
-- cvconfig.h is in: /home/masaaki/opencv-3.1.0/build
-- -----------------------------------------------------------------
--
-- Configuring done
-- Generating done
-- Build files have been written to: /home/masaaki/opencv-3.1.0/build
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:120:54: error: ‘NppiGraphcutState’ has not been declared
typedef NppStatus (*init_func_t)(NppiSize oSize, NppiGraphcutState** ppStat
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:135:18: error: ‘NppiGraphcutState’ does not name a type
operator NppiGraphcutState*()
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:141:9: error: ‘NppiGraphcutState’ does not name a type
NppiGraphcutState* pState;
^
In file included from /home/masaaki/opencv-3.1.0/build/modules/cudalegacy/precomp.hpp:75:0:
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp: In constructor ‘{anonymous}::NppiGraphcutStateHandler::NppiGraphcutStateHandler(NppiSize, Npp8u*, {anonymous}::init_func_t)’:
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:127:39: error: ‘pState’ was not declared in this scope
nppSafeCall( func(sznpp, &pState, pDeviceMem) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp: In destructor ‘{anonymous}::NppiGraphcutStateHandler::~NppiGraphcutStateHandler()’:
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:132:43: error: ‘pState’ was not declared in this scope
nppSafeCall( nppiGraphcutFree(pState) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:132:49: error: ‘nppiGraphcutFree’ was not declared in this scope
nppSafeCall( nppiGraphcutFree(pState) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp: In function ‘void cv::cuda::graphcut(cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::Stream&)’:
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:174:51: error: ‘nppiGraphcutGetSize’ was not declared in this scope
nppSafeCall( nppiGraphcutGetSize(sznpp, &bufsz) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:182:61: error: ‘nppiGraphcutInitAlloc’ was not declared in this scope
NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcutInitAll
^
In file included from /home/masaaki/opencv-3.1.0/build/modules/cudalegacy/precomp.hpp:75:0:
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:191:146: error: ‘nppiGraphcut_32s8u’ was not declared in this scope
nsp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:196:146: error: ‘nppiGraphcut_32f8u’ was not declared in this scope
nsp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp: In function ‘void cv::cuda::graphcut(cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::GpuMat&, cv::cuda::Stream&)’:
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:246:52: error: ‘nppiGraphcut8GetSize’ was not declared in this scope
nppSafeCall( nppiGraphcut8GetSize(sznpp, &bufsz) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:254:61: error: ‘nppiGraphcut8InitAlloc’ was not declared in this scope
NppiGraphcutStateHandler state(sznpp, buf.ptr<Npp8u>(), nppiGraphcut8InitAl
^
In file included from /home/masaaki/opencv-3.1.0/build/modules/cudalegacy/precomp.hpp:75:0:
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:267:146: error: ‘nppiGraphcut8_32s8u’ was not declared in this scope
nsp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
/home/masaaki/opencv-3.1.0/modules/cudalegacy/src/graphcuts.cpp:274:146: error: ‘nppiGraphcut8_32f8u’ was not declared in this scope
nsp.step), sznpp, labels.ptr<Npp8u>(), static_cast<int>(labels.step), state) );
^
/home/masaaki/opencv-3.1.0/modules/core/include/opencv2/core/private.cuda.hpp:165:52: note: in definition of macro ‘nppSafeCall’
#define nppSafeCall(expr) cv::cuda::checkNppError(expr, __FILE__, __LINE__, CV
^
modules/cudalegacy/CMakeFiles/opencv_cudalegacy.dir/build.make:362: ターゲット 'modules/cudalegacy/CMakeFiles/opencv_cudalegacy.dir/src/graphcuts.cpp.o' のレシピで失敗しました
make[2]: *** [modules/cudalegacy/CMakeFiles/opencv_cudalegacy.dir/src/graphcuts.cpp.o] エラー 1
CMakeFiles/Makefile2:9285: ターゲット 'modules/cudalegacy/CMakeFiles/opencv_cudalegacy.dir/all' のレシピで失敗しました
make[1]: *** [modules/cudalegacy/CMakeFiles/opencv_cudalegacy.dir/all] エラー 2
Makefile:160: ターゲット 'all' のレシピで失敗しました
make: *** [all] エラー 2
// Gabor_filter_lh.cpp
// 2016/07/23 by marsee
// 2016/07/25 : 右白線検出用のGabor Filterを追加して、右左の白線を指定するRorL 引数を追加
// 2016/07/27 : 右白線検出用配列と左白線検出用配列を統合
//
#include <stdio.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "Gabor_filter_lh.h"
int conv_rgb2y(int rgb);
// Streaming Gabor filter (Vivado HLS top-level function).
//
// ins  : AXI4-Stream input, one 32-bit pixel per beat (passed to conv_rgb2y()).
// outs : AXI4-Stream output, the clamped filter result replicated into the
//        three low bytes of each 32-bit word.
// RorL : selects which weight table gabor_weight[RorL] is used.
// Returns 0 always.
//
// Input beats are discarded until one arrives with user == 1 (start of frame);
// then exactly VERTICAL_PIXEL_WIDTH x HORIZONTAL_PIXEL_WIDTH pixels are processed.
int Gabor_filter_lh(hls::stream<ap_axis<32,1,1,1> >& ins,
hls::stream<ap_axis<32,1,1,1> >& outs, ap_uint<1> & RorL){
#pragma HLS INTERFACE ap_none port=RorL
#pragma HLS INTERFACE axis port=ins
#pragma HLS INTERFACE axis port=outs
#pragma HLS INTERFACE s_axilite port=return
ap_axis<32,1,1,1> pix;
ap_axis<32,1,1,1> gabor;
// linebuf keeps the previous ARRAY_SIZE-1 rows of luma values; mbuf is the
// ARRAY_SIZE x ARRAY_SIZE window the filter is evaluated on.
hls::LineBuffer<ARRAY_SIZE-1, HORIZONTAL_PIXEL_WIDTH, int> linebuf;
hls::Window<ARRAY_SIZE, ARRAY_SIZE, int> mbuf;
int gray_pix, val, i, j, x, y;
do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// The frame starts when user becomes 1
ins >> pix;
} while(pix.user == 0);
for (y=0; y<VERTICAL_PIXEL_WIDTH; y++){
for (x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // the very first pixel has already been read above
ins >> pix; // read from the AXI4-Stream
mbuf.shift_pixels_left(); // shift the window contents one column to the left
// Fill the rightmost window column: rows 0..ARRAY_SIZE-2 come from the line
// buffer at column x, the last row is the pixel that just arrived.
for(i=0; i<ARRAY_SIZE-1; i++){
mbuf.insert_pixel(linebuf.getval(i,x), i, ARRAY_SIZE-1);
}
gray_pix = conv_rgb2y(pix.data);
mbuf.insert_pixel(gray_pix, ARRAY_SIZE-1, ARRAY_SIZE-1);
// Update the line buffer
linebuf.shift_pixels_up(x);
linebuf.insert_bottom_row(gray_pix, x);
// Gabor filter computation
// NOTE(review): both loops stop at ARRAY_SIZE-1, so the last row and column
// of the ARRAY_SIZE x ARRAY_SIZE window are never accumulated — confirm this
// is intended against the dimensions of gabor_weight.
for (j=0, val=0; j<ARRAY_SIZE-1; j++){
for (i=0; i<ARRAY_SIZE-1; i++){
val += gabor_weight[(int)RorL][j][i] * mbuf(j,i);
}
}
val = val/256; // the weights are scaled by 256, so divide by 256 to undo it
if (val<0)
//val = -val; // absolute value
val = 0; // clamp negative values to 0
else if (val>255)
val = 255;
// Write out the Gabor filter result (same value in the three low bytes)
gabor.data = (val<<16)+(val<<8)+val;
// The first ARRAY_SIZE-1 rows, and the first ARRAY_SIZE-1 columns of every
// other row, are invalid data, so they are forced to 0
if (x<(ARRAY_SIZE-1) || y<(ARRAY_SIZE-1))
gabor.data = 0;
if (x==0 && y==0) // assert TUSER on the first output pixel
gabor.user = 1;
else
gabor.user = 0;
if (x == (HORIZONTAL_PIXEL_WIDTH-1)) // assert TLAST at the end of each line
gabor.last = 1;
else
gabor.last = 0;
outs << gabor; // write to the AXI4-Stream
}
}
return(0);
}
// Convert a packed RGB pixel to its luma (Y) component.
// Pixel layout: {8'd0, R(8 bits), G(8 bits), B(8 bits)}, one pixel per 32-bit word.
// Only Y is produced, using Y = 0.299R + 0.587G + 0.114B evaluated in integer
// arithmetic: the coefficients are pre-multiplied by 256 (77, 150, 29) and the
// weighted sum is shifted right by 8 to divide by 256 again.
// Reference: "YUV format and YUV<->RGB conversion", http://vision.kuee.kyoto-u.ac.jp/~hiroaki/firewire/yuv.html
// 2013/09/27 : stopped using float, everything is int
int conv_rgb2y(int rgb){
    const int blue  = rgb & 0xff;
    const int green = (rgb >> 8) & 0xff;
    const int red   = (rgb >> 16) & 0xff;
    const int weighted_sum = 77 * red + 150 * green + 29 * blue;
    return weighted_sum >> 8;
}
// Gabor_filter_lh_tb.cpp
// 2016/07/24 by marsee
// 2016/07/25 : 右白線検出用のGabor Filterを追加して、右左の白線を指定するRorL 引数を追加
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <math.h>
#include <ap_axi_sdata.h>
#include <hls_video.h>
#include "Gabor_filter_lh.h"
#include "bmp_header.h"
int Gabor_filter_lh(hls::stream<ap_axis<32,1,1,1> >& ins, hls::stream<ap_axis<32,1,1,1> >& outs, ap_uint<1> & RorL);
int conv_rgb2y_soft(int rgb);
int Gabor_filter_lh_soft(hls::stream<ap_axis<32,1,1,1> >& ins, hls::stream<ap_axis<32,1,1,1> >& outs, ap_uint<1> & RorL);
#define CLOCK_PERIOD 10
#define RIGHT_OR_LEFT LEFT_WEIGHT
#define BMP_FILE_NAME "bmp_f_0823_0.bmp"
int main()
{
using namespace std;
hls::stream<ap_axis<32,1,1,1> > ins;
hls::stream<ap_axis<32,1,1,1> > ins_soft;
hls::stream<ap_axis<32,1,1,1> > outs;
hls::stream<ap_axis<32,1,1,1> > outs_soft;
ap_axis<32,1,1,1> pix;
ap_axis<32,1,1,1> vals;
ap_axis<32,1,1,1> vals_soft;
int m_seq = 1; // M系列の値
int i;
int xor_shift;
BITMAPFILEHEADER bmpfhr; // BMPファイルのファイルヘッダ(for Read)
BITMAPINFOHEADER bmpihr; // BMPファイルのINFOヘッダ(for Read)
FILE *fbmpr, *fbmpw, *fbmpwf;
int *rd_bmp, *hw_gabor, *sw_gabor;
int blue, green, red;
ap_uint<1> r_l;
if ((fbmpr = fopen(BMP_FILE_NAME, "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open test.bmp by binary read mode\n");
exit(1);
}
// bmpヘッダの読み出し
fread(&bmpfhr.bfType, sizeof(char), 2, fbmpr);
fread(&bmpfhr.bfSize, sizeof(long), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// ピクセルを入れるメモリをアロケートする
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
if ((hw_gabor =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate hw_gabor memory\n");
exit(1);
}
if ((sw_gabor =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate hw_gabor memory\n");
exit(1);
}
// rd_bmp にBMPのピクセルを代入。その際に、行を逆転する必要がある
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
// ins に入力データを用意する
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pix.data = i;
ins << pix;
}
for(int j=0; j < bmpihr.biHeight; j++){
for(i=0; i < bmpihr.biWidth; i++){
pix.data = (ap_int<32>)rd_bmp[(j*bmpihr.biWidth)+i];
if (j==0 && i==0) // 最初のデータの時に TUSER を 1 にする
pix.user = 1;
else
pix.user = 0;
if (i == bmpihr.biWidth-1) // 行の最後でTLASTをアサートする
pix.last = 1;
else
pix.last = 0;
ins << pix;
ins_soft << pix;
}
}
r_l = (ap_uint<1>)RIGHT_OR_LEFT;
Gabor_filter_lh(ins, outs, r_l);
Gabor_filter_lh_soft(ins_soft, outs_soft, r_l);
// ハードウェアとソフトウェアのラプラシアン・フィルタの値のチェック
cout << endl;
cout << "outs" << endl;
for(int j=0; j < bmpihr.biHeight; j++){
for(i=0; i < bmpihr.biWidth; i++){
outs >> vals;
outs_soft >> vals_soft;
ap_int<32> val = vals.data;
ap_int<32> val_soft = vals_soft.data;
hw_gabor[(j*bmpihr.biWidth)+i] = (int)val;
sw_gabor[(j*bmpihr.biWidth)+i] = (int)val_soft;
if ((double)pow((double)(val&0xff)-(val_soft&0xff),(double)2) > 4){ // 2乗誤差が4よりも大きい
printf("ERROR HW and SW results mismatch i = %ld, j = %ld, HW = %08x, SW = %08x\n", i, j, (int)val, (int)val_soft);
//return(1);
}
//if (vals.last)
//cout << "AXI-Stream is end" << endl;
}
}
cout << "Success HW and SW results match" << endl;
cout << endl;
// ハードウェアのラプラシアンフィルタの結果を temp_gabor.bmp へ出力する
if ((fbmpw=fopen("temp_gabor.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_gabor.bmp by binary write mode\n");
exit(1);
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpw);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpw);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpw);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpw);
// RGB データの書き込み、逆順にする
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = hw_gabor[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = (hw_gabor[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] >> 8) & 0xff;
red = (hw_gabor[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x]>>16) & 0xff;
fputc(blue, fbmpw);
fputc(green, fbmpw);
fputc(red, fbmpw);
}
}
fclose(fbmpw);
// ソフトウェアのラプラシアンフィルタの結果を temp_gabor_float.bmp へ出力する
if ((fbmpwf=fopen("temp_gabor_float.bmp", "wb")) == NULL){
fprintf(stderr, "Can't open temp_gabor_float.bmp by binary write mode\n");
exit(1);
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpwf);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpwf);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpwf);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpwf);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpwf);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpwf);
// RGB データの書き込み、逆順にする
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = sw_gabor[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = (sw_gabor[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] >> 8) & 0xff;
red = (sw_gabor[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x]>>16) & 0xff;
fputc(blue, fbmpwf);
fputc(green, fbmpwf);
fputc(red, fbmpwf);
}
}
fclose(fbmpwf);
free(rd_bmp);
free(hw_gabor);
return 0;
}
// Software reference model of the Gabor filter used by the testbench.
//
// Same stream protocol and output format as Gabor_filter_lh, but the weighted
// sum is accumulated in float using gabor_fweight[RorL] and then truncated to
// int, instead of using the integer weights followed by a divide by 256.
// The window is filled bottom-up here (the newest pixel goes to row 0), so the
// window row is read as ARRAY_SIZE-1-j in the accumulation.
// Returns 0 always.
int Gabor_filter_lh_soft(hls::stream<ap_axis<32,1,1,1> >& ins, hls::stream<ap_axis<32,1,1,1> >& outs, ap_uint<1> & RorL){
ap_axis<32,1,1,1> pix;
ap_axis<32,1,1,1> gabor;
hls::LineBuffer<ARRAY_SIZE-1, HORIZONTAL_PIXEL_WIDTH, int> linebuf;
hls::Window<ARRAY_SIZE, ARRAY_SIZE, int> mbuf;
int gray_pix, val, i, j, x, y;
float valf;
do { // the frame starts when user becomes 1
ins >> pix;
} while(pix.user == 0);
for (y=0; y<VERTICAL_PIXEL_WIDTH; y++){
for (x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
if (!(x==0 && y==0)) // the very first pixel has already been read above
ins >> pix; // read from the AXI4-Stream
mbuf.shift_left(); // shift the window contents one column to the left
// Fill the rightmost window column: line-buffer row i goes to window row i+1,
// and the pixel that just arrived goes to window row 0.
for(i=ARRAY_SIZE-2; i>=0; --i){
mbuf.insert(linebuf(i,x), i+1, ARRAY_SIZE-1);
}
gray_pix = conv_rgb2y_soft(pix.data);
mbuf.insert(gray_pix, 0, ARRAY_SIZE-1);
// Update the line buffer
linebuf.shift_down(x);
linebuf.insert_bottom(gray_pix, x);
// Gabor filter computation
// NOTE(review): both loops stop at ARRAY_SIZE-1, so one row and one column of
// the ARRAY_SIZE x ARRAY_SIZE window are never accumulated — confirm this is
// intended against the dimensions of gabor_fweight.
for (j=0, valf=0; j<ARRAY_SIZE-1; j++){
for (i=0; i<ARRAY_SIZE-1; i++){
valf += gabor_fweight[(int)RorL][j][i] * (float)mbuf(ARRAY_SIZE-1-j,i);
}
}
val = (int)valf;
if (val<0)
//val = -val; // absolute value
val = 0; // clamp negative values to 0
else if (val>255)
val = 255;
// Write out the Gabor filter result (same value in the three low bytes)
gabor.data = (val<<16)+(val<<8)+val;
// The first ARRAY_SIZE-1 rows, and the first ARRAY_SIZE-1 columns of every
// other row, are invalid data, so they are forced to 0
if (x<(ARRAY_SIZE-1) || y<(ARRAY_SIZE-1))
gabor.data = 0;
if (x==0 && y==0) // assert TUSER on the first output pixel
gabor.user = 1;
else
gabor.user = 0;
if (x == (HORIZONTAL_PIXEL_WIDTH-1)) // assert TLAST at the end of each line
gabor.last = 1;
else
gabor.last = 0;
outs << gabor; // write to the AXI4-Stream
}
}
return(0);
}
// RGB to Y (luma) conversion, software-side copy used by the reference model.
// Pixel layout: {8'd0, R(8 bits), G(8 bits), B(8 bits)}, one pixel per 32-bit word.
// Only the luma signal is produced: Y = 0.299R + 0.587G + 0.114B.
// The coefficients are scaled by 256 (77, 150, 29) so the whole computation is
// done on ints; the final right shift by 8 divides by 256.
// Reference: "YUV format and YUV<->RGB conversion", http://vision.kuee.kyoto-u.ac.jp/~hiroaki/firewire/yuv.html
// 2013/09/27 : stopped using float, everything is int
int conv_rgb2y_soft(int rgb){
    int channel[3]; // [0] = blue, [1] = green, [2] = red
    for (int k = 0; k < 3; ++k)
        channel[k] = (rgb >> (8 * k)) & 0xff;

    int scaled_luma = 77 * channel[2];
    scaled_luma += 150 * channel[1];
    scaled_luma += 29 * channel[0];
    return scaled_luma >> 8;
}
mbuf.shift_left(); // mbuf の列を1ビット左シフト
for(i=ARRAY_SIZE-2; i>=0; --i){
mbuf.insert(linebuf(i,x), i+1, ARRAY_SIZE-1);
}
gray_pix = conv_rgb2y_soft(pix.data);
mbuf.insert(gray_pix, 0, ARRAY_SIZE-1);
• shift_pixels_up()
• shift_pixels_down()
• shift_pixels_left()
• shift_pixels_right()
• insert_pixel(value,row,column)
• insert_row()
• insert_bottom_row()
• insert_top_row()
• insert_col()
• insert_left_col()
• insert_right_col()
• getval(row, column)
mbuf.shift_pixels_left(); // mbuf の列を1ビット左シフト
for(i=0; i<ARRAY_SIZE-1; i++){
mbuf.insert_pixel(linebuf.getval(i,x), i, ARRAY_SIZE-1);
}
gray_pix = conv_rgb2y(pix.data);
mbuf.insert_pixel(gray_pix, ARRAY_SIZE-1, ARRAY_SIZE-1);
• shift_pixels_up()
• shift_pixels_down()
• insert_bottom_row()
• insert_top_row()
• getval(row,column)
./devtov pynq_mnist_cnn
./clock_settings.sh
./fpgamag pynq_fastx_wrapper.bit
#Makefile
# Referred to http://www.ie.u-ryukyu.ac.jp/~e085739/c.makefile.tuts.html
#
# Builds the pynq_mnist_cnn executable from the application source and the
# xmnist_conv_nn / xsquare_frame_gen driver sources listed in OBJS.
# NOTE(review): recipe lines must start with a TAB character in the real
# Makefile; the leading whitespace is not visible in this listing — confirm.

# Name of the executable; used by the clean target below.
PROGRAM = pynq_mnist_cnn
# Object files linked into the executable.
OBJS = pynq_mnist_cnn.o xmnist_conv_nn.o xmnist_conv_nn_linux.o xsquare_frame_gen.o xsquare_frame_gen_linux.o
CC = gcc
# Flags for compiling each .c file (the link step below passes only -Wall).
CFLAGS = -Wall -O2
# Restrict the suffix rules to .c -> .o.
.SUFFIXES: .c .o
.PHONY: all
# Default target: build the executable.
all: pynq_mnist_cnn
# Link step.
pynq_mnist_cnn: $(OBJS)
$(CC) -Wall -o $@ $(OBJS)
# Suffix rule: compile one .c file into the matching .o file.
.c.o:
$(CC) $(CFLAGS) -c $<
.PHONY: clean
# Remove the executable and all object files.
clean:
$(RM) $(PROGRAM) $(OBJS)
//
// pynq_mnist_cnn.c
// 2017/07/17 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <termios.h>
#include <sys/time.h>
#include "xmnist_conv_nn.h"
#include "xsquare_frame_gen.h"
#include "af1_bias_float.h"
#include "af1_weight_float.h"
#include "af2_bias_float.h"
#include "af2_weight_float.h"
#include "conv1_bias_float.h"
#include "conv1_weight_float.h"
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
#define PIXEL_NUM_OF_BYTES 4
#define ALL_DISP_ADDRESS (HORIZONTAL_PIXELS*VERTICAL_LINES*PIXEL_NUM_OF_BYTES)
int max_int(int out[10]);
int max_float(float out[10]);
int mnist_conv_nn_float(int in[22400], int addr_offset, float out[10]);
float conv_rgb2y_soft(int rgb);
// Print the elapsed time between two gettimeofday() samples in the form
// "conv_time = <sec>.<usec, 6 digits> sec".
static void print_conv_time(const struct timeval *start_time, const struct timeval *end_time)
{
    long sec = (long)(end_time->tv_sec - start_time->tv_sec);
    long usec = (long)(end_time->tv_usec - start_time->tv_usec);

    if (usec < 0){ // borrow one second when the microsecond part wrapped
        sec -= 1;
        usec += 1000000;
    }
    printf("conv_time = %ld.%06ld sec\n", sec, usec);
}

// Interactive MNIST CNN demo.
//
// Maps axi_gpio_0, axis_switch_0/1 (UIO) and the udmabuf4 frame buffer,
// initialises the square_frame_gen and mnist_conv_nn IPs, then loops reading
// single key presses from a raw, non-blocking stdin:
//   h/H, l/L, k/K, j/J  move the 28x28 frame by 1 or 5 pixels,
//   r                   runs the hardware CNN and the float software CNN on
//                       the framed region and prints both results and timings,
//   d/D                 prints the help line,
//   q                   leaves the loop, releases the mappings and restores
//                       the terminal settings.
// Returns 0 on normal exit; exits with -1 if a device cannot be opened/mapped.
int main()
{
    int fd0, fd3, fd4;
    int fd_udmabuf, fd_paddr;
    volatile unsigned int *axis_switch_0, *axis_switch_1;
    volatile unsigned int *axi_gpio_0;
    volatile unsigned int *frame_buffer;
    unsigned char attr[1024];
    int attr_len;
    unsigned long phys_addr;
    struct termios save_settings;
    struct termios settings;
    int stdin_flags;
    int xval, yval;
    int inbyte_in;
    int result_disp = 0;
    int max_id_float;
    struct timeval start_time, end_time;
    XMnist_conv_nn mcnn;
    XSquare_frame_gen sf_gen;
    int i, res;
    int max_id;
    int result[10];
    float result_float[10];

    // Referred to http://d.hatena.ne.jp/mFumi/20101002/1286003738
    // Put the terminal into non-canonical, no-echo mode so single keys are
    // delivered immediately.
    tcgetattr(0,&save_settings);
    settings = save_settings;
    settings.c_lflag &= ~(ECHO|ICANON);
    settings.c_cc[VTIME] = 0;
    settings.c_cc[VMIN] = 1;
    tcsetattr(0,TCSANOW,&settings);
    // Remember the stdin status flags so O_NONBLOCK can be removed again on
    // exit; the flag lives on the open file description, which the parent
    // shell may share.
    stdin_flags = fcntl(0, F_GETFL);
    if (stdin_flags == -1)
        stdin_flags = 0;
    fcntl(0, F_SETFL, stdin_flags | O_NONBLOCK);

    // axi_gpio_0 (uio0)
    fd0 = open("/dev/uio0", O_RDWR); // axi_gpio_0
    if (fd0 < 0){ // open() reports failure with -1; 0 is a valid descriptor
        fprintf(stderr, "/dev/uio0 (axi_gpio_0) open error\n");
        exit(-1);
    }
    axi_gpio_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd0, 0);
    if (axi_gpio_0 == MAP_FAILED){
        fprintf(stderr, "axi_gpio_0 mmap error\n");
        exit(-1);
    }

    // axis_switch_0 (uio3)
    fd3 = open("/dev/uio3", O_RDWR); // axis_switch_0
    if (fd3 < 0){
        fprintf(stderr, "/dev/uio3 (axis_switch_0) open error\n");
        exit(-1);
    }
    axis_switch_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd3, 0);
    if (axis_switch_0 == MAP_FAILED){
        fprintf(stderr, "axis_switch_0 mmap error\n");
        exit(-1);
    }

    // axis_switch_1 (uio4)
    fd4 = open("/dev/uio4", O_RDWR); // axis_switch_1
    if (fd4 < 0){
        fprintf(stderr, "/dev/uio4 (axis_switch_1) open error\n");
        exit(-1);
    }
    axis_switch_1 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd4, 0);
    if (axis_switch_1 == MAP_FAILED){
        fprintf(stderr, "axis_switch_1 mmap error\n");
        exit(-1);
    }

    // udmabuf4
    fd_udmabuf = open("/dev/udmabuf4", O_RDWR | O_SYNC); // frame_buffer, the cache is disabled.
    if (fd_udmabuf == -1){
        fprintf(stderr, "/dev/udmabuf4 open error\n");
        exit(-1);
    }
    frame_buffer = (volatile unsigned int *)mmap(NULL, (ALL_DISP_ADDRESS*3), PROT_READ|PROT_WRITE, MAP_SHARED, fd_udmabuf, 0);
    if (frame_buffer == MAP_FAILED){
        fprintf(stderr, "frame_buffer mmap error\n");
        exit(-1);
    }

    // phys_addr of udmabuf4
    fd_paddr = open("/sys/devices/soc0/amba/amba:udmabuf4/udmabuf/udmabuf4/phys_addr", O_RDONLY);
    if (fd_paddr == -1){
        fprintf(stderr, "/sys/devices/soc0/amba/amba:udmabuf4/udmabuf/udmabuf4/phys_addr open error\n");
        exit(-1);
    }
    // read() does not NUL-terminate, so terminate the buffer before parsing it
    attr_len = (int)read(fd_paddr, attr, sizeof(attr) - 1);
    close(fd_paddr);
    if (attr_len <= 0){
        fprintf(stderr, "udmabuf4 phys_addr read error\n");
        exit(-1);
    }
    attr[attr_len] = '\0';
    if (sscanf((const char *)attr, "%lx", &phys_addr) != 1){
        fprintf(stderr, "udmabuf4 phys_addr parse error\n");
        exit(-1);
    }
    printf("phys_addr = %x\n", (unsigned int)phys_addr);

    // Mnist_conv_nn, Square_frame_gen Initialize
    if (XMnist_conv_nn_Initialize(&mcnn, "mnist_conv_nn_0") != XST_SUCCESS){
        fprintf(stderr,"mnist_conv_nn_0 open error\n");
        exit(-1);
    }
    if (XSquare_frame_gen_Initialize(&sf_gen, "square_frame_gen_0") != XST_SUCCESS){
        fprintf(stderr,"square_frame_gen_0 open error\n");
        exit(-1);
    }

    // square_frame_gen initialize: 28x28 frame at the centre of the screen
    XSquare_frame_gen_Set_x_pos(&sf_gen, HORIZONTAL_PIXELS/2);
    xval = HORIZONTAL_PIXELS/2;
    XSquare_frame_gen_Set_y_pos(&sf_gen, VERTICAL_LINES/2);
    yval = VERTICAL_LINES/2;
    XSquare_frame_gen_Set_width(&sf_gen, 28);
    XSquare_frame_gen_Set_height(&sf_gen, 28);
    XSquare_frame_gen_Set_off_on(&sf_gen, 1); // on

    // XSquare_frame_gen start
    XSquare_frame_gen_DisableAutoRestart(&sf_gen);
    while(!XSquare_frame_gen_IsIdle(&sf_gen)) ;
    XSquare_frame_gen_Start(&sf_gen);
    XSquare_frame_gen_EnableAutoRestart(&sf_gen);

    // mnist_conv_nn initialize: column offset plus the physical address of
    // the first framed line
    XMnist_conv_nn_Set_addr_offset(&mcnn, HORIZONTAL_PIXELS/2);
    XMnist_conv_nn_Set_in_r(&mcnn, (unsigned int)phys_addr+HORIZONTAL_PIXELS*(VERTICAL_LINES/2)*sizeof(int));

    // axis_switch_1, 1to2 ,Select M00_AXIS
    // Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
    axis_switch_1[16] = 0x80000000; // 0x40 = 0x80000000, disable
    axis_switch_1[17] = 0x0; // 0x44 = 0
    axis_switch_1[0] = 0x2; // Commit registers

    // axis_switch_0, 2to1, Select S00_AXIS
    // Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
    axis_switch_0[16] = 0x1; // 0x40 = 1;
    axis_switch_0[0] = 0x2; // Commit registers

    axi_gpio_0[0] = 0; // LED 0 clear

    printf("mnist_conv_nn_test, <h> : left, <k> : up, <j> : down, <l> : right, <r> : result, <d> : help, <q> : exit\n");

    while(1){ // main loop
        // NOTE(review): fflush() on an input stream is not defined by ISO C;
        // kept because the key handling may depend on it — confirm on target.
        // NOTE(review): when no key is pending, getchar() returns EOF and the
        // loop spins without sleeping — confirm whether that is intended.
        inbyte_in = getchar(); fflush(stdin);
        if (inbyte_in != EOF){
            if (inbyte_in == 'q') // exit: leave the loop so cleanup below runs
                break;
            else if (inbyte_in < 'A' || inbyte_in > 'z')
                usleep(10000);
        }

        switch(inbyte_in) {
            case 'h' : // left
            case 'H' : // left -5
                if(inbyte_in == 'h' && xval > 0)
                    --xval;
                else if(inbyte_in == 'H' && xval >= 5)
                    xval -= 5;
                XSquare_frame_gen_Set_x_pos(&sf_gen, xval);
                XMnist_conv_nn_Set_addr_offset(&mcnn, xval);
                printf("X_POS = %d, Y_POS = %d\n", xval, yval);
                break;
            case 'l' : // right
            case 'L' : // right +5
                if(inbyte_in == 'l' && xval < HORIZONTAL_PIXELS-28)
                    xval++;
                else if(inbyte_in == 'L' && xval <= HORIZONTAL_PIXELS-28-5)
                    xval += 5;
                XSquare_frame_gen_Set_x_pos(&sf_gen, xval);
                XMnist_conv_nn_Set_addr_offset(&mcnn, xval);
                printf("X_POS = %d, Y_POS = %d\n", xval, yval);
                break;
            case 'k' : // up
            case 'K' : // up -5
                if(inbyte_in == 'k' && yval > 0)
                    --yval;
                else if(inbyte_in == 'K' && yval >= 5)
                    yval -= 5;
                XSquare_frame_gen_Set_y_pos(&sf_gen, yval);
                XMnist_conv_nn_Set_in_r(&mcnn, (unsigned int)phys_addr+HORIZONTAL_PIXELS*yval*sizeof(int));
                printf("X_POS = %d, Y_POS = %d\n", xval, yval);
                break;
            case 'j' : // down
            case 'J' : // down +5
                if(inbyte_in == 'j' && yval < VERTICAL_LINES-28)
                    yval++;
                else if(inbyte_in == 'J' && yval <= VERTICAL_LINES-28-5)
                    yval += 5;
                XSquare_frame_gen_Set_y_pos(&sf_gen, yval);
                XMnist_conv_nn_Set_in_r(&mcnn, (unsigned int)phys_addr+HORIZONTAL_PIXELS*yval*sizeof(int));
                printf("X_POS = %d, Y_POS = %d\n", xval, yval);
                break;
            case 'r' : // result check
                result_disp = 1;
                break;
            case 'd' : // help
            case 'D' :
                printf("mnist_conv_nn_test, <h> : left, <k> : up, <j> : down, <l> : right, <r> : result, <d> : help, <q> : exit\n");
                break;
            // 'q' never reaches the switch: it is handled right after getchar().
        }

        if(result_disp){
            printf("\nHardware\n");
            // XMnist_conv_nn start: run once and time it by polling IsIdle
            XMnist_conv_nn_DisableAutoRestart(&mcnn);
            while(!XMnist_conv_nn_IsIdle(&mcnn));
            gettimeofday(&start_time, NULL);
            XMnist_conv_nn_Start(&mcnn);
            while(!XMnist_conv_nn_IsIdle(&mcnn));
            gettimeofday(&end_time, NULL);
            print_conv_time(&start_time, &end_time);

            // mnist cnn result check: each 32-bit word carries two 12-bit
            // signed results (bits 11:0 and bits 27:16)
            for(i=0; i<5; i++){
                XMnist_conv_nn_Read_out_V_Words(&mcnn, i, &res, 1);
                result[i*2] = res & 0x0fff;
                if(result[i*2] & 0x800) // minus
                    result[i*2] = 0xfffff000 | result[i*2]; // Sign extension
                result[i*2+1] = (res & 0x0fff0000) >> 16;
                if(result[i*2+1] & 0x800) // minus
                    result[i*2+1] = 0xfffff000 | result[i*2+1]; // Sign extension
            }
            max_id = max_int(result);
            axi_gpio_0[0] = max_id; // show the recognised digit on the LEDs
            for(i=0; i<10; i++){
                printf("result[%d] = %x\n", i, result[i]);
            }
            printf("max_id = %d\n", max_id);

            printf("\nSoftware\n");
            gettimeofday(&start_time, NULL);
            // Pointer arithmetic on the mapping instead of a round trip through
            // unsigned int, which would truncate the address on 64-bit hosts.
            mnist_conv_nn_float((int *)frame_buffer + HORIZONTAL_PIXELS*yval, xval, result_float);
            gettimeofday(&end_time, NULL);
            max_id_float = max_float(result_float);
            print_conv_time(&start_time, &end_time);
            for(i=0; i<10; i++){
                printf("result_float[%d] = %f\n", i, result_float[i]);
            }
            printf("max_id_float = %d\n", max_id_float);
            printf("\n");
            result_disp = 0;
        }
    }

    munmap((void *)axi_gpio_0, 0x10000);
    munmap((void *)axis_switch_0, 0x10000);
    munmap((void *)axis_switch_1, 0x10000);
    munmap((void *)frame_buffer, (ALL_DISP_ADDRESS*3));
    close(fd0);
    close(fd3);
    close(fd4);
    close(fd_udmabuf);

    // Referred to http://d.hatena.ne.jp/mFumi/20101002/1286003738
    // Restore the stdin flags and the terminal settings saved at start-up.
    fcntl(0, F_SETFL, stdin_flags);
    tcsetattr(0,TCSANOW,&save_settings);
    return(0);
}
// Return the index (0..9) of the largest of the ten values in out.
// When the maximum occurs more than once, the lowest index is returned.
int max_int(int out[10]){
    int best_index = 0;
    int best_value = out[0];
    int idx;

    for (idx = 1; idx < 10; idx++){
        if (best_value < out[idx]){ // strictly greater: earlier entries win ties
            best_value = out[idx];
            best_index = idx;
        }
    }
    return best_index;
}
// Floating-point software implementation of the MNIST CNN.
//
// in          : pixel data read as 28 rows of 800 ints (index i*800+j).
// addr_offset : first column of the 28-pixel-wide region used on each row.
// out         : receives the ten outputs of the last fully connected layer.
// Returns 0 always.
//
// Pipeline: grey-scale conversion and inversion -> 5x5 convolution with 10
// filters + ReLU -> 2x2 max pooling -> fully connected layer with 100 outputs
// + ReLU -> fully connected layer with 10 outputs.
int mnist_conv_nn_float(int in[22400], int addr_offset, float out[10]){
int i, j, k, m, n, col, row;
float buf[28][28];
float conv_out[10][24][24];
float pool_out[10][12][12];
float dot1[100];
float dot2[10];
//
/*for (i=0; i<28; i++){ for (j=0; j<800; j++){ if (j>=addr_offset && j<addr_offset+28) printf("%2x, ", (int)(conv_rgb2y_soft(in[i*800+j])*256.0)); } printf("\n"); } */
// Copy the 28x28 region starting at column addr_offset into buf, converting
// each pixel with conv_rgb2y_soft() and subtracting the result from 0.99609375.
for(i=0; i<28; i++){
for(j=0; j<800; j++){
if (j>=addr_offset && j<addr_offset+28){
buf[i][j-addr_offset] = (float)0.99609375 - (float)conv_rgb2y_soft(in[i*800+j]);
}
}
}
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
for(i=0; i<10; i++){ // one pass per filter
for(j=0; j<24; j++){
for(k=0; k<24; k++){
conv_out[i][j][k] = 0;
for(m=0; m<5; m++){
for(n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_fweight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_fbias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
// (maximum of each 2x2 tile; the tile's first element seeds the maximum)
for(i=0; i<10; i++){
for(j=0; j<24; j += 2){
for(k=0; k<24; k += 2){
for(m=0; m<2; m++){
for(n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
// First fully connected layer: 10*12*12 pooled values -> 100 outputs, + ReLU.
// The pooled values are flattened in (filter, row, column) order.
for(col=0; col<100; col++){
dot1[col] = 0;
for(i=0; i<10; i++){
for(j=0; j<12; j++){
for(k=0; k<12; k++){
dot1[col] += pool_out[i][j][k]*af1_fweight[i*12*12+j*12+k][col];
}
}
}
dot1[col] += af1_fbias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
// Second fully connected layer: 100 -> 10 outputs, no activation applied here.
for(col=0; col<10; col++){
dot2[col] = 0;
for(row=0; row<100; row++){
dot2[col] += dot1[row]*af2_fweight[row][col];
}
dot2[col] += af2_fbias[col];
out[col] = dot2[col];
}
return(0);
}
// Return the index (0..9) of the largest of the ten values in out.
// When the maximum occurs more than once, the lowest index is returned.
int max_float(float out[10]){
    int best_index = 0;
    float best_value = out[0];
    int idx;

    for (idx = 1; idx < 10; idx++){
        if (out[idx] > best_value){ // strictly greater: earlier entries win ties
            best_index = idx;
            best_value = out[idx];
        }
    }
    return best_index;
}
// Convert a packed RGB pixel (blue in bits 7:0, green in 15:8, red in 23:16)
// to a luma value scaled by 1/256.
// The integer luma is (77*R + 150*G + 29*B) >> 8, i.e. the coefficients of
// y = 0.299*r + 0.587*g + 0.114*b multiplied by 256; it is limited to 255 and
// then divided by 256.0.
float conv_rgb2y_soft(int rgb){
    const int blue  = rgb & 0xff;
    const int green = (rgb >> 8) & 0xff;
    const int red   = (rgb >> 16) & 0xff;
    int luma = (77 * red + 150 * green + 29 * blue) >> 8; // /256

    if (luma > 255)
        luma = 255;
    return (float)((float)luma / 256.0);
}
にありました。C:\Users\Masaaki\AppData\Local\lxss
/*
 * Device tree overlay with a single fragment applied to /amba.
 * It declares nine memory-mapped peripherals with compatible = "generic-uio"
 * (each with a 0x10000-byte register window), one udmabuf buffer and three
 * fclkcfg clock nodes.
 *
 * The C programs that use this overlay open /dev/uio0 (axi_gpio_0),
 * /dev/uio1 (axi_iic_0), /dev/uio2 (axi_vdma_0), /dev/uio3 (axis_switch_0),
 * /dev/uio4 (axis_switch_1), /dev/uio5 (bitmap_disp_cntrler_axi_master_0) and
 * /dev/uio7 (mt9d111_inf_axis_0), which matches the node order below —
 * presumably the UIO numbers follow declaration order; confirm before
 * reordering or inserting nodes.
 *
 * NOTE(review): every node uses a property named "#interrupts" rather than
 * "interrupts" — presumably a deliberate way to leave interrupts unused;
 * confirm.
 */
/dts-v1/;
/ {
fragment@0 {
target-path = "/amba";
__overlay__ {
#address-cells = <0x1>;
#size-cells = <0x1>;
axi_gpio_0@41200000 {
compatible = "generic-uio";
reg = <0x41200000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axi_iic_0@41600000 {
compatible = "generic-uio";
reg = <0x41600000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axi_vdma_0@43000000 {
compatible = "generic-uio";
reg = <0x43000000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axis_switch_0@43C10000 {
compatible = "generic-uio";
reg = <0x43C10000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axis_switch_1@43C20000 {
compatible = "generic-uio";
reg = <0x43C20000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
bitmap_disp_cntrler_axi_master_0@43C00000 {
compatible = "generic-uio";
reg = <0x43C00000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
mnist_conv_nn_0@43C50000 {
compatible = "generic-uio";
reg = <0x43C50000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
mt9d111_inf_axis_0@43C40000 {
compatible = "generic-uio";
reg = <0x43C40000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
square_frame_gen_0@43C30000 {
compatible = "generic-uio";
reg = <0x43C30000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
/*
 * Frame buffer memory. size = 0x00600000 (6,291,456 bytes); the programs map
 * 3 frames of 800 x 600 x 4 bytes = 5,760,000 bytes from /dev/udmabuf4.
 */
udmabuf4 {
compatible = "ikwzm,udmabuf-0.10.a";
minor-number = <4>;
size = <0x00600000>;
};
/*
 * Clock nodes; clock_settings.sh sets the fclk1 and fclk2 rates through
 * /sys/class/fclkcfg/<name>/rate.
 */
fclk0 {
compatible = "ikwzm,fclkcfg-0.10.a";
clocks = <1 15>;
};
fclk1 {
compatible = "ikwzm,fclkcfg-0.10.a";
clocks = <1 16>;
};
fclk2 {
compatible = "ikwzm,fclkcfg-0.10.a";
clocks = <1 17>;
};
};
};
};
#!/bin/sh
# clock_settings.sh
# Set the fclk1 and fclk2 clock rates (in Hz) through the fclkcfg sysfs
# interface and echo back the rate each clock reports afterwards.

# set_and_show RATE_HZ RATE_FILE: write the rate, then print the file content.
set_and_show() {
    echo $1 > $2
    cat $2
}

set_and_show 25000000 /sys/class/fclkcfg/fclk1/rate
set_and_show 72000000 /sys/class/fclkcfg/fclk2/rate
// cam_disp.c
// 2017/07/15 by marsee
//
// cam_disp for pynq_mnist_cnn
//
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/mman.h>
#include <fcntl.h>
#define NUMBER_OF_WRITE_FRAMES 3 // Note: If not at least 3 or more, the image is not displayed in succession.
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
#define PIXEL_NUM_OF_BYTES 4
#define ALL_DISP_ADDRESS (HORIZONTAL_PIXELS*VERTICAL_LINES*PIXEL_NUM_OF_BYTES)
#define FASTX_THRESHOLD 20
// Initialise the I2C controller used for the MT9D111 camera.
// Word index 64 of the register block (byte address 0x100) is the I2C control
// register: it is first written with 0x2 (reset TX FIFO) and then with 0x1
// (enable I2C). Both writes go through the volatile pointer, in that order.
void cam_i2c_init(volatile unsigned *mt9d111_i2c_axi_lites) {
    volatile unsigned *const i2c_control_reg = mt9d111_i2c_axi_lites + 64;

    *i2c_control_reg = 0x2; // reset tx fifo
    *i2c_control_reg = 0x1; // enable i2c
}
// Give a queued I2C write time to complete by sleeping for a fixed 1000 us.
// An earlier status-polling implementation is kept below for reference:
//   unsigned c;
//   c = *cam_i2c_rx_fifo;
//   while ((c & 0x84) != 0x80)
//       c = *cam_i2c_rx_fifo; // No Bus Busy and TX_FIFO_Empty = 1
void cam_i2x_write_sync(void) {
    const unsigned int settle_time_us = 1000;

    usleep(settle_time_us);
}
// Write one 16-bit value to an 8-bit register address of an I2C device.
//
// mt9d111_i2c_axi_lites : base of the I2C controller register block.
// device_addr           : I2C device address; bit 0 is cleared for a write.
// write_addr            : register address inside the device (low 8 bits used).
// write_data            : 16-bit value, sent high byte first.
//
// Four words are pushed into the TX FIFO (word index 66, byte address 0x108):
// device address with flag 0x100, register address, high data byte, and low
// data byte with flag 0x200. The flags presumably mark the start and stop of
// the transfer in the controller's dynamic mode — confirm against the AXI IIC
// documentation. The function then waits via cam_i2x_write_sync().
void cam_i2c_write(volatile unsigned *mt9d111_i2c_axi_lites, unsigned int device_addr, unsigned int write_addr, unsigned int write_data){
    mt9d111_i2c_axi_lites[66] = 0x100 | (device_addr & 0xfe); // Slave IIC Write Address, address is 0x108, i2c_tx_fifo
    // Mask to 8 bits so an oversized address cannot set the 0x100/0x200 flag bits.
    mt9d111_i2c_axi_lites[66] = write_addr & 0xff;
    // High byte of the data. The mask must be an AND: OR-ing with 0xff forced
    // this byte to 0xff for every value and could leak bits 16 and above.
    mt9d111_i2c_axi_lites[66] = (write_data >> 8) & 0xff; // first data
    mt9d111_i2c_axi_lites[66] = 0x200 | (write_data & 0xff); // second data
    cam_i2x_write_sync();
}
// Camera-to-display bring-up for pynq_mnist_cnn.
//
// Maps the AXI IIC, AXI VDMA, both AXI4-Stream switches, the bitmap display
// controller and the MT9D111 camera interface through UIO, plus the udmabuf4
// frame buffer; routes the stream switches; programs the VDMA write (S2MM)
// channel for NUMBER_OF_WRITE_FRAMES frames of
// HORIZONTAL_PIXELS x VERTICAL_LINES x PIXEL_NUM_OF_BYTES; configures the
// camera over I2C; and finally hands the frame buffer physical address to the
// camera interface and the display controller.
// Returns 0 on success; exits with -1 if a device cannot be opened or mapped.
int main()
{
    int fd1, fd2, fd3, fd4, fd5, fd7;
    int fd_udmabuf, fd_paddr;
    volatile unsigned int *axi_iic_0, *axi_vdma_0, *axis_switch_0, *axis_switch_1;
    volatile unsigned int *bitmap_disp_cntrler_axim_0;
    volatile unsigned int *mt9d111_inf_axis_0;
    volatile unsigned int *frame_buffer;
    unsigned char attr[1024];
    int attr_len;
    unsigned long phys_addr;

    // axi_iic_0 (uio1)
    fd1 = open("/dev/uio1", O_RDWR); // axi_iic_0
    if (fd1 < 0){ // open() reports failure with -1; 0 is a valid descriptor
        fprintf(stderr, "/dev/uio1 (axi_iic_0) open error\n");
        exit(-1);
    }
    axi_iic_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0);
    if (axi_iic_0 == MAP_FAILED){
        fprintf(stderr, "axi_iic_0 mmap error\n");
        exit(-1);
    }

    // axi_vdma_0 (uio2)
    fd2 = open("/dev/uio2", O_RDWR); // axi_vdma_0
    if (fd2 < 0){
        fprintf(stderr, "/dev/uio2 (axi_vdma_0) open error\n");
        exit(-1);
    }
    axi_vdma_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd2, 0);
    if (axi_vdma_0 == MAP_FAILED){
        fprintf(stderr, "axi_vdma_0 mmap error\n");
        exit(-1);
    }

    // axis_switch_0 (uio3)
    fd3 = open("/dev/uio3", O_RDWR); // axis_switch_0
    if (fd3 < 0){
        fprintf(stderr, "/dev/uio3 (axis_switch_0) open error\n");
        exit(-1);
    }
    axis_switch_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd3, 0);
    if (axis_switch_0 == MAP_FAILED){
        fprintf(stderr, "axis_switch_0 mmap error\n");
        exit(-1);
    }

    // axis_switch_1 (uio4)
    fd4 = open("/dev/uio4", O_RDWR); // axis_switch_1
    if (fd4 < 0){
        fprintf(stderr, "/dev/uio4 (axis_switch_1) open error\n");
        exit(-1);
    }
    axis_switch_1 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd4, 0);
    if (axis_switch_1 == MAP_FAILED){
        fprintf(stderr, "axis_switch_1 mmap error\n");
        exit(-1);
    }

    // bitmap_disp_cntrler_axim_0 (uio5)
    fd5 = open("/dev/uio5", O_RDWR); // bitmap_disp_cntrler_axim_0
    if (fd5 < 0){
        fprintf(stderr, "/dev/uio5 (bitmap_disp_cntrler_axim_0) open error\n");
        exit(-1);
    }
    bitmap_disp_cntrler_axim_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd5, 0);
    if (bitmap_disp_cntrler_axim_0 == MAP_FAILED){
        fprintf(stderr, "bitmap_disp_cntrler_axim_0 mmap error\n");
        exit(-1);
    }

    // mt9d111_inf_axis_0 (uio7)
    fd7 = open("/dev/uio7", O_RDWR); // mt9d111_inf_axis_0
    if (fd7 < 0){
        fprintf(stderr, "/dev/uio7 (mt9d111_inf_axis_0) open error\n");
        exit(-1);
    }
    mt9d111_inf_axis_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd7, 0);
    if (mt9d111_inf_axis_0 == MAP_FAILED){
        fprintf(stderr, "mt9d111_inf_axis_0 mmap error\n");
        exit(-1);
    }

    // udmabuf4
    fd_udmabuf = open("/dev/udmabuf4", O_RDWR | O_SYNC); // frame_buffer, the cache is disabled.
    if (fd_udmabuf == -1){
        fprintf(stderr, "/dev/udmabuf4 open error\n");
        exit(-1);
    }
    frame_buffer = (volatile unsigned int *)mmap(NULL, (ALL_DISP_ADDRESS*3), PROT_READ|PROT_WRITE, MAP_SHARED, fd_udmabuf, 0);
    if (frame_buffer == MAP_FAILED){
        fprintf(stderr, "frame_buffer mmap error\n");
        exit(-1);
    }

    // phys_addr of udmabuf4
    fd_paddr = open("/sys/devices/soc0/amba/amba:udmabuf4/udmabuf/udmabuf4/phys_addr", O_RDONLY);
    if (fd_paddr == -1){
        fprintf(stderr, "/sys/devices/soc0/amba/amba:udmabuf4/udmabuf/udmabuf4/phys_addr open error\n");
        exit(-1);
    }
    // read() does not NUL-terminate, so terminate the buffer before parsing it
    attr_len = (int)read(fd_paddr, attr, sizeof(attr) - 1);
    close(fd_paddr);
    if (attr_len <= 0){
        fprintf(stderr, "udmabuf4 phys_addr read error\n");
        exit(-1);
    }
    attr[attr_len] = '\0';
    if (sscanf((const char *)attr, "%lx", &phys_addr) != 1){
        fprintf(stderr, "udmabuf4 phys_addr parse error\n");
        exit(-1);
    }
    printf("phys_addr = %x\n", (unsigned int)phys_addr);

    // axis_switch_1, 1to2 ,Select M00_AXIS
    // Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
    axis_switch_1[16] = 0x0; // 0x40 = 0
    axis_switch_1[17] = 0x80000000; // 0x44 = 0x80000000, disable
    axis_switch_1[18] = 0x80000000; // 0x48 = 0x80000000, disable
    axis_switch_1[19] = 0x80000000; // 0x4C = 0x80000000, disable
    axis_switch_1[0] = 0x2; // Commit registers

    // axis_switch_0, 2to1, Select S00_AXIS
    // Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
    axis_switch_0[16] = 0x0; // 0x40 = 0;
    axis_switch_0[0] = 0x2; // Commit registers

    // AXI VDMA Initialization sequence (axi_vdma_0)
    // NOTE(review): the reset is issued twice in a row, exactly as in the
    // original sequence — presumably deliberate; confirm.
    axi_vdma_0[12] = 0x4; // S2MM_VDMACR (S2MM VDMA Control Register Offset 30h) is 0x4
    while ((axi_vdma_0[12] & 0x4) == 0x4) ; // Reset in progress
    axi_vdma_0[12] = 0x4; // S2MM_VDMACR (S2MM VDMA Control Register Offset 30h) is 0x4
    while ((axi_vdma_0[12] & 0x4) == 0x4) ; // Reset in progress
    axi_vdma_0[18] = NUMBER_OF_WRITE_FRAMES; // S2MM_FRMSTORE (0x48) register
    axi_vdma_0[12] = 0x00010002; // S2MM_VDMACR(IRQFrameCount = 0x1, Circular_Park = 1)
    axi_vdma_0[41] = HORIZONTAL_PIXELS*PIXEL_NUM_OF_BYTES; // S2MM Horizontal Size Register(S2MM_HSIZE)0xc80 = 3200dec = 800 x 4
    axi_vdma_0[42] = HORIZONTAL_PIXELS*PIXEL_NUM_OF_BYTES; // S2MM Frame Delay and Stride Register(S2MM_FRMDLY_STRIDE)0xc80 = 3200dec = 800 x 4
    // All three frame start addresses point at the same buffer.
    axi_vdma_0[43] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 1
    axi_vdma_0[44] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 2
    axi_vdma_0[45] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 3
    axi_vdma_0[12] = 0x00010003; // S2MM_VDMACR(IRQFrameCount = 0x1, Circular_Park = 1, Run/stop = 1)
    while((axi_vdma_0[13] & 0x1) == 0x1) ; // Halt? (S2MM_VDMASR 0x34)
    // The vertical size is written last, after the channel has left the halted state.
    axi_vdma_0[40] = VERTICAL_LINES; // S2MM Vertical Size (S2MM_VSIZE Offset 0xA0) 0x258 = 600dec

    // CMOS Camera initialize, MT9D111
    cam_i2c_init(axi_iic_0);
    cam_i2c_write(axi_iic_0, 0xba, 0xf0, 0x1); // Changed register map to IFP page 1
    cam_i2c_write(axi_iic_0, 0xba, 0x97, 0x20); // RGB Mode, RGB565

    // Hand the frame buffer address to the camera interface and the display controller
    mt9d111_inf_axis_0[1] = 0;
    mt9d111_inf_axis_0[0] = (unsigned int)phys_addr;
    bitmap_disp_cntrler_axim_0[0] = (unsigned int)phys_addr;

    munmap((void *)axi_iic_0, 0x10000);
    munmap((void *)axi_vdma_0, 0x10000);
    munmap((void *)axis_switch_0, 0x10000);
    munmap((void *)axis_switch_1, 0x10000);
    munmap((void *)bitmap_disp_cntrler_axim_0, 0x10000);
    munmap((void *)mt9d111_inf_axis_0, 0x10000);
    munmap((void *)frame_buffer, (ALL_DISP_ADDRESS*3));
    close(fd1);
    close(fd2);
    close(fd3);
    close(fd4);
    close(fd5);
    close(fd7);
    close(fd_udmabuf);
    return(0);
}
を変更する方法でもう一度やってみた。値は、expected_memory_usage=30000000
とした。expected_memory_usage=16000000
をexpected_memory_usage=30000000
に変更した。expected_memory_usage=16000000
[Common 17-348] Failed to get the license for feature 'Synthesis' and/or device 'xcvu9p-flgb2104'. Explanation: The license feature Synthesis could not be found.
Resolution: Check the status of your licenses in the Vivado License Manager. For debug help search Xilinx Support for "Licensing FAQ".
masaaki@masaaki-VirtualBox:~/aws-fpga$ source hdk_setup.sh
INFO: Using Vivado v2017.1 (64-bit)
INFO: Setting up environment variables
INFO: Using HDK shell version shell_v04151701
INFO: HDK shell is up-to-date
INFO: DDR4 model files in /home/masaaki/aws-fpga/hdk/common/verif/models/ddr4_model/ do NOT exist. Running model creation step.
INFO: Building in /home/masaaki/aws-fpga/ddr4_model_build
INFO: This could take 5-10 minutes, please be patient!
/home/masaaki/aws-fpga/hdk/common/verif/scripts/init.sh: 18: /home/masaaki/aws-fpga/hdk/common/verif/scripts/init.sh: [[: not found
/home/masaaki/aws-fpga/hdk/common/verif/scripts/init.sh: 23: /home/masaaki/aws-fpga/hdk/common/verif/scripts/init.sh: [[: not found
/home/masaaki/aws-fpga/hdk/common/verif/scripts/init.sh: 29: /home/masaaki/aws-fpga/hdk/common/verif/scripts/init.sh: [[: not found
****** Vivado v2017.1 (64-bit)
**** SW Build 1846317 on Fri Apr 14 18:54:47 MDT 2017
**** IP Build 1846188 on Fri Apr 14 20:52:08 MDT 2017
** Copyright 1986-2017 Xilinx, Inc. All Rights Reserved.
source /home/masaaki/aws-fpga/hdk/common/verif/scripts/init.tcl
# set_msg_config -severity INFO -suppress
# set_msg_config -severity STATUS -suppress
# set_msg_config -severity WARNING -suppress
CRITICAL WARNING: [Common 17-1355] You are suppressing all messages of type 'WARNING'. You may potentially disregard important DRC, CDC, and implementation messages that can negatively impact your design. If this is not desired, please run 'reset_msg_config -suppress -severity {WARNING}' to undo this change.
# set_msg_config -string {exportsim} -suppress
# set_msg_config -string {IP_Flow} -suppress
# create_project -force tmp_ddr ./tmp -part xcvu9p-flgb2104-2-i
# add_files -norecurse $::env(HDK_COMMON_DIR)/shell_stable/design/ip/ddr4_core/ddr4_core.xci
INFO: [IP_Flow 19-234] Refreshing IP repositories
INFO: [IP_Flow 19-1704] No user IP repositories specified
INFO: [IP_Flow 19-2313] Loaded Vivado IP repository '/opt/Xilinx/Vivado/2017.1/data/ip'.
add_files: Time (s): cpu = 00:00:05 ; elapsed = 00:00:07 . Memory (MB): peak = 1308.211 ; gain = 213.969 ; free physical = 474 ; free virtual = 18367
# export_ip_user_files -of_objects [get_files $::env(HDK_COMMON_DIR)/shell_stable/design/ip/ddr4_core/ddr4_core.xci] -force -quiet
# open_example_project -force -dir ./tmp/tmp_ddr_ex [get_ips ddr4_core]
INFO: [IP_Flow 19-1686] Generating 'Examples' target for IP 'ddr4_core'...
INFO: [Device 21-403] Loading part xcvu9p-flgb2104-2-i
****** Vivado v2017.1 (64-bit)
**** SW Build 1846317 on Fri Apr 14 18:54:47 MDT 2017
**** IP Build 1846188 on Fri Apr 14 20:52:08 MDT 2017
** Copyright 1986-2017 Xilinx, Inc. All Rights Reserved.
source /home/masaaki/aws-fpga/hdk/common/shell_stable/design/ip/ddr4_core/ddr4_core_ex.tcl -notrace
INFO: [open_example_project] Creating new example project...
INFO: [open_example_project] Importing original IP ...
INFO: [IP_Flow 19-234] Refreshing IP repositories
INFO: [IP_Flow 19-1704] No user IP repositories specified
INFO: [IP_Flow 19-2313] Loaded Vivado IP repository '/opt/Xilinx/Vivado/2017.1/data/ip'.
import_ip: Time (s): cpu = 00:00:05 ; elapsed = 00:00:06 . Memory (MB): peak = 1316.215 ; gain = 8.012 ; free physical = 219 ; free virtual = 17375
INFO: [open_example_project] Generating the example project IP ...
INFO: [open_example_project] Adding example synthesis HDL files ...
INFO: [open_example_project] Adding example XDC files ...
INFO: [open_example_project] Adding simulation HDL files ...
INFO: [open_example_project] Sourcing example extension scripts ...
Post processing the example_design
update_compile_order: Time (s): cpu = 00:00:08 ; elapsed = 00:00:09 . Memory (MB): peak = 1316.645 ; gain = 0.418 ; free physical = 311 ; free virtual = 17369
INFO: [open_example_project] Rebuilding all the top level IPs ...
INFO: [exportsim-Tcl-35] Exporting simulation files for "XSIM" (Xilinx Vivado Simulator)...
INFO: [exportsim-Tcl-29] Script generated: '/home/masaaki/aws-fpga/ddr4_model_build/tmp/tmp_ddr_ex/ddr4_core_ex/ddr4_core_ex.ip_user_files/sim_scripts/ddr4_core/xsim/ddr4_core.sh'
Generating merged BMM file for the design top 'sim_tb_top'...
Generating merged BMM file for the design top 'sim_tb_top'...
INFO: [exportsim-Tcl-35] Exporting simulation files for "MODELSIM" (Mentor Graphics ModelSim Simulator)...
INFO: [exportsim-Tcl-29] Script generated: '/home/masaaki/aws-fpga/ddr4_model_build/tmp/tmp_ddr_ex/ddr4_core_ex/ddr4_core_ex.ip_user_files/sim_scripts/ddr4_core/modelsim/ddr4_core.sh'
INFO: [exportsim-Tcl-35] Exporting simulation files for "QUESTA" (Mentor Graphics Questa Advanced Simulator)...
INFO: [exportsim-Tcl-29] Script generated: '/home/masaaki/aws-fpga/ddr4_model_build/tmp/tmp_ddr_ex/ddr4_core_ex/ddr4_core_ex.ip_user_files/sim_scripts/ddr4_core/questa/ddr4_core.sh'
INFO: [exportsim-Tcl-35] Exporting simulation files for "IES" (Cadence Incisive Enterprise Simulator)...
INFO: [exportsim-Tcl-29] Script generated: '/home/masaaki/aws-fpga/ddr4_model_build/tmp/tmp_ddr_ex/ddr4_core_ex/ddr4_core_ex.ip_user_files/sim_scripts/ddr4_core/ies/ddr4_core.sh'
INFO: [exportsim-Tcl-35] Exporting simulation files for "VCS" (Synopsys Verilog Compiler Simulator)...
INFO: [exportsim-Tcl-29] Script generated: '/home/masaaki/aws-fpga/ddr4_model_build/tmp/tmp_ddr_ex/ddr4_core_ex/ddr4_core_ex.ip_user_files/sim_scripts/ddr4_core/vcs/ddr4_core.sh'
INFO: [exportsim-Tcl-35] Exporting simulation files for "RIVIERA" (Aldec Riviera-PRO Simulator)...
INFO: [exportsim-Tcl-29] Script generated: '/home/masaaki/aws-fpga/ddr4_model_build/tmp/tmp_ddr_ex/ddr4_core_ex/ddr4_core_ex.ip_user_files/sim_scripts/ddr4_core/riviera/ddr4_core.sh'
INFO: [exportsim-Tcl-35] Exporting simulation files for "ACTIVEHDL" (Aldec Active-HDL Simulator)...
INFO: [exportsim-Tcl-29] Script generated: '/home/masaaki/aws-fpga/ddr4_model_build/tmp/tmp_ddr_ex/ddr4_core_ex/ddr4_core_ex.ip_user_files/sim_scripts/ddr4_core/activehdl/ddr4_core.sh'
INFO: [open_example_project] Open Example Project completed
INFO: [Common 17-206] Exiting Vivado at Sun Jul 9 18:03:27 2017...
open_example_project: Time (s): cpu = 00:00:36 ; elapsed = 00:00:44 . Memory (MB): peak = 2018.074 ; gain = 678.875 ; free physical = 531 ; free virtual = 17600
# exit
INFO: [Common 17-206] Exiting Vivado at Sun Jul 9 18:03:28 2017...
Copying files to /home/masaaki/aws-fpga/hdk/common/verif/models/ddr4_model
Copying files to /home/masaaki/aws-fpga/hdk/common/verif/models/ddr4_rdimm_wrapper
INFO: DDR4 model build passed.
INFO: ATTENTION: Don't forget to set the CL_DIR variable for the directory of your Custom Logic.
INFO: AWS HDK setup PASSED.
masaaki@masaaki-VirtualBox:~/aws-fpga$
で、メモリは10GB 程度割り当てているのだが、だめだそうだ。
ERROR: YOUR INSTANCE has less memory than is necessary for certain builds. This means that your builds will take longer than expected.
To change to an instance type with more memory, please check our instance resize guide: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-instance-resize.html
をグローバル変数にしたのは、ローカル変数にしていては動作がおかしかったからだ。スタックに積むことができる大きさに制限があるのかもしれない?
float buf[28][28];
float conv_out[10][24][24];
float pool_out[10][12][12];
float dot1[100];
float dot2[10];
/* * mnist_conv_soft_test.c * * Created on: 2017/07/06 * Author: ono */
#include <stdio.h>
#include <stdlib.h>
#include "xaxivdma.h"
#include "xil_io.h"
#include "xparameters.h"
#include "sleep.h"
#include "xgpio.h"
#include "xtime_l.h"
#include "xmnist_conv_nn.h"
#include "xsquare_frame_gen.h"
#include "af1_bias_float.h"
#include "af1_weight_float.h"
#include "af2_bias_float.h"
#include "af2_weight_float.h"
#include "conv1_bias_float.h"
#include "conv1_weight_float.h"
// Address of the frame buffer; main() hands it to the VDMA frame stores, the
// bitmap display controller, the camera interface and the mnist_conv_nn IP.
#define FRAME_BUFFER_ADDRESS 0x10000000
#define NUMBER_OF_WRITE_FRAMES 3 // Note: If not at least 3 or more, the image is not displayed in succession.
// Frame geometry used for the VDMA sizes and for all window address arithmetic.
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
#define PIXEL_NUM_OF_BYTES 4
// Forward declarations of the helpers defined after main().
int max_int(int out[10]);
int max_float(float out[10]);
int mnist_conv_nn_float(int in[22400], int addr_offset, float out[10]);
float conv_rgb2y_soft(int rgb);
// VDMA write-channel configuration filled in by main().
static XAxiVdma_DmaSetup Vdma0_WriteCfg;
// Scratch buffers of mnist_conv_nn_float(), kept at file scope:
//   buf      - 28x28 input window (inverted luminance)
//   conv_out - output of the 10-kernel 5x5 convolution + ReLU
//   pool_out - output of the 2x2 max pooling
//   dot1     - output of the first fully connected layer (100 units)
//   dot2     - output of the second fully connected layer (10 units)
float buf[28][28];
float conv_out[10][24][24];
float pool_out[10][12][12];
float dot1[100];
float dot2[10];
/*
 * Initialise the AXI IIC core used to talk to the MT9D111 camera.
 *
 * mt9d111_i2c_axi_lites: base of the IIC register block, viewed as 32-bit words.
 * Word 64 (byte offset 0x100) is the i2c control register: it is first written
 * with 0x2 (reset TX FIFO) and then with 0x1 (enable i2c).
 */
void cam_i2c_init(volatile unsigned *mt9d111_i2c_axi_lites) {
    volatile unsigned *const control_reg = &mt9d111_i2c_axi_lites[64];

    *control_reg = 0x2; /* reset tx fifo */
    *control_reg = 0x1; /* enable i2c */
}
/*
 * Wait for an I2C write to the camera to finish.
 *
 * The status-polling variant (wait for "no bus busy" and "TX FIFO empty") is
 * not used; a fixed 1000 microsecond delay is applied instead.
 */
void cam_i2x_write_sync(void) {
    const unsigned settle_time_us = 1000;

    usleep(settle_time_us);
}
/*
 * Write one 16-bit value to a camera register over the AXI IIC core.
 *
 * mt9d111_i2c_axi_lites: base of the IIC register block, viewed as 32-bit words
 *                        (word 66 = byte offset 0x108 = i2c_tx_fifo).
 * device_addr:           I2C device address; bit 0 is cleared to select a write.
 * write_addr:            register address inside the device.
 * write_data:            16-bit register value, sent high byte first.
 *
 * The four FIFO words are: address byte with 0x100 set, register address,
 * high data byte, low data byte with 0x200 set.
 */
void cam_i2c_write(volatile unsigned *mt9d111_i2c_axi_lites, unsigned int device_addr, unsigned int write_addr, unsigned int write_data){
    mt9d111_i2c_axi_lites[66] = 0x100 | (device_addr & 0xfe); // Slave IIC Write Address, address is 0x108, i2c_tx_fifo
    mt9d111_i2c_axi_lites[66] = write_addr;
    // Mask (not OR) with 0xff so the real high byte is sent; OR-ing forced it to 0xff.
    mt9d111_i2c_axi_lites[66] = (write_data >> 8) & 0xff; // first data
    mt9d111_i2c_axi_lites[66] = 0x200 | (write_data & 0xff); // second data
    cam_i2x_write_sync();
}
/*
 * Entry point of the hardware/software MNIST CNN comparison test.
 *
 * Configures the AXI VDMA write channel, the square_frame_gen and mnist_conv_nn
 * IPs, the two AXI4-Stream switches, the bitmap display controller, the MT9D111
 * camera and the AXI GPIO, then loops on console input:
 *   h / H : move the 28x28 window left by 1 / 5 pixels
 *   l / L : move the window right by 1 / 5 pixels
 *   k / K : move the window up by 1 / 5 lines
 *   j / J : move the window down by 1 / 5 lines
 *   r     : run the hardware CNN and the software float CNN on the window and
 *           print both result vectors and their elapsed times
 *   q     : exit
 *
 * Returns 0 on 'q' and -1 when one of the checked VDMA setup steps fails.
 */
int main(){
    XMnist_conv_nn mcnn;
    XSquare_frame_gen sf_gen;
    int inbyte_in;
    int xval, yval;
    int i, res;
    int result[10];
    float result_float[10];
    static XGpio GPIOInstance_Ptr;
    int XGpio_Status;
    int max_id;
    XAxiVdma_Config *XAxiVdma0_Config;
    XAxiVdma XAxiVdma0;
    int XAxiVdma0_Status;
    int result_disp = 0;
    int conv_addr;
    int max_id_float;
    XTime start_time, end_time;

    // AXI VDMA Initialization sequence
    XAxiVdma0_Config = XAxiVdma_LookupConfig(XPAR_CAMERA_INTERFACE_AXI_VDMA_0_DEVICE_ID); // Look up the hardware configuration for a device instance
    if (XAxiVdma0_Config == NULL){
        fprintf(stderr, "No AXI VDMA found\n");
        return(-1);
    }

    XAxiVdma0_Status = XAxiVdma_CfgInitialize(&XAxiVdma0, XAxiVdma0_Config, XAxiVdma0_Config->BaseAddress); // Initialize the driver with hardware configuration
    if (XAxiVdma0_Status != XST_SUCCESS){
        fprintf(stderr, "XAxiVdma_CfgInitialize() failed\n");
        return(-1);
    }

    XAxiVdma_Reset(&XAxiVdma0, XAXIVDMA_WRITE);
    while(XAxiVdma_ResetNotDone(&XAxiVdma0, XAXIVDMA_WRITE)) ;

    XAxiVdma0_Status = XAxiVdma_SetFrmStore(&XAxiVdma0, NUMBER_OF_WRITE_FRAMES, XAXIVDMA_WRITE); // Set the number of frame store buffers to use.

    Vdma0_WriteCfg.VertSizeInput = VERTICAL_LINES;
    Vdma0_WriteCfg.HoriSizeInput = HORIZONTAL_PIXELS * PIXEL_NUM_OF_BYTES;
    Vdma0_WriteCfg.Stride = HORIZONTAL_PIXELS * PIXEL_NUM_OF_BYTES; // Indicates the number of address bytes between the first pixels of each video line.
    Vdma0_WriteCfg.FrameDelay = 0; // Indicates the minimum number of frame buffers the Genlock slave is to be behind the locked master. This field is only used if the channel is enabled for Genlock Slave operations. This field has no meaning in other Genlock modes.
    Vdma0_WriteCfg.EnableCircularBuf = 1; // Indicates frame buffer Circular mode or frame buffer Park mode. 1 = Circular Mode Engine continuously circles through frame buffers.
    Vdma0_WriteCfg.EnableSync = 0; // Enables Genlock or Dynamic Genlock Synchronization. 0 = Genlock or Dynamic Genlock Synchronization disabled.
    Vdma0_WriteCfg.PointNum = 0; // No Gen-Lock
    Vdma0_WriteCfg.EnableFrameCounter = 0; // Endless transfers
    Vdma0_WriteCfg.FixedFrameStoreAddr = 0; // We are not doing parking

    XAxiVdma0_Status = XAxiVdma_DmaConfig(&XAxiVdma0, XAXIVDMA_WRITE, &Vdma0_WriteCfg);
    if (XAxiVdma0_Status != XST_SUCCESS){
        fprintf(stderr, "XAxiVdma_DmaConfig() failed\n");
        return(-1);
    }

    // Frame buffer address set
    // All frame stores point at the same address (the per-frame increment is disabled).
    unsigned int frame_addr = (unsigned int)FRAME_BUFFER_ADDRESS;
    for (i=0; i<NUMBER_OF_WRITE_FRAMES; i++){
        Vdma0_WriteCfg.FrameStoreStartAddr[i] = frame_addr;
        //frame_addr += HORIZONTAL_PIXELS * PIXEL_NUM_OF_BYTES * VERTICAL_LINES;
    }

    XAxiVdma0_Status = XAxiVdma_DmaSetBufferAddr(&XAxiVdma0, XAXIVDMA_WRITE, Vdma0_WriteCfg.FrameStoreStartAddr);
    if (XAxiVdma0_Status != XST_SUCCESS){
        fprintf(stderr, "XAxiVdma_DmaSetBufferAddr() failed\n");
        return(-1);
    }

    // Mnist_conv_nn, Square_frame_gen Initialize
    XMnist_conv_nn_Initialize(&mcnn, 0);
    XSquare_frame_gen_Initialize(&sf_gen, 0);

    // square_frame_gen initialize: 28x28 frame placed at the centre of the image
    XSquare_frame_gen_Set_x_pos(&sf_gen, HORIZONTAL_PIXELS/2);
    xval = HORIZONTAL_PIXELS/2;
    XSquare_frame_gen_Set_y_pos(&sf_gen, VERTICAL_LINES/2);
    yval = VERTICAL_LINES/2;
    XSquare_frame_gen_Set_width(&sf_gen, 28);
    XSquare_frame_gen_Set_height(&sf_gen, 28);
    XSquare_frame_gen_Set_off_on(&sf_gen, 1); // on

    // XSquare_frame_gen start
    XSquare_frame_gen_DisableAutoRestart(&sf_gen);
    while(!XSquare_frame_gen_IsIdle(&sf_gen)) ;
    XSquare_frame_gen_Start(&sf_gen);
    XSquare_frame_gen_EnableAutoRestart(&sf_gen);

    // mnist_conv_nn initialize: column offset plus address of the window's first line
    XMnist_conv_nn_Set_addr_offset(&mcnn, HORIZONTAL_PIXELS/2);
    XMnist_conv_nn_Set_in_r(&mcnn, FRAME_BUFFER_ADDRESS+HORIZONTAL_PIXELS*(VERTICAL_LINES/2)*sizeof(int));

    // axis_switch_1, 1to2 ,Select M00_AXIS
    // Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
    Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR+0x40), 0x80000000); // disable
    Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR+0x44), 0x0); // square_frame_gen enable
    Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR), 0x2); // Commit registers

    // axis_switch_0, 2to1, Select S00_AXIS
    // Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
    Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_0_BASEADDR+0x40), 0x1);
    Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_0_BASEADDR), 0x2); // Commit registers

    // VDMA start
    XAxiVdma0_Status = XAxiVdma_DmaStart(&XAxiVdma0, XAXIVDMA_WRITE);
    if (XAxiVdma0_Status != XST_SUCCESS){
        fprintf(stderr, "XAxiVdma_DmaStart() failed\n");
        return(-1);
    }

    // mt9d111_inf_axis_0, axi_iic_0, bitmap_disp_cntrler_axi_master_0
    volatile unsigned int *bmdc_axi_lites;
    volatile unsigned int *mt9d111_axi_lites;
    volatile unsigned int *mt9d111_i2c_axi_lites;

    bmdc_axi_lites = (volatile unsigned *)XPAR_BITMAP_DISP_CNTRLER_AXI_MASTER_0_BASEADDR;
    mt9d111_axi_lites = (volatile unsigned *)XPAR_CAMERA_INTERFACE_MT9D111_INF_AXIS_0_BASEADDR;
    mt9d111_i2c_axi_lites = (volatile unsigned *)XPAR_CAMERA_INTERFACE_AXI_IIC_0_BASEADDR;

    bmdc_axi_lites[0] = (volatile unsigned int)FRAME_BUFFER_ADDRESS; // Bitmap Display Controller start
    mt9d111_axi_lites[0] = (volatile unsigned int)FRAME_BUFFER_ADDRESS; // Camera Interface start (Address is dummy)

    // CMOS Camera initialize, MT9D111
    cam_i2c_init(mt9d111_i2c_axi_lites);
    cam_i2c_write(mt9d111_i2c_axi_lites, 0xba, 0xf0, 0x1); // Changed regster map to IFP page 1
    cam_i2c_write(mt9d111_i2c_axi_lites, 0xba, 0x97, 0x20); // RGB Mode, RGB565
    mt9d111_axi_lites[1] = 0; // One_shot_mode is disabled

    // AXI GPIO Initialization (a failure is only reported; execution continues)
    XGpio_Status = XGpio_Initialize(&GPIOInstance_Ptr,XPAR_AXI_GPIO_0_DEVICE_ID);
    if(XST_SUCCESS != XGpio_Status)
        print("GPIO INIT FAILED\n\r");

    // AXI GPIO Set the Direction(output setting)
    XGpio_SetDataDirection(&GPIOInstance_Ptr, 1, 0);

    while(1){
        printf("mnist_conv_nn_test, <h> : left, <k> : up, <j> : down, <l> : right, <q> : exit\n");
        inbyte_in = inbyte();
        switch(inbyte_in) {
            case 'h' : // left
            case 'H' : // left -5
                if(inbyte_in == 'h' && xval > 0)
                    --xval;
                else if(inbyte_in == 'H' && xval >= 5)
                    xval -= 5;
                XSquare_frame_gen_Set_x_pos(&sf_gen, xval);
                XMnist_conv_nn_Set_addr_offset(&mcnn, xval);
                printf("X_POS = %d, Y_POS = %d\n", xval, yval);
                break;
            case 'l' : // right
            case 'L' : // right +5
                if(inbyte_in == 'l' && xval < HORIZONTAL_PIXELS-28)
                    xval++;
                else if(inbyte_in == 'L' && xval <= HORIZONTAL_PIXELS-28-5)
                    xval += 5;
                XSquare_frame_gen_Set_x_pos(&sf_gen, xval);
                XMnist_conv_nn_Set_addr_offset(&mcnn, xval);
                printf("X_POS = %d, Y_POS = %d\n", xval, yval);
                break;
            case 'k' : // up
            case 'K' : // up -5
                if(inbyte_in == 'k' && yval > 0)
                    --yval;
                else if(inbyte_in == 'K' && yval >= 5)
                    yval -= 5;
                XSquare_frame_gen_Set_y_pos(&sf_gen, yval);
                XMnist_conv_nn_Set_in_r(&mcnn, FRAME_BUFFER_ADDRESS+HORIZONTAL_PIXELS*yval*sizeof(int));
                printf("X_POS = %d, Y_POS = %d\n", xval, yval);
                break;
            case 'j' : // down
            case 'J' : // down +5
                // The vertical limit must be checked against yval, the
                // coordinate being changed, so the window stays inside the frame.
                if(inbyte_in == 'j' && yval < VERTICAL_LINES-28)
                    yval++;
                else if(inbyte_in == 'J' && yval <= VERTICAL_LINES-28-5)
                    yval += 5;
                XSquare_frame_gen_Set_y_pos(&sf_gen, yval);
                XMnist_conv_nn_Set_in_r(&mcnn, FRAME_BUFFER_ADDRESS+HORIZONTAL_PIXELS*yval*sizeof(int));
                printf("X_POS = %d, Y_POS = %d\n", xval, yval);
                break;
            case 'r' : // result check
                result_disp = 1;
                break;
            case 'q' : // exit
                return(0);
        }

        if(result_disp){
            printf("\nHardware\n");

            // XMnist_conv_nn start: run once and time it
            XMnist_conv_nn_DisableAutoRestart(&mcnn);
            while(!XMnist_conv_nn_IsIdle(&mcnn));
            XTime_GetTime(&start_time);
            XMnist_conv_nn_Start(&mcnn);
            while(!XMnist_conv_nn_IsIdle(&mcnn));
            XTime_GetTime(&end_time);
            // NOTE(review): 325000.0 is presumably the XTime tick count per millisecond - confirm.
            printf("conv_time = %f ms\n", (float)((long)end_time-(long)start_time)/325000.0);

            // mnist cnn result check
            // Each 32-bit word read back carries two 12-bit signed results
            // (bits 11:0 and bits 27:16); both are sign-extended to int.
            for(i=0; i<5; i++){
                XMnist_conv_nn_Read_out_V_Words(&mcnn, i, &res, 1);
                result[i*2] = res & 0x0fff;
                if(result[i*2] & 0x800) // minus
                    result[i*2] = 0xfffff000 | result[i*2]; // Sign extension
                result[i*2+1] = (res & 0x0fff0000) >> 16;
                if(result[i*2+1] & 0x800) // minus
                    result[i*2+1] = 0xfffff000 | result[i*2+1]; // Sign extension
            }

            max_id = max_int(result);
            XGpio_DiscreteWrite(&GPIOInstance_Ptr, 1, max_id);

            for(i=0; i<10; i++){
                printf("result[%d] = %x\n", i, result[i]);
            }
            printf("max_id = %d\n", max_id);

            printf("\nSoftware\n");
            conv_addr = FRAME_BUFFER_ADDRESS+HORIZONTAL_PIXELS*yval*sizeof(int);
            XTime_GetTime(&start_time);
            mnist_conv_nn_float((int *)conv_addr, xval, result_float);
            XTime_GetTime(&end_time);
            max_id_float = max_float(result_float);
            printf("conv_time = %f ms\n", (float)((long)end_time-(long)start_time)/325000.0);

            for(i=0; i<10; i++){
                printf("result_float[%d] = %f\n", i, result_float[i]);
            }
            printf("max_id_float = %d\n", max_id_float);

            result_disp = 0;
        }
    }
}
/*
 * Return the index of the largest of the 10 values in out.
 * When the maximum occurs more than once, the lowest index is returned.
 */
int max_int(int out[10]){
    int best_index = 0;
    int best_value = out[0];

    for (int idx = 1; idx < 10; idx++) {
        if (out[idx] > best_value) {
            best_value = out[idx];
            best_index = idx;
        }
    }
    return best_index;
}
// Software (float) reference of the MNIST CNN.
//
// in:          pixel rows of 800 ints each; rows 0..27 are read at columns
//              addr_offset .. addr_offset+27.
// addr_offset: column of the left edge of the 28x28 window inside each row.
// out:         receives the 10 outputs of the last fully connected layer.
// Returns 0.
//
// Pipeline: window copy (0.99609375 - luminance) -> 10 kernels of 5x5
// convolution + bias + ReLU -> 2x2 max pooling -> fully connected 1440->100
// + bias + ReLU -> fully connected 100->10 + bias.
// Intermediate data lives in the file-scope arrays buf, conv_out, pool_out,
// dot1 and dot2, so the function is not reentrant.
int mnist_conv_nn_float(int in[22400], int addr_offset, float out[10]){
// Display the values of the handwritten digit (debug code, disabled)
/*for (int i=0; i<28; i++){ for (int j=0; j<800; j++){ if (j>=addr_offset && j<addr_offset+28) printf("%2x, ", (int)(conv_rgb2y_soft(in[i*800+j])*256.0)); } printf("\n"); } */
// Copy the 28x28 window into buf; every one of the 800 columns is visited and
// only those inside the window are stored.
buf_copy1: for(int i=0; i<28; i++){
buf_copy2: for(int j=0; j<800; j++){
if (j>=addr_offset && j<addr_offset+28)
buf[i][j-addr_offset] = (float)0.99609375 - (float)conv_rgb2y_soft(in[i*800+j]);
}
}
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<10; i++){ // number of kernels
CONV2: for(int j=0; j<24; j++){
CONV3: for(int k=0; k<24; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_fweight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_fbias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
// The first element of each 2x2 tile seeds the maximum; the other three replace it when larger.
POOL1: for(int i=0; i<10; i++){
POOL2: for(int j=0; j<24; j += 2){
POOL3: for(int k=0; k<24; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
// First fully connected layer: pool_out is flattened as i*12*12 + j*12 + k.
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<10; i++){
af1_dot3: for(int j=0; j<12; j++){
af1_dot4: for(int k=0; k<12; k++){
dot1[col] += pool_out[i][j][k]*af1_fweight[i*12*12+j*12+k][col];
}
}
}
dot1[col] += af1_fbias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
// Second fully connected layer: no activation, result copied to out.
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_fweight[row][col];
}
dot2[col] += af2_fbias[col];
out[col] = dot2[col];
}
return(0);
}
/*
 * Return the index of the largest of the 10 values in out.
 * When the maximum occurs more than once, the lowest index is returned.
 */
int max_float(float out[10]){
    int best_index = 0;
    float best_value = out[0];

    for (int idx = 1; idx < 10; idx++) {
        if (out[idx] > best_value) {
            best_value = out[idx];
            best_index = idx;
        }
    }
    return best_index;
}
// Conversion from RGB to Y
// The RGB format is {8'd0, R(8bits), G(8bits), B(8bits)}, 1 pixel = 32 bits
// Only the luminance signal Y is produced. The formula is Y = 0.299R + 0.587G + 0.114B
// Based on "YUV format and YUV<->RGB conversion": http://vision.kuee.kyoto-u.ac.jp/~hiroaki/firewire/yuv.html
// 2013/09/27 : stopped using float, everything became int
// 2017/06/30 : the return value became float
//
// Returns the integer luminance (0..255) divided by 256.
float conv_rgb2y_soft(int rgb){
    const int blue = rgb & 0xff;
    const int green = (rgb >> 8) & 0xff;
    const int red = (rgb >> 16) & 0xff;

    // Coefficients of Y = 0.299R + 0.587G + 0.114B scaled by 256; the shift divides by 256 again.
    int luma = (77*red + 150*green + 29*blue) >> 8;
    if (luma >= 256)
        luma = 255;

    return (float)(luma / 256.0);
}
/* * xttime_test.c * * Created on: 2017/07/06 * Author: Ono */
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include "xtime_l.h"
/*
 * Measure a usleep(1000) call with the XTime counter and print the elapsed
 * time; the tick difference is divided by 325000.0 to report milliseconds.
 */
int main(){
    XTime start_time, end_time;

    printf("Start\n");

    XTime_GetTime(&start_time);
    usleep(1000);
    //sleep(10);
    XTime_GetTime(&end_time);

    printf("end\n");

    const long long int elapsed_ticks = (long long int)end_time - (long long int)start_time;
    printf("usleep_time = %lf ms\n", (double)elapsed_ticks/325000.0);
}
/* * mnist_conv_nn_test.c * * Created on: 2017/07/03 * Author: Masaaki */
#include <stdio.h>
#include <stdlib.h>
#include "xaxivdma.h"
#include "xil_io.h"
#include "xparameters.h"
#include "sleep.h"
#include "xgpio.h"
#include "xmnist_conv_nn.h"
#include "xsquare_frame_gen.h"
// Address of the frame buffer; main() hands it to the VDMA frame stores, the
// bitmap display controller, the camera interface and the mnist_conv_nn IP.
#define FRAME_BUFFER_ADDRESS 0x10000000
#define NUMBER_OF_WRITE_FRAMES 3 // Note: If not at least 3 or more, the image is not displayed in succession.
// Frame geometry used for the VDMA sizes and for all window address arithmetic.
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
#define PIXEL_NUM_OF_BYTES 4
// Forward declaration of the helper defined after main().
int max_int(int out[10]);
// VDMA write-channel configuration filled in by main().
static XAxiVdma_DmaSetup Vdma0_WriteCfg;
/*
 * Initialise the AXI IIC core used to talk to the MT9D111 camera.
 *
 * mt9d111_i2c_axi_lites: base of the IIC register block, viewed as 32-bit words.
 * Word 64 (byte offset 0x100) is the i2c control register: it is first written
 * with 0x2 (reset TX FIFO) and then with 0x1 (enable i2c).
 */
void cam_i2c_init(volatile unsigned *mt9d111_i2c_axi_lites) {
    volatile unsigned *const control_reg = &mt9d111_i2c_axi_lites[64];

    *control_reg = 0x2; /* reset tx fifo */
    *control_reg = 0x1; /* enable i2c */
}
/*
 * Wait for an I2C write to the camera to finish.
 *
 * The status-polling variant (wait for "no bus busy" and "TX FIFO empty") is
 * not used; a fixed 1000 microsecond delay is applied instead.
 */
void cam_i2x_write_sync(void) {
    const unsigned settle_time_us = 1000;

    usleep(settle_time_us);
}
/*
 * Write one 16-bit value to a camera register over the AXI IIC core.
 *
 * mt9d111_i2c_axi_lites: base of the IIC register block, viewed as 32-bit words
 *                        (word 66 = byte offset 0x108 = i2c_tx_fifo).
 * device_addr:           I2C device address; bit 0 is cleared to select a write.
 * write_addr:            register address inside the device.
 * write_data:            16-bit register value, sent high byte first.
 *
 * The four FIFO words are: address byte with 0x100 set, register address,
 * high data byte, low data byte with 0x200 set.
 */
void cam_i2c_write(volatile unsigned *mt9d111_i2c_axi_lites, unsigned int device_addr, unsigned int write_addr, unsigned int write_data){
    mt9d111_i2c_axi_lites[66] = 0x100 | (device_addr & 0xfe); // Slave IIC Write Address, address is 0x108, i2c_tx_fifo
    mt9d111_i2c_axi_lites[66] = write_addr;
    // Mask (not OR) with 0xff so the real high byte is sent; OR-ing forced it to 0xff.
    mt9d111_i2c_axi_lites[66] = (write_data >> 8) & 0xff; // first data
    mt9d111_i2c_axi_lites[66] = 0x200 | (write_data & 0xff); // second data
    cam_i2x_write_sync();
}
// Entry point of the hardware MNIST CNN test.
//
// Configures the AXI VDMA write channel, the square_frame_gen and mnist_conv_nn
// IPs, the two AXI4-Stream switches, the bitmap display controller, the MT9D111
// camera and the AXI GPIO. mnist_conv_nn is started with auto-restart enabled.
// The loop then handles one console key per iteration:
//   h / l : move the 28x28 window left / right by one pixel
//   k / j : move the window up / down by one line
//   r     : also print the 10 results and max_id for this iteration
//   q     : exit
// After every key the results are read back and max_id is written to the GPIO.
// Returns 0 on 'q' and -1 when one of the checked VDMA setup steps fails.
int main(){
XMnist_conv_nn mcnn;
XSquare_frame_gen sf_gen;
int inbyte_in;
int xval, yval;
int i, res;
int result[10];
static XGpio GPIOInstance_Ptr;
int XGpio_Status;
int max_id;
XAxiVdma_Config *XAxiVdma0_Config;
XAxiVdma XAxiVdma0;
int XAxiVdma0_Status;
int result_disp = 0;
// AXI VDMA Initialization sequence
XAxiVdma0_Config = XAxiVdma_LookupConfig(XPAR_CAMERA_INTERFACE_AXI_VDMA_0_DEVICE_ID); // Look up the hardware configuration for a device instance
if (XAxiVdma0_Config == NULL){
fprintf(stderr, "No AXI VDMA found\n");
return(-1);
}
XAxiVdma0_Status = XAxiVdma_CfgInitialize(&XAxiVdma0, XAxiVdma0_Config, XAxiVdma0_Config->BaseAddress); // Initialize the driver with hardware configuration
if (XAxiVdma0_Status != XST_SUCCESS){
fprintf(stderr, "XAxiVdma_CfgInitialize() failed\n");
return(-1);
}
XAxiVdma_Reset(&XAxiVdma0, XAXIVDMA_WRITE);
while(XAxiVdma_ResetNotDone(&XAxiVdma0, XAXIVDMA_WRITE)) ;
// NOTE(review): the status returned by XAxiVdma_SetFrmStore() is overwritten below without being checked.
XAxiVdma0_Status = XAxiVdma_SetFrmStore(&XAxiVdma0, NUMBER_OF_WRITE_FRAMES, XAXIVDMA_WRITE); // Set the number of frame store buffers to use.
Vdma0_WriteCfg.VertSizeInput = VERTICAL_LINES;
Vdma0_WriteCfg.HoriSizeInput = HORIZONTAL_PIXELS * PIXEL_NUM_OF_BYTES;
Vdma0_WriteCfg.Stride = HORIZONTAL_PIXELS * PIXEL_NUM_OF_BYTES; // Indicates the number of address bytes between the first pixels of each video line.
Vdma0_WriteCfg.FrameDelay = 0; // Indicates the minimum number of frame buffers the Genlock slave is to be behind the locked master. This field is only used if the channel is enabled for Genlock Slave operations. This field has no meaning in other Genlock modes.
Vdma0_WriteCfg.EnableCircularBuf = 1; // Indicates frame buffer Circular mode or frame buffer Park mode. 1 = Circular Mode Engine continuously circles through frame buffers.
Vdma0_WriteCfg.EnableSync = 0; // Enables Genlock or Dynamic Genlock Synchronization. 0 = Genlock or Dynamic Genlock Synchronization disabled.
Vdma0_WriteCfg.PointNum = 0; // No Gen-Lock
Vdma0_WriteCfg.EnableFrameCounter = 0; // Endless transfers
Vdma0_WriteCfg.FixedFrameStoreAddr = 0; // We are not doing parking
XAxiVdma0_Status = XAxiVdma_DmaConfig(&XAxiVdma0, XAXIVDMA_WRITE, &Vdma0_WriteCfg);
if (XAxiVdma0_Status != XST_SUCCESS){
fprintf(stderr, "XAxiVdma_DmaConfig() failed\n");
return(-1);
}
// Frame buffer address set
// All frame stores point at the same address (the per-frame increment is commented out).
unsigned int frame_addr = (unsigned int)FRAME_BUFFER_ADDRESS;
for (i=0; i<NUMBER_OF_WRITE_FRAMES; i++){
Vdma0_WriteCfg.FrameStoreStartAddr[i] = frame_addr;
//frame_addr += HORIZONTAL_PIXELS * PIXEL_NUM_OF_BYTES * VERTICAL_LINES;
}
XAxiVdma0_Status = XAxiVdma_DmaSetBufferAddr(&XAxiVdma0, XAXIVDMA_WRITE, Vdma0_WriteCfg.FrameStoreStartAddr);
if (XAxiVdma0_Status != XST_SUCCESS){
fprintf(stderr, "XAxiVdma_DmaSetBufferAddr() failed\n");
return(-1);
}
// Mnist_conv_nn, Square_frame_gen Initialize
XMnist_conv_nn_Initialize(&mcnn, 0);
XSquare_frame_gen_Initialize(&sf_gen, 0);
// square_frame_gen initialize: 28x28 frame placed at the centre of the image
XSquare_frame_gen_Set_x_pos(&sf_gen, HORIZONTAL_PIXELS/2);
xval = HORIZONTAL_PIXELS/2;
XSquare_frame_gen_Set_y_pos(&sf_gen, VERTICAL_LINES/2);
yval = VERTICAL_LINES/2;
XSquare_frame_gen_Set_width(&sf_gen, 28);
XSquare_frame_gen_Set_height(&sf_gen, 28);
XSquare_frame_gen_Set_off_on(&sf_gen, 1); // on
// XSquare_frame_gen start
XSquare_frame_gen_DisableAutoRestart(&sf_gen);
while(!XSquare_frame_gen_IsIdle(&sf_gen)) ;
XSquare_frame_gen_Start(&sf_gen);
XSquare_frame_gen_EnableAutoRestart(&sf_gen);
// mnist_conv_nn initialize: column offset plus address of the window's first line
XMnist_conv_nn_Set_addr_offset(&mcnn, HORIZONTAL_PIXELS/2);
XMnist_conv_nn_Set_in_r(&mcnn, FRAME_BUFFER_ADDRESS+HORIZONTAL_PIXELS*(VERTICAL_LINES/2)*sizeof(int));
// axis_switch_1, 1to2 ,Select M00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR+0x40), 0x80000000); // disable
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR+0x44), 0x0); // square_frame_gen enable
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR), 0x2); // Commit registers
// axis_switch_0, 2to1, Select S00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_0_BASEADDR+0x40), 0x1);
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_0_BASEADDR), 0x2); // Commit registers
// VDMA start
XAxiVdma0_Status = XAxiVdma_DmaStart(&XAxiVdma0, XAXIVDMA_WRITE);
if (XAxiVdma0_Status != XST_SUCCESS){
fprintf(stderr, "XAxiVdma_DmaStart() failed\n");
return(-1);
}
// mt9d111_inf_axis_0, axi_iic_0, bitmap_disp_cntrler_axi_master_0
volatile unsigned int *bmdc_axi_lites;
volatile unsigned int *mt9d111_axi_lites;
volatile unsigned int *mt9d111_i2c_axi_lites;
bmdc_axi_lites = (volatile unsigned *)XPAR_BITMAP_DISP_CNTRLER_AXI_MASTER_0_BASEADDR;
mt9d111_axi_lites = (volatile unsigned *)XPAR_CAMERA_INTERFACE_MT9D111_INF_AXIS_0_BASEADDR;
mt9d111_i2c_axi_lites = (volatile unsigned *)XPAR_CAMERA_INTERFACE_AXI_IIC_0_BASEADDR;
bmdc_axi_lites[0] = (volatile unsigned int)FRAME_BUFFER_ADDRESS; // Bitmap Display Controller start
mt9d111_axi_lites[0] = (volatile unsigned int)FRAME_BUFFER_ADDRESS; // Camera Interface start (Address is dummy)
// CMOS Camera initialize, MT9D111
cam_i2c_init(mt9d111_i2c_axi_lites);
cam_i2c_write(mt9d111_i2c_axi_lites, 0xba, 0xf0, 0x1); // Changed regster map to IFP page 1
cam_i2c_write(mt9d111_i2c_axi_lites, 0xba, 0x97, 0x20); // RGB Mode, RGB565
mt9d111_axi_lites[1] = 0; // One_shot_mode is disabled
// XMnist_conv_nn start
// Auto-restart is enabled after the first start, so the results are read below without restarting the IP.
XMnist_conv_nn_DisableAutoRestart(&mcnn);
while(!XMnist_conv_nn_IsIdle(&mcnn));
XMnist_conv_nn_Start(&mcnn);
XMnist_conv_nn_EnableAutoRestart(&mcnn);
// AXI GPIO Initialization
// A failure is only reported; execution continues with the GPIO calls below.
XGpio_Status = XGpio_Initialize(&GPIOInstance_Ptr,XPAR_AXI_GPIO_0_DEVICE_ID);
if(XST_SUCCESS != XGpio_Status)
print("GPIO INIT FAILED\n\r");
// AXI GPIO Set the Direction(output setting)
XGpio_SetDataDirection(&GPIOInstance_Ptr, 1, 0);
while(1){
printf("mnist_conv_nn_test, <h> : left, <k> : up, <j> : down, <l> : right, <q> : exit\n");
inbyte_in = inbyte();
switch(inbyte_in) {
case 'h' : // left
if(xval > 0){
--xval;
XSquare_frame_gen_Set_x_pos(&sf_gen, xval);
XMnist_conv_nn_Set_addr_offset(&mcnn, xval);
printf("X_POS = %d, Y_POS = %d\n", xval, yval);
}
break;
case 'l' : // right
// Upper bound keeps the 28-pixel-wide window inside the line.
if(xval < HORIZONTAL_PIXELS-28){
xval++;
XSquare_frame_gen_Set_x_pos(&sf_gen, xval);
XMnist_conv_nn_Set_addr_offset(&mcnn, xval);
printf("X_POS = %d, Y_POS = %d\n", xval, yval);
}
break;
case 'k' : // up
if(yval > 0){
--yval;
XSquare_frame_gen_Set_y_pos(&sf_gen, yval);
XMnist_conv_nn_Set_in_r(&mcnn, FRAME_BUFFER_ADDRESS+HORIZONTAL_PIXELS*yval*sizeof(int));
printf("X_POS = %d, Y_POS = %d\n", xval, yval);
}
break;
case 'j' : // down
// Upper bound keeps the 28-line-high window inside the frame.
if(yval < VERTICAL_LINES-28){
yval++;
XSquare_frame_gen_Set_y_pos(&sf_gen, yval);
XMnist_conv_nn_Set_in_r(&mcnn, FRAME_BUFFER_ADDRESS+HORIZONTAL_PIXELS*yval*sizeof(int));
printf("X_POS = %d, Y_POS = %d\n", xval, yval);
}
break;
case 'r' : // result check
result_disp = 1;
break;
case 'q' : // exit
return(0);
}
// mnist cnn result check
// Each 32-bit word read back carries two 12-bit signed results
// (bits 11:0 and bits 27:16); both are sign-extended to int.
for(i=0; i<5; i++){
XMnist_conv_nn_Read_out_V_Words(&mcnn, i, &res, 1);
result[i*2] = res & 0x0fff;
if(result[i*2] & 0x800) // minus
result[i*2] = 0xfffff000 | result[i*2]; // Sign extension
result[i*2+1] = (res & 0x0fff0000) >> 16;
if(result[i*2+1] & 0x800) // minus
result[i*2+1] = 0xfffff000 | result[i*2+1]; // Sign extension
}
// The index of the largest result is written to GPIO channel 1 on every iteration.
max_id = max_int(result);
XGpio_DiscreteWrite(&GPIOInstance_Ptr, 1, max_id);
if(result_disp){
for(i=0; i<10; i++){
printf("result[%d] = %x\n", i, result[i]);
}
printf("max_id = %d\n", max_id);
result_disp = 0;
}
}
}
/*
 * Return the index of the largest of the 10 values in out.
 * When the maximum occurs more than once, the lowest index is returned.
 */
int max_int(int out[10]){
    int best_index = 0;
    int best_value = out[0];

    for (int idx = 1; idx < 10; idx++) {
        if (out[idx] > best_value) {
            best_value = out[idx];
            best_index = idx;
        }
    }
    return best_index;
}
while(1){
print("********************** LED4 TEST Start ***********************\n\r");
print("TeraTerm: Please Set Local Echo Mode.\n\r");
print("Press '1' to show all registers\n\r");
print("Press '2' to set LED4 Enable or Disable(Toggle, Command Register)\n\r");
print("Press '3' to set LED Counter Load Register (4bits, Please input hexadecimal)\n\r");
print("Press '4' to set LED Interval Register (32bits, Please input decimal)\n\r");
print("Press '5' to exit\n\r");
print("Selection : ");
inbyte_in = inbyte();
print(" \r\n");
print(" \r\n");
switch(inbyte_in) {
case '1' : // Show all registers
val = (int)Xil_In32((u32)XPAR_LED4IP_0_S_AXI_BASEADDR);
printf("Command Register is %x\r\n", val);
val = (int)Xil_In32((u32)(XPAR_LED4IP_0_S_AXI_BASEADDR+4));
printf("LED Counter Load Register is %x\r\n", val);
val = (int)Xil_In32((u32)(XPAR_LED4IP_0_S_AXI_BASEADDR+8));
printf("LED Monitor Register is %x\r\n", val);
val = (int)Xil_In32((u32)(XPAR_LED4IP_0_S_AXI_BASEADDR+0xc));
printf("LED Interval Register is %d (decimal)\r\n", val);
break;
case '2' : // Set LED4 Enable or Disable(Toggle, Command Register)
// mnist_conv_nn10_sDMA.cpp
// 2017/06/12 by marsee
// 畳み込み層のカーネル数 10
// 2017/06/29 : アドレスオフセット導入 800x600 画像中の 28x28 を切り取ってDMAする
// | アドレスオフセット |
// *************************-手書き数字1行目-****************
// *************************-手書き数字2行目-****************
//
#include <ap_fixed.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> conv_rgb2y(int rgb);
// Fixed-point MNIST convolutional network (10 convolution kernels) for HLS.
//
// in          : 28 rows of 800 pixels (22400 words); each word is passed to conv_rgb2y().
// addr_offset : column at which the 28-pixel-wide window starts inside every 800-pixel row.
// out         : the 10 outputs of the last fully-connected layer.
// Returns 0 always.
//
// Pipeline: window copy (luminance, inverted) -> 5x5 convolution + ReLU ->
// 2x2 max pooling -> fully-connected 1440->100 + ReLU -> fully-connected 100->10.
int mnist_conv_nn(int in[22400], int addr_offset, ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]){
// Interface directives: addr_offset, out and the return value use s_axilite,
// in uses m_axi.
#pragma HLS INTERFACE s_axilite port=addr_offset
#pragma HLS INTERFACE s_axilite register port=out
#pragma HLS INTERFACE m_axi depth=22400 port=in offset=slave
#pragma HLS INTERFACE s_axilite port=return
// Intermediate buffers: input window, convolution output, pooling output and
// the accumulators of the two fully-connected layers.
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[28][28];
ap_fixed<10, 3, AP_TRN_ZERO, AP_SAT> conv_out[10][24][24];
ap_fixed<10, 3, AP_TRN_ZERO, AP_SAT> pool_out[10][12][12];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot2[10];
// Walk over all 800 pixels of each of the 28 rows, but keep only the 28
// columns starting at addr_offset.
buf_copy1: for(int i=0; i<28; i++){
buf_copy2: for(int j=0; j<800; j++){
#pragma HLS PIPELINE II=1
if (j>=addr_offset && j<addr_offset+28)
buf[i][j-addr_offset] = (ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>)0.99609375 - conv_rgb2y(in[i*800+j]);
// 0.99609375 is 1 - 1/256: 1/256 is subtracted beforehand so the value never becomes 1.0
}
}
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<10; i++){ // number of kernels
CONV2: for(int j=0; j<24; j++){
CONV3: for(int k=0; k<24; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
// The first element of each 2x2 cell seeds the result; the other three
// replace it only when they are larger (max pooling).
POOL1: for(int i=0; i<10; i++){
POOL2: for(int j=0; j<24; j += 2){
POOL3: for(int k=0; k<24; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
// First fully-connected layer: the 10x12x12 pooled values are addressed as a
// flat vector of 1440 inputs (index i*12*12+j*12+k), 100 outputs, then ReLU.
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<10; i++){
af1_dot3: for(int j=0; j<12; j++){
af1_dot4: for(int k=0; k<12; k++){
#pragma HLS PIPELINE II=3
dot1[col] += pool_out[i][j][k]*af1_weight[i*12*12+j*12+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
// Second fully-connected layer: 100 inputs, 10 outputs, no activation;
// the result is copied to out[].
af2_dot1: for(int col=0; col<10; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
out[col] = dot2[col];
}
return(0);
}
// RGB to Y (luminance) conversion.
// Pixel format: {8'd0, R(8bits), G(8bits), B(8bits)}, 1 pixel = 32 bits.
// Only the luminance signal Y is produced: Y = 0.299R + 0.587G + 0.114B.
// Based on "YUV format and YUV<->RGB conversion": http://vision.kuee.kyoto-u.ac.jp/~hiroaki/firewire/yuv.html
// 2013/09/27 : dropped float, everything is computed with int
// 2017/06/30 : the result is returned as ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> conv_rgb2y(int rgb){
    // Split the packed word into its three 8-bit channels.
    const int blue  = rgb & 0xff;
    const int green = (rgb >> 8) & 0xff;
    const int red   = (rgb >> 16) & 0xff;

    // Coefficients are the Y weights multiplied by 256; the shift divides by 256 again.
    int luma = (77*red + 150*green + 29*blue) >> 8;
    if (luma > 255)
        luma = 255;

    // Scale the 0..255 integer down by 256 in fixed point, then narrow to 8 fractional bits.
    ap_ufixed<16, 8, AP_TRN_ZERO, AP_SAT> luma_fixed;
    luma_fixed = (ap_ufixed<16, 8, AP_TRN_ZERO, AP_SAT>)luma / 256;
    return((ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>)luma_fixed);
}
// mnist_conv_nn_sDMA_tb.cpp
// 2017/06/14 by marsee
// 畳み込み層のカーネル数 10
// 2017/06/29 : ストライドDMAのためのテストベンチ
//
#include <stdio.h>
#include <stdlib.h>
#include <ap_fixed.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#include "bmp_header.h"
int mnist_conv_nn(int in[22400], int addr_offset, ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]);
int mnist_conv_nn_float(int in[22400], int addr_offset, float out[10]);
int max_ap_fixed(ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]);
int max_float(float out[10]);
float conv_rgb2y_soft(int rgb);
#define READ_BMP_FILE_NAME "bmp_file0.bmp"
// 8
#define X_POS 560
#define Y_POS 183
// 7
//#define X_POS 504
//#define Y_POS 184
// 5
//#define X_POS 390
//#define Y_POS 138
// 0
//#define X_POS 390
//#define Y_POS 70
#define WIDTH 28
#define HEIGHT 28
int main(){
ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> result_ap_fixed[10];
float result_float[10];
int max_id_hw, max_id_sw, max_id_ref;
int *in;
int *inf;
BITMAPFILEHEADER bmpfhr; // BMPファイルのファイルヘッダ(for Read)
BITMAPINFOHEADER bmpihr; // BMPファイルのINFOヘッダ(for Read)
FILE *fbmpr;
int *rd_bmp;
int blue, green, red;
if ((fbmpr = fopen(READ_BMP_FILE_NAME, "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open ");
fprintf(stderr, READ_BMP_FILE_NAME);
fprintf(stderr, " by binary read mode\n");
exit(1);
}
// bmpヘッダの読み出し
fread(&bmpfhr.bfType, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfSize, sizeof(uint32_t), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(uint16_t), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(uint32_t), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// ピクセルを入れるメモリをアロケートする
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
if ((in =(int *)malloc(sizeof(int) * (800 * 28))) == NULL){
fprintf(stderr, "Can't allocate (ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>)in memory\n");
exit(1);
}
if ((inf =(int *)malloc(sizeof(int) * (800 * 28))) == NULL){
fprintf(stderr, "Can't allocate (float)inf memory\n");
exit(1);
}
// rd_bmp にBMPのピクセルを代入。その際に、行を逆転する必要がある
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
// rd_bmp を in と inf に入力
for (int y=Y_POS; y<Y_POS+HEIGHT; y++){
for (int x=0; x<bmpihr.biWidth; x++){
in[(y-Y_POS)*bmpihr.biWidth+x] = rd_bmp[y*bmpihr.biWidth+x];
inf[(y-Y_POS)*bmpihr.biWidth+x] = rd_bmp[y*bmpihr.biWidth+x];
}
}
mnist_conv_nn(in, X_POS, result_ap_fixed);
mnist_conv_nn_float(inf, X_POS, result_float);
max_id_hw = max_ap_fixed(result_ap_fixed);
max_id_sw = max_float(result_float);
printf("max_id_hw = %d\n", max_id_hw);
printf("max_id_sw = %d\n", max_id_sw);
return(0);
}
// Float counterpart of mnist_conv_nn: same layer sequence, computed with
// float and the *_fweight / *_fbias tables.
//
// in          : 28 rows of 800 pixels; each word is passed to conv_rgb2y_soft().
// addr_offset : column at which the 28-pixel-wide window starts in every row.
// out         : the 10 outputs of the last fully-connected layer.
// Also prints the luminance of the selected window to stdout. Returns 0 always.
int mnist_conv_nn_float(int in[22400], int addr_offset, float out[10]){
    float buf[28][28];
    float conv_out[10][24][24];
    float pool_out[10][12][12];
    float dot1[100];
    float dot2[10];

    // Print the values of the handwritten digit
    for (int line = 0; line < 28; ++line){
        for (int px = 0; px < 800; ++px){
            if (px < addr_offset || px >= addr_offset+28)
                continue;
            printf("%2x, ", (int)(conv_rgb2y_soft(in[line*800+px])*256.0));
        }
        printf("\n");
    }

    // Copy the window, inverted: 255/256 minus the luminance.
    for (int line = 0; line < 28; ++line){
        for (int px = 0; px < 800; ++px){
            if (px < addr_offset || px >= addr_offset+28)
                continue;
            buf[line][px-addr_offset] = (float)0.99609375 - (float)conv_rgb2y_soft(in[line*800+px]);
        }
    }

    // Convolution: 5x5 kernel, stride 1, no padding, followed by ReLU
    for (int ker = 0; ker < 10; ++ker){ // number of kernels
        for (int oy = 0; oy < 24; ++oy){
            for (int ox = 0; ox < 24; ++ox){
                float &acc = conv_out[ker][oy][ox];
                acc = 0;
                for (int ky = 0; ky < 5; ++ky){
                    for (int kx = 0; kx < 5; ++kx){
                        acc += buf[oy+ky][ox+kx] * conv1_fweight[ker][0][ky][kx];
                    }
                }
                acc += conv1_fbias[ker];
                if (acc < 0) // ReLU
                    acc = 0;
            }
        }
    }

    // Max pooling: 2x2 cell, stride 2
    for (int ch = 0; ch < 10; ++ch){
        for (int py = 0; py < 12; ++py){
            for (int px = 0; px < 12; ++px){
                const int sy = py * 2;
                const int sx = px * 2;
                float &best = pool_out[ch][py][px];
                best = conv_out[ch][sy][sx];
                // Remaining three cell members, visited as (0,1), (1,0), (1,1).
                for (int t = 1; t < 4; ++t){
                    const int dy = t / 2;
                    const int dx = t % 2;
                    if (best < conv_out[ch][sy+dy][sx+dx]){
                        best = conv_out[ch][sy+dy][sx+dx];
                    }
                }
            }
        }
    }

    // First fully-connected layer: 1440 inputs, 100 outputs, ReLU
    for (int unit = 0; unit < 100; ++unit){
        float &sum = dot1[unit];
        sum = 0;
        int w_row = 0; // flat input index, equals ch*12*12 + py*12 + px
        for (int ch = 0; ch < 10; ++ch){
            for (int py = 0; py < 12; ++py){
                for (int px = 0; px < 12; ++px){
                    sum += pool_out[ch][py][px]*af1_fweight[w_row][unit];
                    ++w_row;
                }
            }
        }
        sum += af1_fbias[unit];
        if (sum < 0) // ReLU
            sum = 0;
    }

    // Second fully-connected layer: 100 inputs, 10 outputs, no activation
    for (int unit = 0; unit < 10; ++unit){
        float &sum = dot2[unit];
        sum = 0;
        for (int src = 0; src < 100; ++src){
            sum += dot1[src]*af2_fweight[src][unit];
        }
        sum += af2_fbias[unit];
        out[unit] = sum;
    }

    return(0);
}
// Returns the index (0..9) of the largest element of out[].
// On ties the lowest index wins, because only a strictly larger value replaces
// the current candidate.
int max_ap_fixed(ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[10]){
    int best_idx = 0;
    ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> best_val;
    best_val = out[0];

    for(int idx = 1; idx < 10; ++idx){
        if(out[idx] > best_val){
            best_val = out[idx];
            best_idx = idx;
        }
    }
    return best_idx;
}
// Returns the index (0..9) of the largest element of out[].
// On ties the lowest index wins, because only a strictly larger value replaces
// the current candidate.
int max_float(float out[10]){
    int best_idx = 0;
    float best_val = out[0];

    for(int idx = 1; idx < 10; ++idx){
        if(out[idx] > best_val){
            best_val = out[idx];
            best_idx = idx;
        }
    }
    return best_idx;
}
// RGB to Y (luminance) conversion, float version.
// Pixel format: {8'd0, R(8bits), G(8bits), B(8bits)}, 1 pixel = 32 bits.
// Only the luminance signal Y is produced: Y = 0.299R + 0.587G + 0.114B.
// Based on "YUV format and YUV<->RGB conversion": http://vision.kuee.kyoto-u.ac.jp/~hiroaki/firewire/yuv.html
// 2013/09/27 : dropped float, everything is computed with int
// 2017/06/30 : the return value was changed to float
float conv_rgb2y_soft(int rgb){
    // Split the packed word into its three 8-bit channels.
    const int blue  = rgb & 0xff;
    const int green = (rgb >> 8) & 0xff;
    const int red   = (rgb >> 16) & 0xff;

    // Coefficients are the Y weights multiplied by 256; the shift divides by 256 again.
    int luma = (77*red + 150*green + 29*blue) >> 8;
    if (luma > 255)
        luma = 255;

    // Scale the 0..255 integer luminance down by 256.
    return (float)((float)luma/256.0);
}
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | - | - | - | - | 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | 13 | 14 | 15 |
16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | 31 | - | - | - | - | - |