/*
* Device Tree for Zybo Z7 board
* Partially generated by Device Tree Generator 1.1
*
* (C) Copyright 2007-2013 Xilinx, Inc.
* (C) Copyright 2007-2013 Michal Simek
* (C) Copyright 2007-2012 PetaLogix Qld Pty Ltd
* (C) Copyright 2014 Digilent, Inc.
*
* Michal SIMEK <monstr@monstr.eu>
* Tinghui Wang <steven.wang@digilentinc.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
/* for ZYBO Z7 by marsee 2017/09/29 */
/dts-v1/;
/ {
#address-cells = <1>;
#size-cells = <1>;
compatible = "xlnx,zynq-7000";
model = "Xilinx Zynq";
aliases {
ethernet0 = &ps7_ethernet_0;
serial0 = &ps7_uart_1;
spi0 = &ps7_qspi_0;
} ;
chosen {
/* bootargs = "console=ttyPS0,115200 root=/dev/ram rw earlyprintk"; */
bootargs = "console=ttyPS0,115200 root=/dev/mmcblk0p2 rw earlyprintk rootfstype=ext4 rootwait devtmpfs.mount=1 coherent_pool=16M";
linux,stdout-path = "/amba@0/serial@e0001000";
} ;
cpus {
#address-cells = <1>;
#size-cells = <0>;
ps7_cortexa9_0: cpu@0 {
bus-handle = <&ps7_axi_interconnect_0>;
clock-latency = <1000>;
clocks = <&clkc 3>;
compatible = "arm,cortex-a9";
device_type = "cpu";
interrupt-handle = <&ps7_scugic_0>;
operating-points = <666667 1000000 333334 1000000 >;
/* operating-points = <650000 1000000 >; */
reg = <0x0>;
} ;
ps7_cortexa9_1: cpu@1 {
bus-handle = <&ps7_axi_interconnect_0>;
clocks = <&clkc 3>;
compatible = "arm,cortex-a9";
device_type = "cpu";
interrupt-handle = <&ps7_scugic_0>;
reg = <0x1>;
} ;
} ;
pmu {
compatible = "arm,cortex-a9-pmu";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 5 4>, <0 6 4>;
reg = <0xf8891000 0x1000>, <0xf8893000 0x1000>;
reg-names = "cpu0", "cpu1";
} ;
ps7_ddr_0: memory@0 {
device_type = "memory";
reg = <0x0 0x40000000>;
} ;
ps7_axi_interconnect_0: amba@0 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "xlnx,ps7-axi-interconnect-1.00.a", "simple-bus";
ranges ;
ps7_afi_0: ps7-afi@f8008000 {
compatible = "xlnx,ps7-afi-1.00.a";
reg = <0xf8008000 0x1000>;
} ;
ps7_afi_1: ps7-afi@f8009000 {
compatible = "xlnx,ps7-afi-1.00.a";
reg = <0xf8009000 0x1000>;
} ;
ps7_afi_2: ps7-afi@f800a000 {
compatible = "xlnx,ps7-afi-1.00.a";
reg = <0xf800a000 0x1000>;
} ;
ps7_afi_3: ps7-afi@f800b000 {
compatible = "xlnx,ps7-afi-1.00.a";
reg = <0xf800b000 0x1000>;
} ;
ps7_ddrc_0: ps7-ddrc@f8006000 {
compatible = "xlnx,zynq-ddrc-1.0";
reg = <0xf8006000 0x1000>;
xlnx,has-ecc = <0x0>;
} ;
ps7_dev_cfg_0: ps7-dev-cfg@f8007000 {
clock-names = "ref_clk", "fclk0", "fclk1", "fclk2", "fclk3";
clocks = <&clkc 12>, <&clkc 15>, <&clkc 16>, <&clkc 17>, <&clkc 18>;
compatible = "xlnx,zynq-devcfg-1.0";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 8 4>;
reg = <0xf8007000 0x100>;
} ;
ps7_dma_s: ps7-dma@f8003000 {
#dma-cells = <1>;
#dma-channels = <8>;
#dma-requests = <4>;
clock-names = "apb_pclk";
clocks = <&clkc 27>;
compatible = "arm,primecell", "arm,pl330";
interrupt-names = "abort", "dma0", "dma1", "dma2", "dma3",
"dma4", "dma5", "dma6", "dma7";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 13 4>, <0 14 4>, <0 15 4>, <0 16 4>, <0 17 4>, <0 40 4>, <0 41 4>, <0 42 4>, <0 43 4>;
reg = <0xf8003000 0x1000>;
} ;
ps7_ethernet_0: ps7-ethernet@e000b000 {
#address-cells = <1>;
#size-cells = <0>;
clock-names = "ref_clk", "aper_clk";
clocks = <&clkc 13>, <&clkc 30>;
compatible = "xlnx,ps7-ethernet-1.00.a";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 22 4>;
phy-handle = <&phy0>;
phy-mode = "rgmii-id";
reg = <0xe000b000 0x1000>;
xlnx,eth-mode = <0x1>;
xlnx,has-mdio = <0x1>;
xlnx,ptp-enet-clock = <108333336>;
mdio {
#address-cells = <1>;
#size-cells = <0>;
phy0: phy@1 {
compatible = "realtek,RTL8211E";
device_type = "ethernet-phy";
reg = <1>;
} ;
} ;
} ;
ps7_globaltimer_0: ps7-globaltimer@f8f00200 {
clocks = <&clkc 4>;
compatible = "arm,cortex-a9-global-timer";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <1 11 0x301>;
reg = <0xf8f00200 0x100>;
} ;
ps7_gpio_0: ps7-gpio@e000a000 {
#gpio-cells = <2>;
clocks = <&clkc 42>;
compatible = "xlnx,zynq-gpio-1.0";
emio-gpio-width = <64>;
gpio-controller ;
gpio-mask-high = <0xc0000>;
gpio-mask-low = <0xfe81>;
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 20 4>;
reg = <0xe000a000 0x1000>;
} ;
ps7_iop_bus_config_0: ps7-iop-bus-config@e0200000 {
compatible = "xlnx,ps7-iop-bus-config-1.00.a";
reg = <0xe0200000 0x1000>;
} ;
ps7_ocmc_0: ps7-ocmc@f800c000 {
compatible = "xlnx,zynq-ocmc-1.0";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 3 4>;
reg = <0xf800c000 0x1000>;
} ;
ps7_pl310_0: ps7-pl310@f8f02000 {
arm,data-latency = <3 2 2>;
arm,tag-latency = <2 2 2>;
cache-level = <2>;
cache-unified ;
compatible = "arm,pl310-cache";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 2 4>;
reg = <0xf8f02000 0x1000>;
} ;
ps7_qspi_0: ps7-qspi@e000d000 {
clock-names = "ref_clk", "pclk";
clocks = <&clkc 10>, <&clkc 43>;
compatible = "xlnx,zynq-qspi-1.0";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 19 4>;
is-dual = <0>;
num-cs = <1>;
reg = <0xe000d000 0x1000>;
xlnx,fb-clk = <0x1>;
xlnx,qspi-mode = <0x0>;
#address-cells = <1>;
#size-cells = <0>;
flash@0 {
compatible = "n25q128";
reg = <0x0>;
spi-tx-bus-width = <1>;
spi-rx-bus-width = <4>;
spi-max-frequency = <50000000>;
#address-cells = <1>;
#size-cells = <1>;
partition@qspi-fsbl-uboot {
label = "qspi-fsbl-uboot";
reg = <0x0 0x400000>;
};
partition@qspi-linux {
label = "qspi-linux";
reg = <0x400000 0x500000>;
};
partition@qspi-device-tree {
label = "qspi-device-tree";
reg = <0x900000 0x20000>;
};
partition@qspi-user {
label = "qspi-user";
reg = <0x920000 0x6E0000>;
};
};
} ;
ps7_qspi_linear_0: ps7-qspi-linear@fc000000 {
clock-names = "ref_clk", "aper_clk";
clocks = <&clkc 10>, <&clkc 43>;
compatible = "xlnx,ps7-qspi-linear-1.00.a";
reg = <0xfc000000 0x1000000>;
} ;
ps7_scugic_0: ps7-scugic@f8f01000 {
#address-cells = <2>;
#interrupt-cells = <3>;
#size-cells = <1>;
compatible = "arm,cortex-a9-gic", "arm,gic";
interrupt-controller ;
num_cpus = <2>;
num_interrupts = <96>;
reg = <0xf8f01000 0x1000>, <0xf8f00100 0x100>;
} ;
ps7_scutimer_0: ps7-scutimer@f8f00600 {
clocks = <&clkc 4>;
compatible = "arm,cortex-a9-twd-timer";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <1 13 0x301>;
reg = <0xf8f00600 0x20>;
} ;
ps7_scuwdt_0: ps7-scuwdt@f8f00620 {
clocks = <&clkc 4>;
compatible = "xlnx,ps7-scuwdt-1.00.a";
device_type = "watchdog";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <1 14 0x301>;
reg = <0xf8f00620 0xe0>;
} ;
ps7_sd_0: ps7-sdio@e0100000 {
clock-frequency = <50000000>;
clock-names = "clk_xin", "clk_ahb";
clocks = <&clkc 21>, <&clkc 32>;
compatible = "arasan,sdhci-8.9a";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 24 4>;
reg = <0xe0100000 0x1000>;
xlnx,has-cd = <0x1>;
xlnx,has-power = <0x0>;
xlnx,has-wp = <0x1>;
} ;
ps7_slcr_0: ps7-slcr@f8000000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "xlnx,zynq-slcr", "syscon";
ranges ;
reg = <0xf8000000 0x1000>;
clkc: clkc@100 {
#clock-cells = <1>;
clock-output-names = "armpll", "ddrpll", "iopll", "cpu_6or4x", "cpu_3or2x",
"cpu_2x", "cpu_1x", "ddr2x", "ddr3x", "dci",
"lqspi", "smc", "pcap", "gem0", "gem1",
"fclk0", "fclk1", "fclk2", "fclk3", "can0",
"can1", "sdio0", "sdio1", "uart0", "uart1",
"spi0", "spi1", "dma", "usb0_aper", "usb1_aper",
"gem0_aper", "gem1_aper", "sdio0_aper", "sdio1_aper", "spi0_aper",
"spi1_aper", "can0_aper", "can1_aper", "i2c0_aper", "i2c1_aper",
"uart0_aper", "uart1_aper", "gpio_aper", "lqspi_aper", "smc_aper",
"swdt", "dbg_trc", "dbg_apb";
compatible = "xlnx,ps7-clkc";
fclk-enable = <0xf>;
ps-clk-frequency = <50000000>;
reg = <0x100 0x100>;
} ;
} ;
ps7_ttc_0: ps7-ttc@f8001000 {
clocks = <&clkc 6>;
compatible = "cdns,ttc";
interrupt-names = "ttc0", "ttc1", "ttc2";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 10 4>, <0 11 4>, <0 12 4>;
reg = <0xf8001000 0x1000>;
} ;
ps7_uart_1: serial@e0001000 {
clock-names = "uart_clk", "pclk";
clocks = <&clkc 24>, <&clkc 41>;
compatible = "xlnx,xuartps", "cdns,uart-r1p8";
current-speed = <115200>;
device_type = "serial";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 50 4>;
port-number = <0>;
reg = <0xe0001000 0x1000>;
xlnx,has-modem = <0x0>;
} ;
ps7_usb_0: ps7-usb@e0002000 {
clocks = <&clkc 28>;
compatible = "xlnx,ps7-usb-1.00.a", "xlnx,zynq-usb-1.00.a";
dr_mode = "host";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 21 4>;
phy_type = "ulpi";
reg = <0xe0002000 0x1000>;
xlnx,usb-reset = "MIO 46";
} ;
ps7_xadc: ps7-xadc@f8007100 {
clocks = <&clkc 12>;
compatible = "xlnx,zynq-xadc-1.00.a";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 7 4>;
reg = <0xf8007100 0x20>;
} ;
mt9d111_axi_iic@0x41600000 {
compatible = "generic-uio";
reg = < 0x41600000 0x10000>;
};
dmaw4gabor_0@43cb0000 {
compatible = "generic-uio";
reg = < 0x43cb0000 0x10000 >;
};
axis_switch_0@0x43c10000 {
compatible = "generic-uio";
reg = < 0x43c10000 0x10000 >;
};
axis_switch_1@0x43c20000 {
compatible = "generic-uio";
reg = < 0x43c20000 0x10000 >;
};
lap_filter_axis_0@0x43c30000 {
compatible = "generic-uio";
reg = < 0x43c30000 0x10000>;
};
mt9d111_inf_axis_0@0x43C40000 {
compatible = "generic-uio";
reg = < 0x43C40000 0x10000>;
};
bitmap_disp_cntrler_axi_master_0@0x43c00000 {
compatible = "generic-uio";
reg = < 0x43c00000 0x10000>;
};
bitmap_disp_cntrler_axi_master_1@0x43c50000 {
compatible = "generic-uio";
reg = < 0x43c50000 0x10000>;
};
axi_gpio_0@0x41200000 {
compatible = "generic-uio";
reg = < 0x41200000 0x10000>;
};
frame_buffer_bmdc@0x17800000 {
compatible = "generic-uio";
reg = < 0x17800000 0x1000000>;
};
pwm_0@0x43c60000 {
compatible = "generic-uio";
reg = < 0x43c60000 0x10000>;
};
pwm_1@0x43c70000 {
compatible = "generic-uio";
reg = < 0x43c70000 0x10000>;
};
motor_monitor_0@0x43c80000 {
compatible = "generic-uio";
reg = < 0x43c80000 0x10000>;
};
motor_monitor_1@0x43c90000 {
compatible = "generic-uio";
reg = < 0x43c90000 0x10000>;
};
dmar4resize_gray_0@0x43ca0000 {
compatible = "generic-uio";
reg = < 0x43ca0000 0x10000>;
};
rgb2hsv_0@0x43cc0000 {
compatible = "generic-uio";
reg = < 0x43cc0000 0x10000>;
};
ultrasoninc_sensor_inf_0@0x43cd0000 {
compatible = "generic-uio";
reg = < 0x43cd0000 0x10000>;
};
resize_gray_0@0x43ce0000 {
compatible = "generic-uio";
reg = < 0x43ce0000 0x10000>;
};
straight_conv_nn2_axis2_0@0x43cf0000 {
compatible = "generic-uio";
reg = < 0x43cf0000 0x10000>;
};
} ;
} ;
// wl_traceing_cnn.cpp
// 2017/09/18 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <string.h>
#include "xpwm.h"
#include "xdmar4resize_gray.h"
#include "xresize_gray.h"
#include "xstraight_conv_nn2_axis2.h"
#define CMA_START_ADDRESS 0x17800000
#define VIDEO_BUFFER_START_ADDRESS 0x18000000 // Limit 0x18800000, 800*600*4 = 2MBytes * 2
#define HORIZONTAL_PIXEL 800
#define ALL_CHAR_OF_1LINE (HORIZONTAL_PIXEL/8)
#define VERTICAL_PIXEL 600
#define ALL_CHAR_OF_ROW (VERTICAL_PIXEL/8)
#define ALL_DISP_ADDRESS (HORIZONTAL_PIXEL*VERTICAL_PIXEL*4)
#define ALL_DISP_CHARACTOR (HORIZONTAL_PIXEL*VERTICAL_PIXEL)
#define DEBUG
//#define MOTOR_OFF
void cam_i2c_init(volatile unsigned *mt9d111_axi_iic) {
mt9d111_axi_iic[64] = 0x2; // reset tx fifo ,address is 0x100, i2c_control_reg
mt9d111_axi_iic[64] = 0x1; // enable i2c
}
void cam_i2x_write_sync(void) {
// unsigned c;
// c = *cam_i2c_rx_fifo;
// while ((c & 0x84) != 0x80)
// c = *cam_i2c_rx_fifo; // No Bus Busy and TX_FIFO_Empty = 1
usleep(1000);
}
void cam_i2c_write(volatile unsigned *mt9d111_axi_iic, unsigned int device_addr, unsigned int write_addr, unsigned int write_data){
mt9d111_axi_iic[66] = 0x100 | (device_addr & 0xfe); // Slave IIC Write Address, address is 0x108, i2c_tx_fifo
mt9d111_axi_iic[66] = write_addr;
mt9d111_axi_iic[66] = (write_data >> 8)|0xff; // first data
mt9d111_axi_iic[66] = 0x200 | (write_data & 0xff); // second data
cam_i2x_write_sync();
}
// Motor
//
void motor_settings(XPwm *motorLp, XPwm *motorRp){
XPwm_DisableAutoRestart(motorLp);
while(!XPwm_IsIdle(motorLp)) ;
XPwm_Start(motorLp);
XPwm_EnableAutoRestart(motorLp);
XPwm_DisableAutoRestart(motorRp);
while(!XPwm_IsIdle(motorRp)) ;
XPwm_Start(motorRp);
XPwm_EnableAutoRestart(motorRp);
}
void Stopped_Zybot(XPwm *motorLp, XPwm *motorRp){
XPwm_Set_sw_late_V(motorLp, 0);
XPwm_Set_sw_late_V(motorRp, 0);
}
int main()
{
int fd0, fd1, fd2, fd3, fd4, fd5, fd6, fd7, fd8, fd9, fd10;
volatile unsigned *bmdc_axi_lites0, *bmdc_axi_lites1;
volatile unsigned *dmaw4gabor_0;
volatile unsigned *axis_switch_0, *axis_switch_1;
volatile unsigned *mt9d111_inf_axis_0;
volatile unsigned *mt9d111_axi_iic;
volatile unsigned *axi_gpio_0;
volatile unsigned *frame_buffer_bmdc;
char attr[1024];
unsigned long phys_addr;
int i;
XDmar4resize_gray xdmar;
XResize_gray resg;
XStraight_conv_nn2_axis2 stcnn;
XPwm motorL, motorR;
XPwm *motorLp, *motorRp;
motorLp = &motorL;
motorRp = &motorR;
// Bitmap Display Controller 0 AXI4 Lite Slave (UIO6)
fd6 = open("/dev/uio6", O_RDWR); // bitmap_display_controller 0 axi4 lite
if (fd6 < 1){
fprintf(stderr, "/dev/uio6 (bitmap_disp_cntrler_axi_master_0) open error\n");
exit(-1);
}
bmdc_axi_lites0 = (volatile unsigned *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd6, 0);
if (!bmdc_axi_lites0){
fprintf(stderr, "bmdc_axi_lites0 mmap error\n");
exit(-1);
}
// Bitmap Display Controller 1 AXI4 Lite Slave (UIO7)
fd7 = open("/dev/uio7", O_RDWR); // bitmap_display_controller axi4 lite
if (fd7 < 1){
fprintf(stderr, "/dev/uio7 (bitmap_disp_cntrler_axi_master_0) open error\n");
exit(-1);
}
bmdc_axi_lites1 = (volatile unsigned *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd7, 0);
if (!bmdc_axi_lites1){
fprintf(stderr, "bmdc_axi_lites1 mmap error\n");
exit(-1);
}
// dmaw4gabor_0 (UIO1)
fd1 = open("/dev/uio1", O_RDWR); // dmaw4gabor_0 interface AXI4 Lite Slave
if (fd1 < 1){
fprintf(stderr, "/dev/uio1 (dmaw4gabor_0) open error\n");
exit(-1);
}
dmaw4gabor_0 = (volatile unsigned *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0);
if (!dmaw4gabor_0){
fprintf(stderr, "dmaw4gabor_0 mmap error\n");
exit(-1);
}
// mt9d111 i2c AXI4 Lite Slave (UIO0)
fd0 = open("/dev/uio0", O_RDWR); // mt9d111 i2c AXI4 Lite Slave
if (fd0 < 1){
fprintf(stderr, "/dev/uio0 (mt9d111_axi_iic) open error\n");
exit(-1);
}
mt9d111_axi_iic = (volatile unsigned *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd0, 0);
if (!mt9d111_axi_iic){
fprintf(stderr, "mt9d111_axi_iic mmap error\n");
exit(-1);
}
// mt9d111 inf axis AXI4 Lite Slave (UIO5)
fd5 = open("/dev/uio5", O_RDWR); // mt9d111 inf axis AXI4 Lite Slave
if (fd5 < 1){
fprintf(stderr, "/dev/uio5 (mt9d111_inf_axis_0) open error\n");
exit(-1);
}
mt9d111_inf_axis_0 = (volatile unsigned *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd5, 0);
if (!mt9d111_inf_axis_0){
fprintf(stderr, "mt9d111_inf_axis_0 mmap error\n");
exit(-1);
}
// axis_switch_0 (UIO2)
fd2 = open("/dev/uio2", O_RDWR); // axis_switch_0 interface AXI4 Lite Slave
if (fd2 < 1){
fprintf(stderr, "/dev/uio2 (axis_switch_0) open error\n");
exit(-1);
}
axis_switch_0 = (volatile unsigned *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd2, 0);
if (!axis_switch_0){
fprintf(stderr, "axis_switch_0 mmap error\n");
exit(-1);
}
// axis_switch_1 (UIO3)
fd3 = open("/dev/uio3", O_RDWR); // axis_switch_1 interface AXI4 Lite Slave
if (fd3 < 1){
fprintf(stderr, "/dev/uio3 (axis_switch_1) open error\n");
exit(-1);
}
axis_switch_1 = (volatile unsigned *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd3, 0);
if (!axis_switch_1){
fprintf(stderr, "axis_switch_1 mmap error\n");
exit(-1);
}
// axi_gpio_0 (UIO8)
fd8 = open("/dev/uio8", O_RDWR); // axi_gpio_0 interface AXI4 Lite Slave
if (fd8 < 1){
fprintf(stderr, "/dev/uio8 (axi_gpio_0) open error\n");
exit(-1);
}
axi_gpio_0 = (volatile unsigned *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd8, 0);
if (!axi_gpio_0){
fprintf(stderr, "axi_gpio_8 mmap error\n");
exit(-1);
}
// udmabuf0
fd9 = open("/dev/udmabuf0", O_RDWR | O_SYNC); // frame_buffer, The chache is disabled.
if (fd9 == -1){
fprintf(stderr, "/dev/udmabuf0 open error\n");
exit(-1);
}
frame_buffer_bmdc = (volatile unsigned *)mmap(NULL, 5760000, PROT_READ|PROT_WRITE, MAP_SHARED, fd9, 0);
if (!frame_buffer_bmdc){
fprintf(stderr, "frame_buffer_bmdc mmap error\n");
exit(-1);
}
// axis_switch_1, 1to2 ,Select M00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
axis_switch_1[16] = 0x0; // 0x40 = 0
axis_switch_1[17] = 0x80000000; // 0x44 = 0x80000000, disable
axis_switch_1[18] = 0x80000000; // 0x48 = 0x80000000, disable
axis_switch_1[19] = 0x80000000; // 0x4C = 0x80000000, disable
axis_switch_1[0] = 0x2; // Comit registers
// axis_switch_0, 2to1, Select S00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
axis_switch_0[16] = 0x0; // 0x40 = 0;
axis_switch_0[0] = 0x2; // Comit registers
// phys_addr of udmabuf0
fd10 = open("/sys/devices/virtual/udmabuf/udmabuf0/phys_addr", O_RDONLY);
if (fd10 == -1){
fprintf(stderr, "/sys/devices/virtual/udmabuf/udmabuf0/phys_addr open error\n");
exit(-1);
}
read(fd10, attr, 1024);
sscanf(attr, "%lx", &phys_addr);
close(fd10);
printf("phys_addr = %x\n", (int)phys_addr);
// DMAW4Gabor Initialization sequence
dmaw4gabor_0[6] = (unsigned int)phys_addr; // Data signal of frame_buffer0
dmaw4gabor_0[8] = (unsigned int)phys_addr+ALL_DISP_ADDRESS; // Data signal of frame_buffer1
dmaw4gabor_0[0] = 0x1; // ap_start = 1
dmaw4gabor_0[0] = 0x80; // auto_restart = 1
// bitmap display controller settings
bmdc_axi_lites0[0] = (unsigned int)phys_addr; // Bitmap Display Controller 0 start
bmdc_axi_lites1[0] = (unsigned int)phys_addr; // Bitmap Display Controller 1 start
mt9d111_inf_axis_0[0] = (unsigned int)phys_addr; // Camera Interface start (Address is dummy)
// CMOS Camera initialize, MT9D111
cam_i2c_init(mt9d111_axi_iic);
cam_i2c_write(mt9d111_axi_iic, 0xba, 0xf0, 0x1); // Changed regster map to IFP page 1
cam_i2c_write(mt9d111_axi_iic, 0xba, 0x97, 0x20); // RGB Mode, RGB565
mt9d111_inf_axis_0[1] = 0;
// Initialization of xdmar4resize_gray, xresize_gray, xstraight_conv_nn2_axis2
if(XDmar4resize_gray_Initialize(&xdmar, "dmar4resize_gray_0") != XST_SUCCESS){
fprintf(stderr, "dmar4resize_gray_0 open error\n");
exit(-1);
}
if(XResize_gray_Initialize(&resg, "resize_gray_0") != XST_SUCCESS){
fprintf(stderr, "resize_gray_0 open error\n");
exit(-1);
}
if(XStraight_conv_nn2_axis2_Initialize(&stcnn, "straight_conv_nn2_axis2_0") != XST_SUCCESS){
fprintf(stderr, "straight_conv_nn2_axis2 open error\n");
exit(-1);
}
XDmar4resize_gray_Set_frame_buffer0(&xdmar ,(unsigned int)phys_addr);
XDmar4resize_gray_Set_frame_buffer1(&xdmar ,(unsigned int)phys_addr+ALL_DISP_ADDRESS);
XDmar4resize_gray_Start(&xdmar);
XDmar4resize_gray_EnableAutoRestart(&xdmar);
XResize_gray_Start(&resg);
XResize_gray_EnableAutoRestart(&resg);
XStraight_conv_nn2_axis2_Start(&stcnn);
XStraight_conv_nn2_axis2_EnableAutoRestart(&stcnn);
// Initialization of motor
if (XPwm_Initialize(motorLp, "pwm_0") != XST_SUCCESS){
fprintf(stderr,"pwm_0 (Left) open error\n");
exit(-1);
}
if (XPwm_Initialize(motorRp, "pwm_1") != XST_SUCCESS){
fprintf(stderr,"pwm_1 (Right) open error\n");
exit(-1);
}
// The Motors is rotated in the forward direction.
XPwm_Set_sw_late_V(motorLp, 0);
XPwm_Set_dir_V(motorLp, 1);
XPwm_Set_sw_late_V(motorRp, 0);
XPwm_Set_dir_V(motorRp, 0);
motor_settings(motorLp, motorRp);
// main loop
printf("White line Tracking start. \n");
while(1){
usleep(10000); // 10 ms Wait
while(!XStraight_conv_nn2_axis2_Get_outs_V_vld(&stcnn)) ;
switch((int)XStraight_conv_nn2_axis2_Get_outs_V(&stcnn)){
case 0 : // left turn
#ifndef MOTOR_OFF
XPwm_Set_sw_late_V(&motorL, 15);
XPwm_Set_sw_late_V(&motorR, 25);
#endif
#ifdef DEBUG
printf("Left turn\n"); fflush(stdout);
#endif
break;
case 1 : // straight
#ifndef MOTOR_OFF
XPwm_Set_sw_late_V(&motorL, 22);
XPwm_Set_sw_late_V(&motorR, 22);
#endif
#ifdef DEBUG
printf("Go straight\n"); fflush(stdout);
#endif
break;
default : // 2, right turn
#ifndef MOTOR_OFF
XPwm_Set_sw_late_V(&motorL, 25);
XPwm_Set_sw_late_V(&motorR, 15);
#endif
#ifdef DEBUG
printf("Right turn\n"); fflush(stdout);
#endif
}
}
munmap((void *)bmdc_axi_lites0, 0x10000);
munmap((void *)bmdc_axi_lites1, 0x10000);
munmap((void *)dmaw4gabor_0, 0x10000);
munmap((void *)mt9d111_inf_axis_0, 0x10000);
munmap((void *)mt9d111_axi_iic, 0x10000);
munmap((void *)axis_switch_0, 0x10000);
munmap((void *)axis_switch_1, 0x10000);
munmap((void *)axi_gpio_0, 0x10000);
munmap((void *)frame_buffer_bmdc, 576000);
close(fd0);
close(fd1);
close(fd2);
close(fd3);
close(fd4);
close(fd5);
close(fd6);
close(fd7);
close(fd8);
close(fd9);
return(0);
}
# Makefile(wl_traceing_cnn)
# Referred to http://www.ie.u-ryukyu.ac.jp/~e085739/c.makefile.tuts.html
PROGRAM = wl_traceing_cnn
OBJS = wl_traceing_cnn.o xpwm_linux.o xpwm.o xdmar4resize_gray_linux.o xdmar4resize_gray.o xresize_gray_linux.o xresize_gray.o xstraight_conv_nn2_axis2_linux.o xstraight_conv_nn2_axis2.o
CC = gcc
CFLAGS = -Wall -O2
.SUFFIXES: .c .o
.PHONY: all
all: wl_traceing_cnn
wl_traceing_cnn: $(OBJS)
$(CC) -Wall -o $@ $(OBJS)
.c.o:
$(CC) $(CFLAGS) -c $<
.PHONY: clean
clean:
$(RM) $(PROGRAM) $(OBJS)
/* * wl_tracing_cnn.c * * Created on: 2017/09/18 * Author: Masaaki */
// ベアメタル・アプリケーションでは、CNN がきちんと動作しているを確認するのみとする
// 2017/09/19 : 直進、左旋回、右旋回の各出力の値を表示するように変更
//
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "xil_io.h"
#include "xparameters.h"
#include "sleep.h"
#include "xdmaw4gabor.h"
#include "xdmar4resize_gray.h"
#include "xresize_gray.h"
#include "xstraight_conv_nn2_axis2.h"
#define NUMBER_OF_WRITE_FRAMES 3 // Note: If not at least 3 or more, the image is not displayed in succession.
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
#define PIXEL_NUM_OF_BYTES 4
#define FRAME_BUFFER_ADDRESS 0x10000000
#define ALL_DISP_ADDRESS (HORIZONTAL_PIXELS*VERTICAL_LINES*PIXEL_NUM_OF_BYTES)
void cam_i2c_init(volatile unsigned *mt9d111_i2c_axi_lites) {
mt9d111_i2c_axi_lites[64] = 0x2; // reset tx fifo ,address is 0x100, i2c_control_reg
mt9d111_i2c_axi_lites[64] = 0x1; // enable i2c
}
void cam_i2x_write_sync(void) {
// unsigned c;
// c = *cam_i2c_rx_fifo;
// while ((c & 0x84) != 0x80)
// c = *cam_i2c_rx_fifo; // No Bus Busy and TX_FIFO_Empty = 1
usleep(1000);
}
void cam_i2c_write(volatile unsigned *mt9d111_i2c_axi_lites, unsigned int device_addr, unsigned int write_addr, unsigned int write_data){
mt9d111_i2c_axi_lites[66] = 0x100 | (device_addr & 0xfe); // Slave IIC Write Address, address is 0x108, i2c_tx_fifo
mt9d111_i2c_axi_lites[66] = write_addr;
mt9d111_i2c_axi_lites[66] = (write_data >> 8)|0xff; // first data
mt9d111_i2c_axi_lites[66] = 0x200 | (write_data & 0xff); // second data
cam_i2x_write_sync();
}
int main(){
XDmar4resize_gray xdmar;
XResize_gray resg;
XStraight_conv_nn2_axis2 stcnn;
XDmaw4gabor xdma4g;
int i;
uint32_t res;
int result[4];
// axis_switch_1, 1to2 ,Select M00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR+0x40), 0x0);
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR+0x44), 0x80000000); // disable
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR+0x48), 0x80000000); // disable
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_1_BASEADDR), 0x2); // Commit registers
// axis_switch_0, 2to1, Select S00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_0_BASEADDR+0x40), 0x0);
Xil_Out32((XPAR_CAMERA_INTERFACE_AXIS_SWITCH_0_BASEADDR), 0x2); // Commit registers
XDmaw4gabor_Initialize(&xdma4g, 0);
XDmaw4gabor_Set_frame_buffer0(&xdma4g, FRAME_BUFFER_ADDRESS);
XDmaw4gabor_Set_frame_buffer1(&xdma4g, FRAME_BUFFER_ADDRESS+ALL_DISP_ADDRESS);
XDmaw4gabor_Start(&xdma4g);
XDmaw4gabor_EnableAutoRestart(&xdma4g);
// mt9d111_inf_axis_0, axi_iic_0, bitmap_disp_cntrler_axi_master_0
volatile unsigned int *bmdc0_axi_lites;
volatile unsigned int *bmdc1_axi_lites;
volatile unsigned int *mt9d111_axi_lites;
volatile unsigned int *mt9d111_i2c_axi_lites;
bmdc0_axi_lites = (volatile unsigned *)XPAR_BITMAP_DISP_CNTRLER_AXI_MASTER_0_BASEADDR;
bmdc1_axi_lites = (volatile unsigned *)XPAR_BITMAP_DISP_CNTRLER_AXI_MASTER_1_BASEADDR;
mt9d111_axi_lites = (volatile unsigned *)XPAR_CAMERA_INTERFACE_MT9D111_INF_AXIS_0_BASEADDR;
mt9d111_i2c_axi_lites = (volatile unsigned *)XPAR_CAMERA_INTERFACE_AXI_IIC_0_BASEADDR;
bmdc0_axi_lites[0] = (volatile unsigned int)FRAME_BUFFER_ADDRESS; // Bitmap Display Controller 0 start
bmdc1_axi_lites[0] = (volatile unsigned int)FRAME_BUFFER_ADDRESS; // Bitmap Display Controller 1 start
mt9d111_axi_lites[0] = (volatile unsigned int)FRAME_BUFFER_ADDRESS; // Camera Interface start (Address is dummy)
// CMOS Camera initialize, MT9D111
cam_i2c_init(mt9d111_i2c_axi_lites);
cam_i2c_write(mt9d111_i2c_axi_lites, 0xba, 0xf0, 0x1); // Changed regster map to IFP page 1
cam_i2c_write(mt9d111_i2c_axi_lites, 0xba, 0x97, 0x20); // RGB Mode, RGB565
mt9d111_axi_lites[1] = 0; // One_shot_mode is disabled
XDmar4resize_gray_Initialize(&xdmar, 0);
XResize_gray_Initialize(&resg, 0);
XStraight_conv_nn2_axis2_Initialize(&stcnn, 0);
XDmar4resize_gray_Set_frame_buffer0(&xdmar ,FRAME_BUFFER_ADDRESS);
XDmar4resize_gray_Set_frame_buffer1(&xdmar ,FRAME_BUFFER_ADDRESS+ALL_DISP_ADDRESS);
XDmar4resize_gray_Start(&xdmar);
XDmar4resize_gray_EnableAutoRestart(&xdmar);
XResize_gray_Start(&resg);
XResize_gray_EnableAutoRestart(&resg);
XStraight_conv_nn2_axis2_Start(&stcnn);
XStraight_conv_nn2_axis2_EnableAutoRestart(&stcnn);
while(1){
sleep(1);
while(!XStraight_conv_nn2_axis2_Get_outs_V_vld(&stcnn)) ;
printf("out = %d\n", (int)XStraight_conv_nn2_axis2_Get_outs_V(&stcnn));
for(i=0; i<2; i++){
XStraight_conv_nn2_axis2_Read_dot2_V_Words(&stcnn, i, &res, 1);
result[i*2] = res & 0xffff;
if(result[i*2] & 0x8000) // minus
result[i*2] = 0xffff0000 | result[i*2]; // Sign extension
result[i*2+1] = (res & 0xffff0000) >> 16;
if(result[i*2+1] & 0x8000) // minus
result[i*2+1] = 0xffff0000 | result[i*2+1]; // Sign extension
}
for(i=0; i<3; i++)
printf("result[%d] = %x ", i , result[i]);
printf("\n");
}
return(0);
}
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// 0x18 : Data signal of outs_V
// bit 1~0 - outs_V[1:0] (Read)
// others - reserved
// 0x1c : Control signal of outs_V
// bit 0 - outs_V_ap_vld (Read/COR)
// others - reserved
// 0x20 ~
// 0x27 : Memory 'dot2_V' (3 * 16b)
// Word n : bit [15: 0] - dot2_V[2n]
// bit [31:16] - dot2_V[2n+1]
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
// straight_conv_nn2_axis3.cpp
// 2017/09/09 by marsee
// 畳み込み層のカーネル数 2
// AXI4 Stream入力 番号出力
// 2017/09/18 : dot2[3]の出力も追加
//
#include <ap_fixed.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#define REDUSED_ROW 45
#define REDUSED_COULMN 60
#define NUM_OF_KERNELS 2
#define COULMN_PIXELS 56
#define ROW_PIXELS 10
#define ALL_PIXELS 560
#define NUM_OF_OUTPUT 3
int max_ap_fixed(ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT], ap_uint<2> &out_num);
int straight_conv_nn2_axis2(hls::stream<ap_axiu<32,1,1,1> >& ins, ap_uint<2> &outs,
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT]){
#pragma HLS INTERFACE s_axilite port=dot2
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE s_axilite port=outs
#pragma HLS INTERFACE axis register both port=ins
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[ROW_PIXELS][COULMN_PIXELS];
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_axiu<32,1,1,1> pix;
do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// user が 1になった時にフレームがスタートする
ins >> pix;
} while(pix.user == 0);
// 10 x 56 に整形
buf_copy1: for(int i=0; i<REDUSED_ROW; i++){
buf_copy2: for(int j=0; j<REDUSED_COULMN; j++){
if (!(i==0 && j==0)) // 最初の入力はすでに入力されている
ins >> pix; // AXI4-Stream からの入力
if((i>=33 && i<33+ROW_PIXELS) && (j>=2 && j<2+COULMN_PIXELS)){
buf[i-33][j-2] = (ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>)((ap_ufixed<16, 8, AP_TRN_ZERO, AP_SAT>)(pix.data & 0xff) / 256);
}
}
}
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<NUM_OF_KERNELS; i++){ // カーネルの個数
CONV2: for(int j=0; j<ROW_PIXELS-4; j++){
CONV3: for(int k=0; k<COULMN_PIXELS-4; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<NUM_OF_KERNELS; i++){
POOL2: for(int j=0; j<ROW_PIXELS-4; j += 2){
POOL3: for(int k=0; k<COULMN_PIXELS-4; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<NUM_OF_KERNELS; i++){
af1_dot3: for(int j=0; j<(ROW_PIXELS-4)/2; j++){
af1_dot4: for(int k=0; k<(COULMN_PIXELS-4)/2; k++){
dot1[col] += pool_out[i][j][k]*af1_weight[i*((ROW_PIXELS-4)/2)*((COULMN_PIXELS-4)/2)+j*((COULMN_PIXELS-4)/2)+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<NUM_OF_OUTPUT; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
}
max_ap_fixed(dot2, outs);
return(0);
}
int max_ap_fixed(ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT], ap_uint<2> &out_num){
int max_id;
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> max;
for(int i=0; i<NUM_OF_OUTPUT; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
out_num = (ap_uint<2>)max_id;
return(0);
}
// straight_conv_nn2_axis3_tb.cpp
// 2017/09/09 by marsee
//
// 2017/09/18 : straight_conv_nn2_axis3.cpp に dot2[3]の出力も追加
//
#include <iostream>
#include "hls_opencv.h"
#include "ap_axi_sdata.h"
#include "hls_video.h"
#define MAX_HEIGHT 600
#define MAX_WIDTH 800
typedef hls::stream<ap_axiu<32,1,1,1> > AXI_STREAM;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1> GRAY_IMAGE;
using namespace cv;
#define NUM_OF_OUTPUT 3
#define MAX_LOOP_COUNT 11
//#define MAX_LOOP_COUNT 1 // for C/RTL Co-Simulation
#define STRAIGHT_IMAGE_NAME "straight"
#define LEFT_TURN_IMAGE_NAME "left_turn"
#define RIGHT_TURN_IMAGE_NAME "right_turn"
int straight_conv_nn2_axis2(hls::stream<ap_axiu<32,1,1,1> >& ins, ap_uint<2> &outs,
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT]);
int resize_gray(AXI_STREAM& ins, AXI_STREAM& outs);
int main_output_loop(char *buf);
int main () {
char buf[200];
sprintf(buf, "%s", STRAIGHT_IMAGE_NAME);
main_output_loop(buf);
sprintf(buf, "%s", LEFT_TURN_IMAGE_NAME);
main_output_loop(buf);
sprintf(buf, "%s", RIGHT_TURN_IMAGE_NAME);
main_output_loop(buf);
return(0);
}
int main_output_loop(char *buf){
char bmp_file_name[200];
ap_uint<2> outs;
AXI_STREAM src_axi, dst_axi;
Mat src;
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
for(int i=0; i<MAX_LOOP_COUNT; i++){
sprintf(bmp_file_name, "%s%d.bmp", buf, i);
// OpenCV で 画像を読み込む
src = imread(bmp_file_name);
// BGR から RGBへ変換
Mat src_rgb;
cvtColor(src, src_rgb, CV_BGR2RGB);
// Mat フォーマットから AXI4 Stream へ変換
cvMat2AXIvideo(src_rgb, src_axi);
// resize_gray() 関数をコール
resize_gray(src_axi, dst_axi);
straight_conv_nn2_axis2(dst_axi, outs, dot2);
printf("*%s\n", bmp_file_name);
printf("outs = %d\n", (int)outs);
for(int i=0; i<NUM_OF_OUTPUT; i++)
printf("dot2[%d] = %f ", i, (float)dot2[i]);
printf("\n");
}
return(0);
}
int resize_gray(AXI_STREAM& ins, AXI_STREAM& outs){
RGB_IMAGE org_img(600, 800);
GRAY_IMAGE org_img_g(600, 800);
GRAY_IMAGE resize_img_g(45, 60);
RGB_IMAGE resize_img(45, 60);
hls::AXIvideo2Mat(ins, org_img);
hls::CvtColor<HLS_RGB2GRAY>(org_img, org_img_g);
hls::Resize(org_img_g, resize_img_g);
hls::CvtColor<HLS_GRAY2RGB>(resize_img_g, resize_img);
hls::Mat2AXIvideo(resize_img, outs);
return(0);
}
/*
* Device Tree for Zybo board
* Partially generated by Device Tree Generator 1.1
*
* (C) Copyright 2007-2013 Xilinx, Inc.
* (C) Copyright 2007-2013 Michal Simek
* (C) Copyright 2007-2012 PetaLogix Qld Pty Ltd
* (C) Copyright 2014 Digilent, Inc.
*
* Michal SIMEK <monstr@monstr.eu>
* Tinghui Wang <steven.wang@digilentinc.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston,
* MA 02111-1307 USA
*
*/
/dts-v1/;
/ {
#address-cells = <1>;
#size-cells = <1>;
compatible = "xlnx,zynq-7000";
model = "Xilinx Zynq";
aliases {
ethernet0 = &ps7_ethernet_0;
serial0 = &ps7_uart_1;
spi0 = &ps7_qspi_0;
} ;
chosen {
/* bootargs = "console=ttyPS0,115200 root=/dev/ram rw earlyprintk"; */
bootargs = "console=ttyPS0,115200 root=/dev/mmcblk0p2 rw earlyprintk rootfstype=ext4 rootwait devtmpfs.mount=1 coherent_pool=16M";
linux,stdout-path = "/amba@0/serial@e0001000";
} ;
cpus {
#address-cells = <1>;
#size-cells = <0>;
ps7_cortexa9_0: cpu@0 {
bus-handle = <&ps7_axi_interconnect_0>;
clock-latency = <1000>;
clocks = <&clkc 3>;
compatible = "arm,cortex-a9";
device_type = "cpu";
interrupt-handle = <&ps7_scugic_0>;
/* operating-points = <666667 1000000 333334 1000000 >; */
operating-points = <650000 1000000 >;
reg = <0x0>;
} ;
ps7_cortexa9_1: cpu@1 {
bus-handle = <&ps7_axi_interconnect_0>;
clocks = <&clkc 3>;
compatible = "arm,cortex-a9";
device_type = "cpu";
interrupt-handle = <&ps7_scugic_0>;
reg = <0x1>;
} ;
} ;
pmu {
compatible = "arm,cortex-a9-pmu";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 5 4>, <0 6 4>;
reg = <0xf8891000 0x1000>, <0xf8893000 0x1000>;
reg-names = "cpu0", "cpu1";
} ;
ps7_ddr_0: memory@0 {
device_type = "memory";
reg = <0x0 0x20000000>;
} ;
ps7_axi_interconnect_0: amba@0 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "xlnx,ps7-axi-interconnect-1.00.a", "simple-bus";
ranges ;
ps7_afi_0: ps7-afi@f8008000 {
compatible = "xlnx,ps7-afi-1.00.a";
reg = <0xf8008000 0x1000>;
} ;
ps7_afi_1: ps7-afi@f8009000 {
compatible = "xlnx,ps7-afi-1.00.a";
reg = <0xf8009000 0x1000>;
} ;
ps7_afi_2: ps7-afi@f800a000 {
compatible = "xlnx,ps7-afi-1.00.a";
reg = <0xf800a000 0x1000>;
} ;
ps7_afi_3: ps7-afi@f800b000 {
compatible = "xlnx,ps7-afi-1.00.a";
reg = <0xf800b000 0x1000>;
} ;
ps7_ddrc_0: ps7-ddrc@f8006000 {
compatible = "xlnx,zynq-ddrc-1.0";
reg = <0xf8006000 0x1000>;
xlnx,has-ecc = <0x0>;
} ;
ps7_dev_cfg_0: ps7-dev-cfg@f8007000 {
clock-names = "ref_clk", "fclk0", "fclk1", "fclk2", "fclk3";
clocks = <&clkc 12>, <&clkc 15>, <&clkc 16>, <&clkc 17>, <&clkc 18>;
compatible = "xlnx,zynq-devcfg-1.0";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 8 4>;
reg = <0xf8007000 0x100>;
} ;
ps7_dma_s: ps7-dma@f8003000 {
#dma-cells = <1>;
#dma-channels = <8>;
#dma-requests = <4>;
clock-names = "apb_pclk";
clocks = <&clkc 27>;
compatible = "arm,primecell", "arm,pl330";
interrupt-names = "abort", "dma0", "dma1", "dma2", "dma3",
"dma4", "dma5", "dma6", "dma7";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 13 4>, <0 14 4>, <0 15 4>, <0 16 4>, <0 17 4>, <0 40 4>, <0 41 4>, <0 42 4>, <0 43 4>;
reg = <0xf8003000 0x1000>;
} ;
ps7_ethernet_0: ps7-ethernet@e000b000 {
#address-cells = <1>;
#size-cells = <0>;
clock-names = "ref_clk", "aper_clk";
clocks = <&clkc 13>, <&clkc 30>;
compatible = "xlnx,ps7-ethernet-1.00.a";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 22 4>;
phy-handle = <&phy0>;
phy-mode = "rgmii-id";
reg = <0xe000b000 0x1000>;
xlnx,eth-mode = <0x1>;
xlnx,has-mdio = <0x1>;
xlnx,ptp-enet-clock = <108333336>;
mdio {
#address-cells = <1>;
#size-cells = <0>;
phy0: phy@1 {
compatible = "realtek,RTL8211E";
device_type = "ethernet-phy";
reg = <1>;
} ;
} ;
} ;
ps7_globaltimer_0: ps7-globaltimer@f8f00200 {
clocks = <&clkc 4>;
compatible = "arm,cortex-a9-global-timer";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <1 11 0x301>;
reg = <0xf8f00200 0x100>;
} ;
ps7_gpio_0: ps7-gpio@e000a000 {
#gpio-cells = <2>;
clocks = <&clkc 42>;
compatible = "xlnx,zynq-gpio-1.0";
emio-gpio-width = <64>;
gpio-controller ;
gpio-mask-high = <0xc0000>;
gpio-mask-low = <0xfe81>;
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 20 4>;
reg = <0xe000a000 0x1000>;
} ;
ps7_iop_bus_config_0: ps7-iop-bus-config@e0200000 {
compatible = "xlnx,ps7-iop-bus-config-1.00.a";
reg = <0xe0200000 0x1000>;
} ;
ps7_ocmc_0: ps7-ocmc@f800c000 {
compatible = "xlnx,zynq-ocmc-1.0";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 3 4>;
reg = <0xf800c000 0x1000>;
} ;
ps7_pl310_0: ps7-pl310@f8f02000 {
arm,data-latency = <3 2 2>;
arm,tag-latency = <2 2 2>;
cache-level = <2>;
cache-unified ;
compatible = "arm,pl310-cache";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 2 4>;
reg = <0xf8f02000 0x1000>;
} ;
ps7_qspi_0: ps7-qspi@e000d000 {
clock-names = "ref_clk", "pclk";
clocks = <&clkc 10>, <&clkc 43>;
compatible = "xlnx,zynq-qspi-1.0";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 19 4>;
is-dual = <0>;
num-cs = <1>;
reg = <0xe000d000 0x1000>;
xlnx,fb-clk = <0x1>;
xlnx,qspi-mode = <0x0>;
#address-cells = <1>;
#size-cells = <0>;
flash@0 {
compatible = "n25q128";
reg = <0x0>;
spi-tx-bus-width = <1>;
spi-rx-bus-width = <4>;
spi-max-frequency = <50000000>;
#address-cells = <1>;
#size-cells = <1>;
partition@qspi-fsbl-uboot {
label = "qspi-fsbl-uboot";
reg = <0x0 0x400000>;
};
partition@qspi-linux {
label = "qspi-linux";
reg = <0x400000 0x500000>;
};
partition@qspi-device-tree {
label = "qspi-device-tree";
reg = <0x900000 0x20000>;
};
partition@qspi-user {
label = "qspi-user";
reg = <0x920000 0x6E0000>;
};
};
} ;
ps7_qspi_linear_0: ps7-qspi-linear@fc000000 {
clock-names = "ref_clk", "aper_clk";
clocks = <&clkc 10>, <&clkc 43>;
compatible = "xlnx,ps7-qspi-linear-1.00.a";
reg = <0xfc000000 0x1000000>;
} ;
ps7_scugic_0: ps7-scugic@f8f01000 {
#address-cells = <2>;
#interrupt-cells = <3>;
#size-cells = <1>;
compatible = "arm,cortex-a9-gic", "arm,gic";
interrupt-controller ;
num_cpus = <2>;
num_interrupts = <96>;
reg = <0xf8f01000 0x1000>, <0xf8f00100 0x100>;
} ;
ps7_scutimer_0: ps7-scutimer@f8f00600 {
clocks = <&clkc 4>;
compatible = "arm,cortex-a9-twd-timer";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <1 13 0x301>;
reg = <0xf8f00600 0x20>;
} ;
ps7_scuwdt_0: ps7-scuwdt@f8f00620 {
clocks = <&clkc 4>;
compatible = "xlnx,ps7-scuwdt-1.00.a";
device_type = "watchdog";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <1 14 0x301>;
reg = <0xf8f00620 0xe0>;
} ;
ps7_sd_0: ps7-sdio@e0100000 {
clock-frequency = <50000000>;
clock-names = "clk_xin", "clk_ahb";
clocks = <&clkc 21>, <&clkc 32>;
compatible = "arasan,sdhci-8.9a";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 24 4>;
reg = <0xe0100000 0x1000>;
xlnx,has-cd = <0x1>;
xlnx,has-power = <0x0>;
xlnx,has-wp = <0x1>;
} ;
ps7_slcr_0: ps7-slcr@f8000000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "xlnx,zynq-slcr", "syscon";
ranges ;
reg = <0xf8000000 0x1000>;
clkc: clkc@100 {
#clock-cells = <1>;
clock-output-names = "armpll", "ddrpll", "iopll", "cpu_6or4x", "cpu_3or2x",
"cpu_2x", "cpu_1x", "ddr2x", "ddr3x", "dci",
"lqspi", "smc", "pcap", "gem0", "gem1",
"fclk0", "fclk1", "fclk2", "fclk3", "can0",
"can1", "sdio0", "sdio1", "uart0", "uart1",
"spi0", "spi1", "dma", "usb0_aper", "usb1_aper",
"gem0_aper", "gem1_aper", "sdio0_aper", "sdio1_aper", "spi0_aper",
"spi1_aper", "can0_aper", "can1_aper", "i2c0_aper", "i2c1_aper",
"uart0_aper", "uart1_aper", "gpio_aper", "lqspi_aper", "smc_aper",
"swdt", "dbg_trc", "dbg_apb";
compatible = "xlnx,ps7-clkc";
fclk-enable = <0xf>;
ps-clk-frequency = <50000000>;
reg = <0x100 0x100>;
} ;
} ;
ps7_ttc_0: ps7-ttc@f8001000 {
clocks = <&clkc 6>;
compatible = "cdns,ttc";
interrupt-names = "ttc0", "ttc1", "ttc2";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 10 4>, <0 11 4>, <0 12 4>;
reg = <0xf8001000 0x1000>;
} ;
ps7_uart_1: serial@e0001000 {
clock-names = "uart_clk", "pclk";
clocks = <&clkc 24>, <&clkc 41>;
compatible = "xlnx,xuartps", "cdns,uart-r1p8";
current-speed = <115200>;
device_type = "serial";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 50 4>;
port-number = <0>;
reg = <0xe0001000 0x1000>;
xlnx,has-modem = <0x0>;
} ;
ps7_usb_0: ps7-usb@e0002000 {
clocks = <&clkc 28>;
compatible = "xlnx,ps7-usb-1.00.a", "xlnx,zynq-usb-1.00.a";
dr_mode = "host";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 21 4>;
phy_type = "ulpi";
reg = <0xe0002000 0x1000>;
xlnx,usb-reset = "MIO 46";
} ;
ps7_xadc: ps7-xadc@f8007100 {
clocks = <&clkc 12>;
compatible = "xlnx,zynq-xadc-1.00.a";
interrupt-parent = <&ps7_scugic_0>;
interrupts = <0 7 4>;
reg = <0xf8007100 0x20>;
} ;
mt9d111_axi_iic@0x41600000 {
compatible = "generic-uio";
reg = < 0x41600000 0x10000>;
};
dmaw4gabor_0@43cb0000 {
compatible = "generic-uio";
reg = < 0x43cb0000 0x10000 >;
};
axis_switch_0@0x43c10000 {
compatible = "generic-uio";
reg = < 0x43c10000 0x10000 >;
};
axis_switch_1@0x43c20000 {
compatible = "generic-uio";
reg = < 0x43c20000 0x10000 >;
};
lap_filter_axis_0@0x43c30000 {
compatible = "generic-uio";
reg = < 0x43c30000 0x10000>;
};
mt9d111_inf_axis_0@0x43C40000 {
compatible = "generic-uio";
reg = < 0x43C40000 0x10000>;
};
bitmap_disp_cntrler_axi_master_0@0x43c00000 {
compatible = "generic-uio";
reg = < 0x43c00000 0x10000>;
};
bitmap_disp_cntrler_axi_master_1@0x43c50000 {
compatible = "generic-uio";
reg = < 0x43c50000 0x10000>;
};
axi_gpio_0@0x41200000 {
compatible = "generic-uio";
reg = < 0x41200000 0x10000>;
};
frame_buffer_bmdc@0x17800000 {
compatible = "generic-uio";
reg = < 0x17800000 0x1000000>;
};
pwm_0@0x43c60000 {
compatible = "generic-uio";
reg = < 0x43c60000 0x10000>;
};
pwm_1@0x43c70000 {
compatible = "generic-uio";
reg = < 0x43c70000 0x10000>;
};
motor_monitor_0@0x43c80000 {
compatible = "generic-uio";
reg = < 0x43c80000 0x10000>;
};
motor_monitor_1@0x43c90000 {
compatible = "generic-uio";
reg = < 0x43c90000 0x10000>;
};
dmar4resize_gray_0@0x43ca0000 {
compatible = "generic-uio";
reg = < 0x43ca0000 0x10000>;
};
rgb2hsv_0@0x43cc0000 {
compatible = "generic-uio";
reg = < 0x43cc0000 0x10000>;
};
ultrasoninc_sensor_inf_0@0x43cd0000 {
compatible = "generic-uio";
reg = < 0x43cd0000 0x10000>;
};
resize_gray_0@0x43ce0000 {
compatible = "generic-uio";
reg = < 0x43ce0000 0x10000>;
};
straight_conv_nn2_axis2_0@0x43cf0000 {
compatible = "generic-uio";
reg = < 0x43cf0000 0x10000>;
};
} ;
} ;
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
make: 'csim.exe' は更新済みです.
*straight0.bmp
outs = 1
*straight1.bmp
outs = 1
*straight2.bmp
outs = 1
*straight3.bmp
outs = 1
*straight4.bmp
outs = 1
*straight5.bmp
outs = 1
*straight6.bmp
outs = 1
*straight7.bmp
outs = 1
*straight8.bmp
outs = 1
*straight9.bmp
outs = 1
*straight10.bmp
outs = 1
*left_turn0.bmp
outs = 0
*left_turn1.bmp
outs = 0
*left_turn2.bmp
outs = 0
*left_turn3.bmp
outs = 0
*left_turn4.bmp
outs = 0
*left_turn5.bmp
outs = 0
*left_turn6.bmp
outs = 0
*left_turn7.bmp
outs = 0
*left_turn8.bmp
outs = 0
*left_turn9.bmp
outs = 0
*left_turn10.bmp
outs = 0
*right_turn0.bmp
outs = 2
*right_turn1.bmp
outs = 2
*right_turn2.bmp
outs = 2
*right_turn3.bmp
outs = 2
*right_turn4.bmp
outs = 2
*right_turn5.bmp
outs = 2
*right_turn6.bmp
outs = 1
*right_turn7.bmp
outs = 2
*right_turn8.bmp
outs = 2
*right_turn9.bmp
outs = 2
*right_turn10.bmp
outs = 2
INFO: [SIM 1] CSim done with 0 errors.
INFO: [SIM 3] *************** CSIM finish ***************
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// 0x18 : Data signal of outs_V
// bit 1~0 - outs_V[1:0] (Read)
// others - reserved
// 0x1c : Control signal of outs_V
// bit 0 - outs_V_ap_vld (Read/COR)
// others - reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
// straight_conv_nn2_axis2.cpp
// 2017/09/09 by marsee
// 畳み込み層のカーネル数 2
// AXI4 Stream入力 番号出力
//
#include <ap_fixed.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#define REDUSED_ROW 45
#define REDUSED_COULMN 60
#define NUM_OF_KERNELS 2
#define COULMN_PIXELS 56
#define ROW_PIXELS 10
#define ALL_PIXELS 560
#define NUM_OF_OUTPUT 3
int max_ap_fixed(ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT], ap_uint<2> &out_num);
int straight_conv_nn2_axis2(hls::stream<ap_axiu<32,1,1,1> >& ins, ap_uint<2> &outs){
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE s_axilite port=outs
#pragma HLS INTERFACE axis register both port=ins
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[ROW_PIXELS][COULMN_PIXELS];
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
ap_axiu<32,1,1,1> pix;
do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// user が 1になった時にフレームがスタートする
ins >> pix;
} while(pix.user == 0);
// 10 x 56 に整形
buf_copy1: for(int i=0; i<REDUSED_ROW; i++){
buf_copy2: for(int j=0; j<REDUSED_COULMN; j++){
if (!(i==0 && j==0)) // 最初の入力はすでに入力されている
ins >> pix; // AXI4-Stream からの入力
if((i>=33 && i<33+ROW_PIXELS) && (j>=2 && j<2+COULMN_PIXELS)){
buf[i-33][j-2] = (ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>)((ap_ufixed<16, 8, AP_TRN_ZERO, AP_SAT>)(pix.data & 0xff) / 256);
}
}
}
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<NUM_OF_KERNELS; i++){ // カーネルの個数
CONV2: for(int j=0; j<ROW_PIXELS-4; j++){
CONV3: for(int k=0; k<COULMN_PIXELS-4; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<NUM_OF_KERNELS; i++){
POOL2: for(int j=0; j<ROW_PIXELS-4; j += 2){
POOL3: for(int k=0; k<COULMN_PIXELS-4; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<NUM_OF_KERNELS; i++){
af1_dot3: for(int j=0; j<(ROW_PIXELS-4)/2; j++){
af1_dot4: for(int k=0; k<(COULMN_PIXELS-4)/2; k++){
dot1[col] += pool_out[i][j][k]*af1_weight[i*((ROW_PIXELS-4)/2)*((COULMN_PIXELS-4)/2)+j*((COULMN_PIXELS-4)/2)+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<NUM_OF_OUTPUT; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
}
max_ap_fixed(dot2, outs);
return(0);
}
int max_ap_fixed(ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT], ap_uint<2> &out_num){
int max_id;
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> max;
for(int i=0; i<NUM_OF_OUTPUT; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
out_num = (ap_uint<2>)max_id;
return(0);
}
ap_fixed<10, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<10, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
id = 0, max_id_ref = 1, max_id_hw = 2
id = 0, max_id_ref = 1, max_id_sw = 2
id = 1, max_id_ref = 1, max_id_hw = 2
id = 1, max_id_ref = 1, max_id_sw = 2
id = 5, max_id_ref = 1, max_id_hw = 2
id = 5, max_id_ref = 1, max_id_sw = 2
id = 6, max_id_ref = 1, max_id_hw = 2
id = 6, max_id_ref = 1, max_id_sw = 2
id = 10, max_id_ref = 1, max_id_hw = 2
id = 10, max_id_ref = 1, max_id_sw = 2
id = 15, max_id_ref = 1, max_id_sw = 2
id = 26, max_id_ref = 1, max_id_hw = 0
id = 27, max_id_ref = 1, max_id_hw = 0
id = 28, max_id_ref = 1, max_id_hw = 0
id = 29, max_id_ref = 1, max_id_hw = 0
id = 32, max_id_ref = 1, max_id_hw = 0
id = 33, max_id_ref = 1, max_id_hw = 0
id = 34, max_id_ref = 1, max_id_hw = 0
id = 38, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_sw = 0
id = 43, max_id_ref = 1, max_id_hw = 0
id = 44, max_id_ref = 1, max_id_hw = 0
id = 49, max_id_ref = 1, max_id_hw = 0
id = 80, max_id_ref = 0, max_id_sw = 2
id = 85, max_id_ref = 0, max_id_sw = 2
id = 86, max_id_ref = 0, max_id_sw = 2
id = 90, max_id_ref = 0, max_id_sw = 2
id = 91, max_id_ref = 0, max_id_sw = 2
id = 95, max_id_ref = 0, max_id_sw = 2
id = 96, max_id_ref = 0, max_id_sw = 2
id = 97, max_id_ref = 0, max_id_sw = 2
id = 98, max_id_ref = 0, max_id_sw = 2
id = 99, max_id_ref = 0, max_id_sw = 2
id = 100, max_id_ref = 2, max_id_hw = 0
id = 101, max_id_ref = 2, max_id_hw = 0
id = 102, max_id_ref = 2, max_id_hw = 0
id = 103, max_id_ref = 2, max_id_hw = 0
id = 104, max_id_ref = 2, max_id_hw = 0
id = 105, max_id_ref = 2, max_id_hw = 0
id = 106, max_id_ref = 2, max_id_hw = 0
id = 107, max_id_ref = 2, max_id_hw = 0
id = 108, max_id_ref = 2, max_id_hw = 0
id = 109, max_id_ref = 2, max_id_hw = 0
id = 110, max_id_ref = 2, max_id_hw = 0
id = 111, max_id_ref = 2, max_id_hw = 0
id = 112, max_id_ref = 2, max_id_hw = 0
id = 113, max_id_ref = 2, max_id_hw = 0
id = 114, max_id_ref = 2, max_id_hw = 0
id = 115, max_id_ref = 2, max_id_hw = 0
id = 116, max_id_ref = 2, max_id_hw = 0
id = 117, max_id_ref = 2, max_id_hw = 0
id = 118, max_id_ref = 2, max_id_hw = 0
id = 120, max_id_ref = 2, max_id_hw = 0
id = 121, max_id_ref = 2, max_id_hw = 0
id = 122, max_id_ref = 2, max_id_hw = 0
id = 123, max_id_ref = 2, max_id_hw = 0
id = 125, max_id_ref = 2, max_id_hw = 0
id = 126, max_id_ref = 2, max_id_hw = 0
id = 127, max_id_ref = 2, max_id_hw = 0
id = 128, max_id_ref = 2, max_id_hw = 0
id = 129, max_id_ref = 2, max_id_hw = 0
id = 130, max_id_ref = 2, max_id_hw = 0
id = 131, max_id_ref = 2, max_id_hw = 0
id = 132, max_id_ref = 2, max_id_hw = 0
id = 133, max_id_ref = 2, max_id_hw = 0
id = 134, max_id_ref = 2, max_id_hw = 0
id = 135, max_id_ref = 2, max_id_hw = 0
id = 136, max_id_ref = 2, max_id_hw = 0
id = 137, max_id_ref = 2, max_id_hw = 0
id = 138, max_id_ref = 2, max_id_hw = 0
id = 139, max_id_ref = 2, max_id_hw = 0
id = 140, max_id_ref = 2, max_id_hw = 0
id = 141, max_id_ref = 2, max_id_hw = 0
id = 142, max_id_ref = 2, max_id_hw = 0
id = 143, max_id_ref = 2, max_id_hw = 0
id = 144, max_id_ref = 2, max_id_hw = 0
id = 145, max_id_ref = 2, max_id_hw = 0
id = 146, max_id_ref = 2, max_id_hw = 0
id = 147, max_id_ref = 2, max_id_hw = 0
id = 148, max_id_ref = 2, max_id_hw = 0
id = 149, max_id_ref = 2, max_id_hw = 0
hw_err_count = 65
sw_err_count = 17
ap_fixed<11, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<11, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<14, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<14, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
id = 0, max_id_ref = 1, max_id_hw = 2
id = 0, max_id_ref = 1, max_id_sw = 2
id = 1, max_id_ref = 1, max_id_hw = 2
id = 1, max_id_ref = 1, max_id_sw = 2
id = 5, max_id_ref = 1, max_id_hw = 2
id = 5, max_id_ref = 1, max_id_sw = 2
id = 6, max_id_ref = 1, max_id_sw = 2
id = 10, max_id_ref = 1, max_id_hw = 2
id = 10, max_id_ref = 1, max_id_sw = 2
id = 15, max_id_ref = 1, max_id_sw = 2
id = 28, max_id_ref = 1, max_id_hw = 0
id = 29, max_id_ref = 1, max_id_hw = 0
id = 33, max_id_ref = 1, max_id_hw = 0
id = 34, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_sw = 0
id = 44, max_id_ref = 1, max_id_hw = 0
id = 80, max_id_ref = 0, max_id_sw = 2
id = 85, max_id_ref = 0, max_id_sw = 2
id = 86, max_id_ref = 0, max_id_sw = 2
id = 90, max_id_ref = 0, max_id_sw = 2
id = 91, max_id_ref = 0, max_id_sw = 2
id = 95, max_id_ref = 0, max_id_sw = 2
id = 96, max_id_ref = 0, max_id_sw = 2
id = 97, max_id_ref = 0, max_id_sw = 2
id = 98, max_id_ref = 0, max_id_sw = 2
id = 99, max_id_ref = 0, max_id_sw = 2
id = 101, max_id_ref = 2, max_id_hw = 0
id = 102, max_id_ref = 2, max_id_hw = 0
id = 103, max_id_ref = 2, max_id_hw = 0
id = 104, max_id_ref = 2, max_id_hw = 0
id = 106, max_id_ref = 2, max_id_hw = 0
id = 107, max_id_ref = 2, max_id_hw = 0
id = 108, max_id_ref = 2, max_id_hw = 0
id = 110, max_id_ref = 2, max_id_hw = 0
id = 111, max_id_ref = 2, max_id_hw = 0
id = 112, max_id_ref = 2, max_id_hw = 0
id = 116, max_id_ref = 2, max_id_hw = 0
id = 125, max_id_ref = 2, max_id_hw = 0
id = 126, max_id_ref = 2, max_id_hw = 0
id = 127, max_id_ref = 2, max_id_hw = 0
id = 128, max_id_ref = 2, max_id_hw = 0
id = 129, max_id_ref = 2, max_id_hw = 0
id = 130, max_id_ref = 2, max_id_hw = 0
id = 131, max_id_ref = 2, max_id_hw = 0
id = 132, max_id_ref = 2, max_id_hw = 0
id = 133, max_id_ref = 2, max_id_hw = 0
id = 134, max_id_ref = 2, max_id_hw = 0
id = 135, max_id_ref = 2, max_id_hw = 0
id = 136, max_id_ref = 2, max_id_hw = 0
id = 137, max_id_ref = 2, max_id_hw = 0
id = 138, max_id_ref = 2, max_id_hw = 0
id = 139, max_id_ref = 2, max_id_hw = 0
id = 140, max_id_ref = 2, max_id_hw = 0
id = 141, max_id_ref = 2, max_id_hw = 0
id = 142, max_id_ref = 2, max_id_hw = 0
id = 143, max_id_ref = 2, max_id_hw = 0
id = 144, max_id_ref = 2, max_id_hw = 0
id = 146, max_id_ref = 2, max_id_hw = 0
id = 147, max_id_ref = 2, max_id_hw = 0
id = 148, max_id_ref = 2, max_id_hw = 0
id = 149, max_id_ref = 2, max_id_hw = 0
hw_err_count = 45
sw_err_count = 17
ap_fixed<12, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<12, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<15, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<15, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
id = 0, max_id_ref = 1, max_id_hw = 2
id = 0, max_id_ref = 1, max_id_sw = 2
id = 1, max_id_ref = 1, max_id_hw = 2
id = 1, max_id_ref = 1, max_id_sw = 2
id = 5, max_id_ref = 1, max_id_hw = 2
id = 5, max_id_ref = 1, max_id_sw = 2
id = 6, max_id_ref = 1, max_id_hw = 2
id = 6, max_id_ref = 1, max_id_sw = 2
id = 10, max_id_ref = 1, max_id_hw = 2
id = 10, max_id_ref = 1, max_id_sw = 2
id = 15, max_id_ref = 1, max_id_sw = 2
id = 29, max_id_ref = 1, max_id_hw = 0
id = 34, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_sw = 0
id = 44, max_id_ref = 1, max_id_hw = 0
id = 80, max_id_ref = 0, max_id_sw = 2
id = 85, max_id_ref = 0, max_id_sw = 2
id = 86, max_id_ref = 0, max_id_sw = 2
id = 90, max_id_ref = 0, max_id_sw = 2
id = 91, max_id_ref = 0, max_id_sw = 2
id = 95, max_id_ref = 0, max_id_sw = 2
id = 96, max_id_ref = 0, max_id_sw = 2
id = 97, max_id_ref = 0, max_id_sw = 2
id = 98, max_id_ref = 0, max_id_sw = 2
id = 99, max_id_ref = 0, max_id_sw = 2
id = 102, max_id_ref = 2, max_id_hw = 0
id = 103, max_id_ref = 2, max_id_hw = 0
id = 106, max_id_ref = 2, max_id_hw = 0
id = 107, max_id_ref = 2, max_id_hw = 0
id = 108, max_id_ref = 2, max_id_hw = 0
id = 111, max_id_ref = 2, max_id_hw = 0
id = 112, max_id_ref = 2, max_id_hw = 0
id = 125, max_id_ref = 2, max_id_hw = 0
id = 126, max_id_ref = 2, max_id_hw = 0
id = 127, max_id_ref = 2, max_id_hw = 0
id = 128, max_id_ref = 2, max_id_hw = 0
id = 129, max_id_ref = 2, max_id_hw = 0
id = 130, max_id_ref = 2, max_id_hw = 0
id = 131, max_id_ref = 2, max_id_hw = 0
id = 132, max_id_ref = 2, max_id_hw = 0
id = 133, max_id_ref = 2, max_id_hw = 0
id = 134, max_id_ref = 2, max_id_hw = 0
id = 135, max_id_ref = 2, max_id_hw = 0
id = 136, max_id_ref = 2, max_id_hw = 0
id = 137, max_id_ref = 2, max_id_hw = 0
id = 138, max_id_ref = 2, max_id_hw = 0
id = 139, max_id_ref = 2, max_id_hw = 0
id = 140, max_id_ref = 2, max_id_hw = 0
id = 141, max_id_ref = 2, max_id_hw = 0
id = 142, max_id_ref = 2, max_id_hw = 0
id = 143, max_id_ref = 2, max_id_hw = 0
id = 144, max_id_ref = 2, max_id_hw = 0
id = 146, max_id_ref = 2, max_id_hw = 0
id = 147, max_id_ref = 2, max_id_hw = 0
id = 148, max_id_ref = 2, max_id_hw = 0
id = 149, max_id_ref = 2, max_id_hw = 0
hw_err_count = 40
sw_err_count = 17
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
id = 0, max_id_ref = 1, max_id_hw = 2
id = 0, max_id_ref = 1, max_id_sw = 2
id = 1, max_id_ref = 1, max_id_hw = 2
id = 1, max_id_ref = 1, max_id_sw = 2
id = 5, max_id_ref = 1, max_id_hw = 2
id = 5, max_id_ref = 1, max_id_sw = 2
id = 6, max_id_ref = 1, max_id_hw = 2
id = 6, max_id_ref = 1, max_id_sw = 2
id = 10, max_id_ref = 1, max_id_hw = 2
id = 10, max_id_ref = 1, max_id_sw = 2
id = 15, max_id_ref = 1, max_id_sw = 2
id = 29, max_id_ref = 1, max_id_hw = 0
id = 34, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_sw = 0
id = 44, max_id_ref = 1, max_id_hw = 0
id = 80, max_id_ref = 0, max_id_sw = 2
id = 85, max_id_ref = 0, max_id_sw = 2
id = 86, max_id_ref = 0, max_id_sw = 2
id = 90, max_id_ref = 0, max_id_sw = 2
id = 91, max_id_ref = 0, max_id_sw = 2
id = 95, max_id_ref = 0, max_id_sw = 2
id = 96, max_id_ref = 0, max_id_sw = 2
id = 97, max_id_ref = 0, max_id_sw = 2
id = 98, max_id_ref = 0, max_id_sw = 2
id = 99, max_id_ref = 0, max_id_sw = 2
id = 102, max_id_ref = 2, max_id_hw = 0
id = 107, max_id_ref = 2, max_id_hw = 0
id = 126, max_id_ref = 2, max_id_hw = 0
id = 127, max_id_ref = 2, max_id_hw = 0
id = 128, max_id_ref = 2, max_id_hw = 0
id = 129, max_id_ref = 2, max_id_hw = 0
id = 131, max_id_ref = 2, max_id_hw = 0
id = 132, max_id_ref = 2, max_id_hw = 0
id = 133, max_id_ref = 2, max_id_hw = 0
id = 134, max_id_ref = 2, max_id_hw = 0
id = 136, max_id_ref = 2, max_id_hw = 0
id = 137, max_id_ref = 2, max_id_hw = 0
id = 138, max_id_ref = 2, max_id_hw = 0
id = 139, max_id_ref = 2, max_id_hw = 0
id = 142, max_id_ref = 2, max_id_hw = 0
id = 143, max_id_ref = 2, max_id_hw = 0
id = 144, max_id_ref = 2, max_id_hw = 0
id = 147, max_id_ref = 2, max_id_hw = 0
id = 148, max_id_ref = 2, max_id_hw = 0
hw_err_count = 28
sw_err_count = 17
ap_fixed<14, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<14, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<17, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<17, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
id = 0, max_id_ref = 1, max_id_hw = 2
id = 0, max_id_ref = 1, max_id_sw = 2
id = 1, max_id_ref = 1, max_id_hw = 2
id = 1, max_id_ref = 1, max_id_sw = 2
id = 5, max_id_ref = 1, max_id_hw = 2
id = 5, max_id_ref = 1, max_id_sw = 2
id = 6, max_id_ref = 1, max_id_hw = 2
id = 6, max_id_ref = 1, max_id_sw = 2
id = 10, max_id_ref = 1, max_id_hw = 2
id = 10, max_id_ref = 1, max_id_sw = 2
id = 15, max_id_ref = 1, max_id_sw = 2
id = 34, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_sw = 0
id = 80, max_id_ref = 0, max_id_sw = 2
id = 85, max_id_ref = 0, max_id_sw = 2
id = 86, max_id_ref = 0, max_id_sw = 2
id = 90, max_id_ref = 0, max_id_sw = 2
id = 91, max_id_ref = 0, max_id_sw = 2
id = 95, max_id_ref = 0, max_id_sw = 2
id = 96, max_id_ref = 0, max_id_sw = 2
id = 97, max_id_ref = 0, max_id_sw = 2
id = 98, max_id_ref = 0, max_id_sw = 2
id = 99, max_id_ref = 0, max_id_sw = 2
id = 102, max_id_ref = 2, max_id_hw = 0
id = 107, max_id_ref = 2, max_id_hw = 0
id = 126, max_id_ref = 2, max_id_hw = 0
id = 127, max_id_ref = 2, max_id_hw = 0
id = 128, max_id_ref = 2, max_id_hw = 0
id = 129, max_id_ref = 2, max_id_hw = 0
id = 131, max_id_ref = 2, max_id_hw = 0
id = 132, max_id_ref = 2, max_id_hw = 0
id = 133, max_id_ref = 2, max_id_hw = 0
id = 134, max_id_ref = 2, max_id_hw = 0
id = 136, max_id_ref = 2, max_id_hw = 0
id = 137, max_id_ref = 2, max_id_hw = 0
id = 138, max_id_ref = 2, max_id_hw = 0
id = 139, max_id_ref = 2, max_id_hw = 0
id = 142, max_id_ref = 2, max_id_hw = 0
id = 143, max_id_ref = 2, max_id_hw = 0
id = 144, max_id_ref = 2, max_id_hw = 0
id = 147, max_id_ref = 2, max_id_hw = 0
id = 148, max_id_ref = 2, max_id_hw = 0
id = 149, max_id_ref = 2, max_id_hw = 0
hw_err_count = 27
sw_err_count = 17
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<13, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<16, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
// straight_conv_nn_tb.cpp
// 2017/08/28 by marsee
// 畳み込み層のカーネル数 2
//
#include <stdio.h>
#include <ap_fixed.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#include "straight_data.h"
#define NUM_OF_KERNELS 2
#define COULMN_PIXELS 56
#define ROW_PIXELS 10
#define ALL_PIXELS 560
#define NUM_OF_OUTPUT 3
int straight_conv_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[ALL_PIXELS], ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT]);
int straight_conv_nn_float(float in[ALL_PIXELS], float out[NUM_OF_OUTPUT]);
int max_ap_fixed(ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT]);
int max_float(float out[NUM_OF_OUTPUT]);
#define NUM_ITERATIONS 150 // C Simulation
//#define NUM_ITERATIONS 2 // C/RTL CoSimulation
int main(){
float t_tran_float[NUM_ITERATIONS][ALL_PIXELS];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> result_ap_fixed[NUM_ITERATIONS][NUM_OF_OUTPUT];
float result_float[NUM_ITERATIONS][NUM_OF_OUTPUT];
int max_id_hw, max_id_sw, max_id_ref;
for(int i=0; i<NUM_ITERATIONS; i++)
for(int j=0; j<ALL_PIXELS; j++)
t_tran_float[i][j] = (float)t_train[i][j];
for(int i=0; i<NUM_ITERATIONS; i++){
straight_conv_nn(&t_train[i][0], &result_ap_fixed[i][0]);
straight_conv_nn_float(&t_tran_float[i][0], &result_float[i][0]);
}
int hw_err_count=0;
int sw_err_count=0;
for(int i=0; i<NUM_ITERATIONS; i++){
max_id_hw = max_ap_fixed(&result_ap_fixed[i][0]);
max_id_sw = max_float(&result_float[i][0]);
max_id_ref = max_float(&t_test[i][0]);
if(max_id_ref != max_id_hw){
printf("id = %d, max_id_ref = %d, max_id_hw = %d\n", i, max_id_ref, max_id_hw);
hw_err_count++;
}
if(max_id_ref != max_id_sw){
printf("id = %d, max_id_ref = %d, max_id_sw = %d\n", i, max_id_ref, max_id_sw);
sw_err_count++;
}
}
if(hw_err_count==0 && sw_err_count==0)
printf("No Error\n");
else{
printf("hw_err_count = %d\n", hw_err_count);
printf("sw_err_count = %d\n", sw_err_count);
}
return(0);
}
int straight_conv_nn_float(float in[ALL_PIXELS], float out[NUM_OF_OUTPUT]){
float buf[ROW_PIXELS][COULMN_PIXELS];
float conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
float pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
float dot1[100];
float dot2[NUM_OF_OUTPUT];
buf_copy1: for(int i=0; i<ROW_PIXELS; i++)
buf_copy2: for(int j=0; j<COULMN_PIXELS; j++)
buf[i][j] = in[i*COULMN_PIXELS+j];
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<NUM_OF_KERNELS; i++){ // カーネルの個数
CONV2: for(int j=0; j<ROW_PIXELS-4; j++){
CONV3: for(int k=0; k<COULMN_PIXELS-4; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_fweight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_fbias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<NUM_OF_KERNELS; i++){
POOL2: for(int j=0; j<ROW_PIXELS-4; j += 2){
POOL3: for(int k=0; k<COULMN_PIXELS-4; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<NUM_OF_KERNELS; i++){
af1_dot3: for(int j=0; j<(ROW_PIXELS-4)/2; j++){
af1_dot4: for(int k=0; k<(COULMN_PIXELS-4)/2; k++){
dot1[col] += pool_out[i][j][k]*af1_fweight[i*((ROW_PIXELS-4)/2)*((COULMN_PIXELS-4)/2)+j*((COULMN_PIXELS-4)/2)+k][col];
}
}
}
dot1[col] += af1_fbias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<NUM_OF_OUTPUT; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_fweight[row][col];
}
dot2[col] += af2_fbias[col];
out[col] = dot2[col];
}
return(0);
}
int max_ap_fixed(ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT]){
int max_id;
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> max;
for(int i=0; i<NUM_OF_OUTPUT; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
return(max_id);
}
int max_float(float out[NUM_OF_OUTPUT]){
int max_id;
float max;
for(int i=0; i<NUM_OF_OUTPUT; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
return(max_id);
}
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
Compiling ../../../straight_conv_nn_tb.cpp in debug mode
Generating csim.exe
id = 0, max_id_ref = 1, max_id_hw = 2
id = 0, max_id_ref = 1, max_id_sw = 2
id = 1, max_id_ref = 1, max_id_hw = 2
id = 1, max_id_ref = 1, max_id_sw = 2
id = 5, max_id_ref = 1, max_id_hw = 2
id = 5, max_id_ref = 1, max_id_sw = 2
id = 6, max_id_ref = 1, max_id_hw = 2
id = 6, max_id_ref = 1, max_id_sw = 2
id = 10, max_id_ref = 1, max_id_hw = 2
id = 10, max_id_ref = 1, max_id_sw = 2
id = 15, max_id_ref = 1, max_id_sw = 2
id = 26, max_id_ref = 1, max_id_hw = 0
id = 27, max_id_ref = 1, max_id_hw = 0
id = 28, max_id_ref = 1, max_id_hw = 0
id = 29, max_id_ref = 1, max_id_hw = 0
id = 32, max_id_ref = 1, max_id_hw = 0
id = 33, max_id_ref = 1, max_id_hw = 0
id = 34, max_id_ref = 1, max_id_hw = 0
id = 38, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_hw = 0
id = 39, max_id_ref = 1, max_id_sw = 0
id = 43, max_id_ref = 1, max_id_hw = 0
id = 44, max_id_ref = 1, max_id_hw = 0
id = 49, max_id_ref = 1, max_id_hw = 0
id = 80, max_id_ref = 0, max_id_sw = 2
id = 85, max_id_ref = 0, max_id_sw = 2
id = 86, max_id_ref = 0, max_id_sw = 2
id = 90, max_id_ref = 0, max_id_sw = 2
id = 91, max_id_ref = 0, max_id_sw = 2
id = 95, max_id_ref = 0, max_id_sw = 2
id = 96, max_id_ref = 0, max_id_sw = 2
id = 97, max_id_ref = 0, max_id_sw = 2
id = 98, max_id_ref = 0, max_id_sw = 2
id = 99, max_id_ref = 0, max_id_sw = 2
id = 100, max_id_ref = 2, max_id_hw = 0
id = 101, max_id_ref = 2, max_id_hw = 0
id = 102, max_id_ref = 2, max_id_hw = 0
id = 103, max_id_ref = 2, max_id_hw = 0
id = 104, max_id_ref = 2, max_id_hw = 0
id = 105, max_id_ref = 2, max_id_hw = 0
id = 106, max_id_ref = 2, max_id_hw = 0
id = 107, max_id_ref = 2, max_id_hw = 0
id = 108, max_id_ref = 2, max_id_hw = 0
id = 109, max_id_ref = 2, max_id_hw = 0
id = 110, max_id_ref = 2, max_id_hw = 0
id = 111, max_id_ref = 2, max_id_hw = 0
id = 112, max_id_ref = 2, max_id_hw = 0
id = 113, max_id_ref = 2, max_id_hw = 0
id = 114, max_id_ref = 2, max_id_hw = 0
id = 115, max_id_ref = 2, max_id_hw = 0
id = 116, max_id_ref = 2, max_id_hw = 0
id = 117, max_id_ref = 2, max_id_hw = 0
id = 118, max_id_ref = 2, max_id_hw = 0
id = 120, max_id_ref = 2, max_id_hw = 0
id = 121, max_id_ref = 2, max_id_hw = 0
id = 122, max_id_ref = 2, max_id_hw = 0
id = 123, max_id_ref = 2, max_id_hw = 0
id = 125, max_id_ref = 2, max_id_hw = 0
id = 126, max_id_ref = 2, max_id_hw = 0
id = 127, max_id_ref = 2, max_id_hw = 0
id = 128, max_id_ref = 2, max_id_hw = 0
id = 129, max_id_ref = 2, max_id_hw = 0
id = 130, max_id_ref = 2, max_id_hw = 0
id = 131, max_id_ref = 2, max_id_hw = 0
id = 132, max_id_ref = 2, max_id_hw = 0
id = 133, max_id_ref = 2, max_id_hw = 0
id = 134, max_id_ref = 2, max_id_hw = 0
id = 135, max_id_ref = 2, max_id_hw = 0
id = 136, max_id_ref = 2, max_id_hw = 0
id = 137, max_id_ref = 2, max_id_hw = 0
id = 138, max_id_ref = 2, max_id_hw = 0
id = 139, max_id_ref = 2, max_id_hw = 0
id = 140, max_id_ref = 2, max_id_hw = 0
id = 141, max_id_ref = 2, max_id_hw = 0
id = 142, max_id_ref = 2, max_id_hw = 0
id = 143, max_id_ref = 2, max_id_hw = 0
id = 144, max_id_ref = 2, max_id_hw = 0
id = 145, max_id_ref = 2, max_id_hw = 0
id = 146, max_id_ref = 2, max_id_hw = 0
id = 147, max_id_ref = 2, max_id_hw = 0
id = 148, max_id_ref = 2, max_id_hw = 0
id = 149, max_id_ref = 2, max_id_hw = 0
INFO: [SIM 1] CSim done with 0 errors.
INFO: [SIM 3] *************** CSIM finish ***************
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
Compiling ../../../straight_conv_nn2_axis2_tb.cpp in debug mode
Generating csim.exe
*straight0.bmp
outs = 1
*straight1.bmp
outs = 1
*straight2.bmp
outs = 1
*straight3.bmp
outs = 1
*straight4.bmp
outs = 1
*straight5.bmp
outs = 1
*straight6.bmp
outs = 1
*straight7.bmp
outs = 1
*straight8.bmp
outs = 1
*straight9.bmp
outs = 1
*straight10.bmp
outs = 1
*left_turn0.bmp
outs = 0
*left_turn1.bmp
outs = 0
*left_turn2.bmp
outs = 0
*left_turn3.bmp
outs = 0
*left_turn4.bmp
outs = 0
*left_turn5.bmp
outs = 0
*left_turn6.bmp
outs = 0
*left_turn7.bmp
outs = 0
*left_turn8.bmp
outs = 0
*left_turn9.bmp
outs = 0
*left_turn10.bmp
outs = 0
*right_turn0.bmp
outs = 2
*right_turn1.bmp
outs = 2
*right_turn2.bmp
outs = 0
*right_turn3.bmp
outs = 0
*right_turn4.bmp
outs = 0
*right_turn5.bmp
outs = 0
*right_turn6.bmp
outs = 1
*right_turn7.bmp
outs = 2
*right_turn8.bmp
outs = 2
*right_turn9.bmp
outs = 2
*right_turn10.bmp
outs = 2
INFO: [SIM 1] CSim done with 0 errors.
INFO: [SIM 3] *************** CSIM finish ***************
*straight_test0.bmp
outs = 1
*straight_test1.bmp
outs = 1
*straight_test2.bmp
outs = 1
*straight_test3.bmp
outs = 1
*straight_test4.bmp
outs = 1
*left_turn_test0.bmp
outs = 0
*left_turn_test1.bmp
outs = 0
*left_turn_test2.bmp
outs = 0
*left_turn_test3.bmp
outs = 0
*left_turn_test4.bmp
outs = 0
*right_turn_test0.bmp
outs = 2
*right_turn_test1.bmp
outs = 2
*right_turn_test2.bmp
outs = 2
*right_turn_test3.bmp
outs = 0
*right_turn_test4.bmp
outs = 0
// straight_conv_nn2_axis2.cpp
// 2017/09/09 by marsee
// 畳み込み層のカーネル数 2
// AXI4 Stream入力 番号出力
//
#include <ap_fixed.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#define REDUSED_ROW 45
#define REDUSED_COULMN 60
#define NUM_OF_KERNELS 2
#define COULMN_PIXELS 56
#define ROW_PIXELS 10
#define ALL_PIXELS 560
#define NUM_OF_OUTPUT 3
int max_ap_fixed(ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT], ap_uint<2> &out_num);
int straight_conv_nn2_axis2(hls::stream<ap_axiu<32,1,1,1> >& ins, ap_uint<2> &outs){
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE s_axilite port=outs
#pragma HLS INTERFACE axis register both port=ins
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[ROW_PIXELS][COULMN_PIXELS];
ap_fixed<10, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<10, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
ap_axiu<32,1,1,1> pix;
do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// user が 1になった時にフレームがスタートする
ins >> pix;
} while(pix.user == 0);
// 10 x 56 に整形
buf_copy1: for(int i=0; i<REDUSED_ROW; i++){
buf_copy2: for(int j=0; j<REDUSED_COULMN; j++){
if (!(i==0 && j==0)) // 最初の入力はすでに入力されている
ins >> pix; // AXI4-Stream からの入力
if((i>=33 && i<33+ROW_PIXELS) && (j>=2 && j<2+COULMN_PIXELS)){
buf[i-33][j-2] = (ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>)((ap_ufixed<16, 8, AP_TRN_ZERO, AP_SAT>)(pix.data & 0xff) / 256);
}
}
}
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<NUM_OF_KERNELS; i++){ // カーネルの個数
CONV2: for(int j=0; j<ROW_PIXELS-4; j++){
CONV3: for(int k=0; k<COULMN_PIXELS-4; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<NUM_OF_KERNELS; i++){
POOL2: for(int j=0; j<ROW_PIXELS-4; j += 2){
POOL3: for(int k=0; k<COULMN_PIXELS-4; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<NUM_OF_KERNELS; i++){
af1_dot3: for(int j=0; j<(ROW_PIXELS-4)/2; j++){
af1_dot4: for(int k=0; k<(COULMN_PIXELS-4)/2; k++){
dot1[col] += pool_out[i][j][k]*af1_weight[i*((ROW_PIXELS-4)/2)*((COULMN_PIXELS-4)/2)+j*((COULMN_PIXELS-4)/2)+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<NUM_OF_OUTPUT; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
}
max_ap_fixed(dot2, outs);
return(0);
}
int max_ap_fixed(ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT], ap_uint<2> &out_num){
int max_id;
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> max;
for(int i=0; i<NUM_OF_OUTPUT; i++){
if(i == 0){
max = out[0];
max_id = 0;
}else if(out[i]>max){
max = out[i];
max_id = i;
}
}
out_num = (ap_uint<2>)max_id;
return(0);
}
// straight_conv_nn2_axis2_tb.cpp
// 2017/09/09 by marsee
//
#include <iostream>
#include "hls_opencv.h"
#include "ap_axi_sdata.h"
#include "hls_video.h"
#define MAX_HEIGHT 600
#define MAX_WIDTH 800
typedef hls::stream<ap_axiu<32,1,1,1> > AXI_STREAM;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1> GRAY_IMAGE;
using namespace cv;
#define NUM_OF_OUTPUT 3
#define MAX_LOOP_COUNT 11
//#define MAX_LOOP_COUNT 1 // for C/RTL Co-Simulation
#define STRAIGHT_IMAGE_NAME "straight"
#define LEFT_TURN_IMAGE_NAME "left_turn"
#define RIGHT_TURN_IMAGE_NAME "right_turn"
int straight_conv_nn2_axis2(hls::stream<ap_axiu<32,1,1,1> >& ins, ap_uint<2> &outs);
int resize_gray(AXI_STREAM& ins, AXI_STREAM& outs);
int main_output_loop(char *buf);
int main () {
char buf[200];
sprintf(buf, "%s", STRAIGHT_IMAGE_NAME);
main_output_loop(buf);
sprintf(buf, "%s", LEFT_TURN_IMAGE_NAME);
main_output_loop(buf);
sprintf(buf, "%s", RIGHT_TURN_IMAGE_NAME);
main_output_loop(buf);
return(0);
}
int main_output_loop(char *buf){
char bmp_file_name[200];
ap_uint<2> outs;
AXI_STREAM src_axi, dst_axi;
Mat src;
for(int i=0; i<MAX_LOOP_COUNT; i++){
sprintf(bmp_file_name, "%s%d.bmp", buf, i);
// OpenCV で 画像を読み込む
src = imread(bmp_file_name);
// BGR から RGBへ変換
Mat src_rgb;
cvtColor(src, src_rgb, CV_BGR2RGB);
// Mat フォーマットから AXI4 Stream へ変換
cvMat2AXIvideo(src_rgb, src_axi);
// resize_gray() 関数をコール
resize_gray(src_axi, dst_axi);
straight_conv_nn2_axis2(dst_axi, outs);
printf("*%s\n", bmp_file_name);
printf("outs = %d\n", (int)outs);
}
return(0);
}
int resize_gray(AXI_STREAM& ins, AXI_STREAM& outs){
RGB_IMAGE org_img(600, 800);
GRAY_IMAGE org_img_g(600, 800);
GRAY_IMAGE resize_img_g(45, 60);
RGB_IMAGE resize_img(45, 60);
hls::AXIvideo2Mat(ins, org_img);
hls::CvtColor<HLS_RGB2GRAY>(org_img, org_img_g);
hls::Resize(org_img_g, resize_img_g);
hls::CvtColor<HLS_GRAY2RGB>(resize_img_g, resize_img);
hls::Mat2AXIvideo(resize_img, outs);
return(0);
}
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
Compiling ../../../straight_conv_nn2_axis_tb.cpp in debug mode
Generating csim.exe
*straight0.bmp
outs[0] = -6.218750 outs[1] = 3.812500 outs[2] = -3.078125
*straight1.bmp
outs[0] = -5.625000 outs[1] = 2.500000 outs[2] = -1.796875
*straight2.bmp
outs[0] = -3.796875 outs[1] = 4.015625 outs[2] = -5.890625
*straight3.bmp
outs[0] = -3.484375 outs[1] = 0.875000 outs[2] = -2.140625
*straight4.bmp
outs[0] = -0.765625 outs[1] = 2.218750 outs[2] = -6.156250
*straight5.bmp
outs[0] = -5.468750 outs[1] = 3.046875 outs[2] = -2.828125
*straight6.bmp
outs[0] = -6.500000 outs[1] = 5.062500 outs[2] = -4.234375
*straight7.bmp
outs[0] = -5.140625 outs[1] = 2.187500 outs[2] = -1.312500
*straight8.bmp
outs[0] = 1.359375 outs[1] = 1.546875 outs[2] = -8.156250
*straight9.bmp
outs[0] = -5.390625 outs[1] = 2.515625 outs[2] = -1.375000
*straight10.bmp
outs[0] = 1.718750 outs[1] = 1.640625 outs[2] = -8.796875
*left_turn0.bmp
outs[0] = 5.671875 outs[1] = -0.515625 outs[2] = -9.937500
*left_turn1.bmp
outs[0] = 5.093750 outs[1] = -3.734375 outs[2] = -3.078125
*left_turn2.bmp
outs[0] = 6.500000 outs[1] = -6.875000 outs[2] = -0.593750
*left_turn3.bmp
outs[0] = 6.078125 outs[1] = -6.546875 outs[2] = -0.515625
*left_turn4.bmp
outs[0] = 6.984375 outs[1] = -9.109375 outs[2] = 1.859375
*left_turn5.bmp
outs[0] = 7.250000 outs[1] = -9.593750 outs[2] = 2.109375
*left_turn6.bmp
outs[0] = 6.359375 outs[1] = -3.046875 outs[2] = -5.593750
*left_turn7.bmp
outs[0] = 8.843750 outs[1] = -8.890625 outs[2] = -1.203125
*left_turn8.bmp
outs[0] = 8.453125 outs[1] = -8.781250 outs[2] = -0.656250
*left_turn9.bmp
outs[0] = 5.343750 outs[1] = -1.062500 outs[2] = -7.953125
*left_turn10.bmp
outs[0] = 7.609375 outs[1] = -6.062500 outs[2] = -3.421875
*right_turn0.bmp
outs[0] = -1.578125 outs[1] = -0.890625 outs[2] = 0.843750
*right_turn1.bmp
outs[0] = 3.828125 outs[1] = -6.937500 outs[2] = 3.937500
*right_turn2.bmp
outs[0] = 3.296875 outs[1] = -5.375000 outs[2] = 2.562500
*right_turn3.bmp
outs[0] = 2.796875 outs[1] = -4.234375 outs[2] = 1.421875
*right_turn4.bmp
outs[0] = 3.562500 outs[1] = -4.625000 outs[2] = 0.843750
*right_turn5.bmp
outs[0] = 2.890625 outs[1] = -3.906250 outs[2] = 0.328125
*right_turn6.bmp
outs[0] = -2.109375 outs[1] = -0.312500 outs[2] = -1.296875
*right_turn7.bmp
outs[0] = 0.281250 outs[1] = -2.843750 outs[2] = 2.375000
*right_turn8.bmp
outs[0] = 0.671875 outs[1] = -5.812500 outs[2] = 6.656250
*right_turn9.bmp
outs[0] = 1.562500 outs[1] = -4.765625 outs[2] = 3.437500
*right_turn10.bmp
outs[0] = 3.515625 outs[1] = -7.093750 outs[2] = 4.437500
INFO: [SIM 1] CSim done with 0 errors.
INFO: [SIM 3] *************** CSIM finish ***************
2: 角度 -5 度、左車輪逸脱車体1/4
3: 角度 -10 度、左車輪逸脱車体1/4
4: 角度 0 度、左車輪逸脱車体1/2
// straight_conv_nn2_axis.cpp
// 2017/09/05 by marsee
// 畳み込み層のカーネル数 2
// AXI4 Stream入力
//
#include <ap_fixed.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "conv1_weight.h"
#include "conv1_bias.h"
#include "af1_weight.h"
#include "af1_bias.h"
#include "af2_weight.h"
#include "af2_bias.h"
#define REDUSED_ROW 45
#define REDUSED_COULMN 60
#define NUM_OF_KERNELS 2
#define COULMN_PIXELS 56
#define ROW_PIXELS 10
#define ALL_PIXELS 560
#define NUM_OF_OUTPUT 3
int straight_conv_nn2_axis(hls::stream<ap_axiu<32,1,1,1> >& ins, ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> outs[NUM_OF_OUTPUT]){
#pragma HLS INTERFACE s_axilite port=return
#pragma HLS INTERFACE s_axilite port=outs
#pragma HLS INTERFACE axis register both port=ins
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[ROW_PIXELS][COULMN_PIXELS];
ap_fixed<10, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
ap_fixed<10, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
ap_axiu<32,1,1,1> pix;
do {
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
// user が 1になった時にフレームがスタートする
ins >> pix;
} while(pix.user == 0);
// 10 x 56 に整形
buf_copy1: for(int i=0; i<REDUSED_ROW; i++){
buf_copy2: for(int j=0; j<REDUSED_COULMN; j++){
if (!(i==0 && j==0)) // 最初の入力はすでに入力されている
ins >> pix; // AXI4-Stream からの入力
if((i>=33 && i<33+ROW_PIXELS) && (j>=2 && j<2+COULMN_PIXELS)){
buf[i-33][j-2] = (ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT>)((ap_ufixed<16, 8, AP_TRN_ZERO, AP_SAT>)(pix.data & 0xff) / 256);
//printf("%1x", (((unsigned int)pix.data&0xff)+8)/16);
//if(j==2+COULMN_PIXELS-1)
// printf("\n");
}
}
}
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<NUM_OF_KERNELS; i++){ // カーネルの個数
CONV2: for(int j=0; j<ROW_PIXELS-4; j++){
CONV3: for(int k=0; k<COULMN_PIXELS-4; k++){
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<NUM_OF_KERNELS; i++){
POOL2: for(int j=0; j<ROW_PIXELS-4; j += 2){
POOL3: for(int k=0; k<COULMN_PIXELS-4; k += 2){
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<NUM_OF_KERNELS; i++){
af1_dot3: for(int j=0; j<(ROW_PIXELS-4)/2; j++){
af1_dot4: for(int k=0; k<(COULMN_PIXELS-4)/2; k++){
dot1[col] += pool_out[i][j][k]*af1_weight[i*((ROW_PIXELS-4)/2)*((COULMN_PIXELS-4)/2)+j*((COULMN_PIXELS-4)/2)+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<NUM_OF_OUTPUT; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
outs[col] = dot2[col];
}
return(0);
}
// straight_conv_nn2_axis_tb.cpp
// 2017/09/06 by marsee
//
#include <iostream>
#include "hls_opencv.h"
#include "ap_axi_sdata.h"
#include "hls_video.h"
#define MAX_HEIGHT 600
#define MAX_WIDTH 800
typedef hls::stream<ap_axiu<32,1,1,1> > AXI_STREAM;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1> GRAY_IMAGE;
using namespace cv;
#define NUM_OF_OUTPUT 3
#define MAX_LOOP_COUNT 11
//#define MAX_LOOP_COUNT 1 // for C/RTL Co-Simulation
int straight_conv_nn2_axis(AXI_STREAM& ins, ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> outs[NUM_OF_OUTPUT]);
int resize_gray(AXI_STREAM& ins, AXI_STREAM& outs);
int main_output_loop(char *buf);
int main () {
char buf[200];
sprintf(buf, "%s", "straight");
main_output_loop(buf);
sprintf(buf, "%s", "left_turn");
main_output_loop(buf);
sprintf(buf, "%s", "right_turn");
main_output_loop(buf);
return(0);
}
int main_output_loop(char *buf){
char bmp_file_name[200];
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> outs[NUM_OF_OUTPUT];
AXI_STREAM src_axi, dst_axi;
Mat src;
for(int i=0; i<MAX_LOOP_COUNT; i++){
sprintf(bmp_file_name, "%s%d.bmp", buf, i);
// OpenCV で 画像を読み込む
src = imread(bmp_file_name);
// Mat フォーマットから AXI4 Stream へ変換
cvMat2AXIvideo(src, src_axi);
// resize_gray() 関数をコール
resize_gray(src_axi, dst_axi);
straight_conv_nn2_axis(dst_axi, outs);
printf("*%s\n", bmp_file_name);
for(int i=0; i<NUM_OF_OUTPUT; i++)
printf("outs[%d] = %f ", i, (float)outs[i]);
printf("\n");
}
return(0);
}
int resize_gray(AXI_STREAM& ins, AXI_STREAM& outs){
RGB_IMAGE org_img(600, 800);
GRAY_IMAGE org_img_g(600, 800);
GRAY_IMAGE resize_img_g(45, 60);
RGB_IMAGE resize_img(45, 60);
hls::AXIvideo2Mat(ins, org_img);
hls::CvtColor<HLS_BGR2GRAY>(org_img, org_img_g);
hls::Resize(org_img_g, resize_img_g);
hls::CvtColor<HLS_GRAY2BGR>(resize_img_g, resize_img);
hls::Mat2AXIvideo(resize_img, outs);
return(0);
}
// resize_gray.cpp
// 2017/08/31 by marsee
// 2017/09/06 : BRG to RGB
//
#include "resize_gray.h"
int resize_gray(AXI_STREAM& ins, AXI_STREAM& outs){
#pragma HLS INTERFACE axis register both port=outs
#pragma HLS INTERFACE axis register both port=ins
#pragma HLS DATAFLOW
#pragma HLS INTERFACE s_axilite port=return
RGB_IMAGE org_img(600, 800);
GRAY_IMAGE org_img_g(600, 800);
GRAY_IMAGE resize_img_g(45, 60);
RGB_IMAGE resize_img(45, 60);
hls::AXIvideo2Mat(ins, org_img);
hls::CvtColor<HLS_RGB2GRAY>(org_img, org_img_g);
hls::Resize(org_img_g, resize_img_g);
hls::CvtColor<HLS_GRAY2RGB>(resize_img_g, resize_img);
hls::Mat2AXIvideo(resize_img, outs);
return(0);
}
// resize_gray_tb.cpp
// 2017/08/31 by marsee
// 2017/09/06 : BRG to RGB
//
#include <iostream>
#include "hls_opencv.h"
#include "resize_gray.h"
using namespace cv;
#define INPUT_IMAGE "straight0.bmp"
#define OUTPUT_IMAGE "test_straight0.bmp"
#define OUTPUT_IMAGE_CV "test_straight0_cv.bmp"
void resize_gray(AXI_STREAM& ins, AXI_STREAM& outs);
void opencv_resize_gray(Mat& src, Mat& dst);
int main (int argc, char** argv) {
// OpenCV で 画像を読み込む
Mat src = imread(INPUT_IMAGE);
AXI_STREAM src_axi, dst_axi;
// BGR から RGBへ変換
Mat src_rgb;
cvtColor(src, src_rgb, CV_BGR2RGB);
// Mat フォーマットから AXI4 Stream へ変換
cvMat2AXIvideo(src_rgb, src_axi);
// resize_gray() 関数をコール
resize_gray(src_axi, dst_axi);
// AXI4 Stream から Mat フォーマットへ変換
// dst は宣言時にサイズとカラー・フォーマットを定義する必要がある
Mat dst_rgb(45, 60, CV_8UC3);
AXIvideo2cvMat(dst_axi, dst_rgb);
Mat dst;
cvtColor(dst_rgb, dst, CV_RGB2BGR);
// Mat フォーマットからファイルに書き込み
imwrite(OUTPUT_IMAGE, dst);
// opencv_resize_gray() をコール
Mat dst_cv(45, 60, CV_8UC3);
opencv_resize_gray(src, dst_cv);
imwrite(OUTPUT_IMAGE_CV, dst_cv);
// dst と dst_cv が同じ画像かどうか?比較する
for (int y=0; y<45; y++){
Vec3b* dst_ptr = dst.ptr<Vec3b>(y);
Vec3b* dst_cv_ptr = dst_cv.ptr<Vec3b>(y);
for (int x=0; x<60; x++){
Vec3b dst_bgr = dst_ptr[x];
Vec3b dst_cv_bgr = dst_cv_ptr[x];
// bgr のどれかが間違っていたらエラー
if (std::pow(dst_bgr[0]-dst_cv_bgr[0], 2.0) > 1 || std::pow(dst_bgr[1]-dst_cv_bgr[1], 2.0) > 1
|| std::pow(dst_bgr[2]-dst_cv_bgr[2], 2.0) > 1){
printf("x = %d, y = %d, Error dst=%d,%d,%d dst_cv=%d,%d,%d\n", x, y,
dst_bgr[0], dst_bgr[1], dst_bgr[0], dst_cv_bgr[0], dst_cv_bgr[1], dst_cv_bgr[2]);
//return 1;
}
}
}
printf("Test with 0 errors.\n");
return 0;
}
void opencv_resize_gray(Mat& src, Mat& dst){
Mat gray(src.rows, src.cols, CV_8UC1);
Mat img0g(45, 60, CV_8UC1);
cvtColor(src, gray, CV_BGR2GRAY);
resize(gray, img0g, img0g.size(), 0, 0, INTER_LINEAR);
cvtColor(img0g, dst, CV_GRAY2BGR);
}
// resize_gray.h
// 2017/08/31 by marsee
//
#ifndef __resize_gray_H__
#define __resize_gray_H__
#include "ap_axi_sdata.h"
#include "hls_video.h"
#define MAX_HEIGHT 600
#define MAX_WIDTH 800
typedef hls::stream<ap_axiu<32,1,1,1> > AXI_STREAM;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1> GRAY_IMAGE;
#endif
int straight_conv_nn(ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> in[ALL_PIXELS], ap_fixed<12, 7, AP_TRN_ZERO, AP_SAT> out[NUM_OF_OUTPUT]){
#pragma HLS ARRAY_PARTITION variable=af1_weight complete dim=1
#pragma HLS ARRAY_PARTITION variable=af2_weight complete dim=1
#pragma HLS ARRAY_PARTITION variable=af2_bias complete dim=1
#pragma HLS ARRAY_PARTITION variable=af1_bias complete dim=1
#pragma HLS ARRAY_PARTITION variable=conv1_bias complete dim=1
#pragma HLS ARRAY_PARTITION variable=conv1_weight complete dim=1
ap_ufixed<8, 0, AP_TRN_ZERO, AP_SAT> buf[ROW_PIXELS][COULMN_PIXELS];
#pragma HLS ARRAY_PARTITION variable=buf complete dim=1
ap_fixed<10, 6, AP_TRN_ZERO, AP_SAT> conv_out[NUM_OF_KERNELS][ROW_PIXELS-4][COULMN_PIXELS-4];
#pragma HLS ARRAY_PARTITION variable=conv_out complete dim=1
ap_fixed<10, 6, AP_TRN_ZERO, AP_SAT> pool_out[NUM_OF_KERNELS][(ROW_PIXELS-4)/2][(COULMN_PIXELS-4)/2];
#pragma HLS ARRAY_PARTITION variable=pool_out complete dim=1
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot1[100];
#pragma HLS ARRAY_PARTITION variable=dot1 complete dim=1
ap_fixed<13, 7, AP_TRN_ZERO, AP_SAT> dot2[NUM_OF_OUTPUT];
#pragma HLS ARRAY_PARTITION variable=dot2 complete dim=1
buf_copy1: for(int i=0; i<ROW_PIXELS; i++)
buf_copy2: for(int j=0; j<COULMN_PIXELS; j++)
#pragma HLS PIPELINE II=1
buf[i][j] = in[i*COULMN_PIXELS+j];
// Convolutional Neural Network 5x5 kernel, Stride = 1, Padding = 0
// + ReLU
CONV1: for(int i=0; i<NUM_OF_KERNELS; i++){ // カーネルの個数
CONV2: for(int j=0; j<ROW_PIXELS-4; j++){
CONV3: for(int k=0; k<COULMN_PIXELS-4; k++){
#pragma HLS PIPELINE II=1
conv_out[i][j][k] = 0;
CONV4: for(int m=0; m<5; m++){
CONV5: for(int n=0; n<5; n++){
conv_out[i][j][k] += buf[j+m][k+n] * conv1_weight[i][0][m][n];
}
}
conv_out[i][j][k] += conv1_bias[i];
if(conv_out[i][j][k]<0) // ReLU
conv_out[i][j][k] = 0;
}
}
}
// Pooling Kernel = 2 x 2, Stride = 2
POOL1: for(int i=0; i<NUM_OF_KERNELS; i++){
POOL2: for(int j=0; j<ROW_PIXELS-4; j += 2){
POOL3: for(int k=0; k<COULMN_PIXELS-4; k += 2){
#pragma HLS PIPELINE II=1
POOL4: for(int m=0; m<2; m++){
POOL5: for(int n=0; n<2; n++){
if(m==0 && n==0){
pool_out[i][j/2][k/2] = conv_out[i][j][k];
} else if(pool_out[i][j/2][k/2] < conv_out[i][j+m][k+n]){
pool_out[i][j/2][k/2] = conv_out[i][j+m][k+n];
}
}
}
}
}
}
af1_dot1: for(int col=0; col<100; col++){
dot1[col] = 0;
af1_dot2: for(int i=0; i<NUM_OF_KERNELS; i++){
af1_dot3: for(int j=0; j<(ROW_PIXELS-4)/2; j++){
af1_dot4: for(int k=0; k<(COULMN_PIXELS-4)/2; k++){
#pragma HLS PIPELINE II=1
dot1[col] += pool_out[i][j][k]*af1_weight[i*((ROW_PIXELS-4)/2)*((COULMN_PIXELS-4)/2)+j*((COULMN_PIXELS-4)/2)+k][col];
}
}
}
dot1[col] += af1_bias[col];
if(dot1[col] < 0) // ReLU
dot1[col] = 0;
}
af2_dot1: for(int col=0; col<NUM_OF_OUTPUT; col++){
dot2[col] = 0;
af2_dot2: for(int row=0; row<100; row++){
#pragma HLS PIPELINE II=1
dot2[col] += dot1[row]*af2_weight[row][col];
}
dot2[col] += af2_bias[col];
out[col] = dot2[col];
}
return(0);
}
ERROR: [COSIM 212-303] Aborting co-simulation: RTL simulation failed.
ERROR: [COSIM 212-344] Rtl simulation failed.
ERROR: [COSIM 212-4] *** C/RTL co-simulation finished: FAIL ***
could not read "C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO/test/dmar4resize_gray/solution1/sim/tv/rtldatafile/sim/report/cosim.log": no such file or directory
while executing
"source C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO/test/dmar4resize_gray/solution1/cosim.tcl"
invoked from within
"hls::main C:/Users/Masaaki/Documents/VIvado_HLS/ZYBO/test/dmar4resize_gray/solution1/cosim.tcl"
("uplevel" body line 1)
invoked from within
"uplevel 1 hls::main {*}$args"
(procedure "hls_proc" line 5)
invoked from within
"hls_proc $argv"
Finished C/RTL cosimulation.
INFO: [SIM 2] *************** CSIM start ***************
INFO: [SIM 4] CSIM will launch GCC as the compiler.
Compiling ../../../dmar4resize_gray_tb.cpp in debug mode
csim.mk:73: ターゲット 'obj/dmar4resize_gray_tb.o' のレシピで失敗しました
../../../dmar4resize_gray_tb.cpp: In function ‘int main()’:
../../../dmar4resize_gray_tb.cpp:98:61: error: cast from ‘int*’ to ‘unsigned int’ loses precision [-fpermissive]
../../../dmar4resize_gray_tb.cpp:99:20: error: cast from ‘int*’ to ‘unsigned int’ loses precision [-fpermissive]
../../../dmar4resize_gray_tb.cpp:103:61: error: cast from ‘int*’ to ‘unsigned int’ loses precision [-fpermissive]
../../../dmar4resize_gray_tb.cpp:104:20: error: cast from ‘int*’ to ‘unsigned int’ loses precision [-fpermissive]
make: *** [obj/dmar4resize_gray_tb.o] エラー 1
CRITICAL WARNING: [SIM 100] 'csim_design' failed: compilation error(s).
INFO: [SIM 3] *************** CSIM finish ***************
WARNING: Hls::stream 'hls::stream
>.1' contains leftover data, which may result in RTL simulation hanging.
// dmar4resize_gray.h
// 2017/09/01 by marsee
//
#ifndef __DMAR4RESIZE_GRAY_H__
#define __DMAR4RESIZE_GRAY_H__
#define HORIZONTAL_PIXEL_WIDTH 800
#define VERTICAL_PIXEL_WIDTH 600
#define ALL_PIXEL_VALUE (HORIZONTAL_PIXEL_WIDTH*VERTICAL_PIXEL_WIDTH)
#define MAX_FRAME_NUMBER 2
#define MEMCPY_LENGTH (HORIZONTAL_PIXEL_WIDTH*4)
#endif
// dmar4resize_gray.cpp
// 2017/09/01 by marsee
//
// if RorL=0 then frame_buffer1 read
// if RorL=1 then frame_buffer0 read
//
#include <stdio.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "dmar4resize_gray.h"
int dmar4resize_gray(volatile int *ins, hls::stream<ap_axis<32,1,1,1> >& outs,
unsigned int frame_buffer0, unsigned int frame_buffer1,
ap_uint<1> & RorL){
#pragma HLS INTERFACE ap_none port=RorL
#pragma HLS INTERFACE s_axilite port=frame_buffer0
#pragma HLS INTERFACE s_axilite port=frame_buffer1
#pragma HLS INTERFACE m_axi depth=5000000 port=ins offset=off
#pragma HLS INTERFACE axis port=outs
#pragma HLS INTERFACE s_axilite port=return
ap_axis<32,1,1,1> pix;
int dma_index;
if(RorL == (ap_uint<1>)0) // 1つ前のフレームバッファを読みだす
dma_index = frame_buffer1/sizeof(int);
else
dma_index = frame_buffer0/sizeof(int);
for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
pix.data = ins[dma_index+(y*HORIZONTAL_PIXEL_WIDTH)+x];
if (y==0 && x==0)
pix.user = 1;
else
pix.user = 0;
if (x == (HORIZONTAL_PIXEL_WIDTH-1))
pix.last = 1;
else
pix.last = 0;
outs << pix;
}
}
return 0;
}
// dmar4resize_gray_tb.cpp
// 2017/09/01 by marsee
//
// if RorL=0 then frame_buffer1 read
// if RorL=1 then frame_buffer0 read
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <ap_axi_sdata.h>
#include "dmar4resize_gray.h"
#include "bmp_header.h"
int dmar4resize_gray(volatile int *ins, hls::stream<ap_axis<32,1,1,1> >& outs,
unsigned int frame_buffer0, unsigned int frame_buffer1,
ap_uint<1> & RorL);
int main()
{
using namespace std;
hls::stream<ap_axis<32,1,1,1> > outs;
ap_axis<32,1,1,1> pix;
ap_axis<32,1,1,1> vals;
BITMAPFILEHEADER bmpfhr; // BMPファイルのファイルヘッダ(for Read)
BITMAPINFOHEADER bmpihr; // BMPファイルのINFOヘッダ(for Read)
FILE *fbmpr, *fbmpr2, *fbmpw;
int *rd_bmp, *hw_lapd;
int blue, green, red;
ap_uint<2> active_frame = 0;
ap_uint<1> RorL;
char output_file[200];
if ((fbmpr = fopen("straight0.bmp", "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open straight0.bmp by binary read mode\n");
exit(1);
}
if ((fbmpr2 = fopen("left_turn4.bmp", "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open left_turn4.bmp by binary read mode\n");
exit(1);
}
// bmpヘッダの読み出し
fread(&bmpfhr.bfType, sizeof(char), 2, fbmpr);
fread(&bmpfhr.bfSize, sizeof(long), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
fread(&bmpfhr.bfType, sizeof(char), 2, fbmpr2);
fread(&bmpfhr.bfSize, sizeof(long), 1, fbmpr2);
fread(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpr2);
fread(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpr2);
fread(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpr2);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr2);
// ピクセルを入れるメモリをアロケートする
if ((rd_bmp =(int *)malloc(MAX_FRAME_NUMBER * sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
int *buf;
if ((buf =(int *)malloc(MAX_FRAME_NUMBER * sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate buf memory\n");
exit(1);
}
// rd_bmp にBMPのピクセルを代入。その際に、行を逆転する必要がある
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr2);
green = fgetc(fbmpr2);
red = fgetc(fbmpr2);
rd_bmp[bmpihr.biHeight*bmpihr.biWidth+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr2);
RorL = 0;
dmar4resize_gray((volatile int *)0, outs, (unsigned int)rd_bmp,
(unsigned int)rd_bmp+(bmpihr.biWidth * bmpihr.biHeight) * sizeof(int),
RorL);
RorL = 1;
dmar4resize_gray((volatile int *)0, outs, (unsigned int)rd_bmp,
(unsigned int)rd_bmp+(bmpihr.biWidth * bmpihr.biHeight) * sizeof(int),
RorL);
// outs ストリームのデータを buf に入力する
for (int k=0; k<MAX_FRAME_NUMBER; k++){
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
outs >> vals;
ap_int<32> val = vals.data;
buf[(k*bmpihr.biWidth*bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = (int)val;
}
}
}
// DMAされたデータをBMPフィルに書き込む
for (int i=0; i<MAX_FRAME_NUMBER; i++){
switch (i){
case 0:
strcpy(output_file,"dma_result0.bmp");
break;
case 1:
strcpy(output_file,"dma_result1.bmp");
break;
case 2:
strcpy(output_file,"dma_result2.bmp");
break;
}
if ((fbmpw=fopen(output_file, "wb")) == NULL){
fprintf(stderr, "Can't open %s by binary write mode\n", output_file);
exit(1);
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpw);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpw);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpw);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpw);
// RGB データの書き込み、逆順にする
int offset = i * bmpihr.biWidth * bmpihr.biHeight;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = buf[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = (buf[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] >> 8) & 0xff;
red = (buf[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x]>>16) & 0xff;
fputc(blue, fbmpw);
fputc(green, fbmpw);
fputc(red, fbmpw);
}
}
fclose(fbmpw);
}
free(rd_bmp);
return(0);
}
// resize_gray.h
// 2017/08/31 by marsee
//
#ifndef __resize_gray_H__
#define __resize_gray_H__
#include "ap_axi_sdata.h"
#include "hls_video.h"
#define MAX_HEIGHT 600
#define MAX_WIDTH 800
typedef hls::stream<ap_axiu<32,1,1,1> > AXI_STREAM;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC3> RGB_IMAGE;
typedef hls::Mat<MAX_HEIGHT, MAX_WIDTH, HLS_8UC1> GRAY_IMAGE;
#endif
// resize_gray.cpp
// 2017/08/31 by marsee
//
#include "resize_gray.h"
int resize_gray(AXI_STREAM& ins, AXI_STREAM& outs){
#pragma HLS INTERFACE axis register both port=outs
#pragma HLS INTERFACE axis register both port=ins
#pragma HLS DATAFLOW
#pragma HLS INTERFACE s_axilite port=return
RGB_IMAGE org_img(600, 800);
GRAY_IMAGE org_img_g(600, 800);
GRAY_IMAGE resize_img_g(45, 60);
RGB_IMAGE resize_img(45, 60);
hls::AXIvideo2Mat(ins, org_img);
hls::CvtColor<HLS_BGR2GRAY>(org_img, org_img_g);
hls::Resize(org_img_g, resize_img_g);
hls::CvtColor<HLS_GRAY2BGR>(resize_img_g, resize_img);
hls::Mat2AXIvideo(resize_img, outs);
return(0);
}
// resize_gray_tb.cpp
// 2017/08/31 by marsee
//
#include <iostream>
#include "hls_opencv.h"
#include "resize_gray.h"
using namespace cv;
#define INPUT_IMAGE "straight0.bmp"
#define OUTPUT_IMAGE "test_straight0.bmp"
#define OUTPUT_IMAGE_CV "test_straight0_cv.bmp"
void resize_gray(AXI_STREAM& ins, AXI_STREAM& outs);
void opencv_resize_gray(Mat& src, Mat& dst);
int main (int argc, char** argv) {
// OpenCV で 画像を読み込む
Mat src = imread(INPUT_IMAGE);
AXI_STREAM src_axi, dst_axi;
// Mat フォーマットから AXI4 Stream へ変換
cvMat2AXIvideo(src, src_axi);
// resize_gray() 関数をコール
resize_gray(src_axi, dst_axi);
// AXI4 Stream から Mat フォーマットへ変換
// dst は宣言時にサイズとカラー・フォーマットを定義する必要がある
Mat dst(45, 60, CV_8UC3);
AXIvideo2cvMat(dst_axi, dst);
// Mat フォーマットからファイルに書き込み
imwrite(OUTPUT_IMAGE, dst);
// opencv_resize_gray() をコール
Mat dst_cv(45, 60, CV_8UC3);
opencv_resize_gray(src, dst_cv);
imwrite(OUTPUT_IMAGE_CV, dst_cv);
// dst と dst_cv が同じ画像かどうか?比較する
for (int y=0; y<45; y++){
Vec3b* dst_ptr = dst.ptr<Vec3b>(y);
Vec3b* dst_cv_ptr = dst_cv.ptr<Vec3b>(y);
for (int x=0; x<60; x++){
Vec3b dst_bgr = dst_ptr[x];
Vec3b dst_cv_bgr = dst_cv_ptr[x];
// bgr のどれかが間違っていたらエラー
if (std::pow(dst_bgr[0]-dst_cv_bgr[0], 2.0) > 1 || std::pow(dst_bgr[1]-dst_cv_bgr[1], 2.0) > 1
|| std::pow(dst_bgr[2]-dst_cv_bgr[2], 2.0) > 1){
printf("x = %d, y = %d, Error dst=%d,%d,%d dst_cv=%d,%d,%d\n", x, y,
dst_bgr[0], dst_bgr[1], dst_bgr[0], dst_cv_bgr[0], dst_cv_bgr[1], dst_cv_bgr[2]);
//return 1;
}
}
}
printf("Test with 0 errors.\n");
return 0;
}
void opencv_resize_gray(Mat& src, Mat& dst){
Mat gray(src.rows, src.cols, CV_8UC1);
Mat img0g(45, 60, CV_8UC1);
cvtColor(src, gray, CV_BGR2GRAY);
resize(gray, img0g, img0g.size(), 0, 0, INTER_LINEAR);
cvtColor(img0g, dst, CV_GRAY2BGR);
}
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | - | - | - | 1 | 2 |
3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 26 | 27 | 28 | 29 | 30 |