に変更してみよう。wready_gen.set_low_time(1);
wready_gen.set_high_time(1);
に変更してみよう。wready_gen.set_ready_policy(XIL_AXI_READY_GEN_RANDOM);
入れてみよう。mtestWData[31:0] = 32'h12345678;
mtestWData[63:32] = 32'h9abcdef0;を
if (y == 0)
lap_fb[(VERTICAL_PIXEL_WIDTH-1)*HORIZONTAL_PIXEL_WIDTH+x] = lap;
else
lap_fb[(y-1)*HORIZONTAL_PIXEL_WIDTH+x] = lap;
リスト 1: lap_fb[]のライト部分の変更点
for (int y=0; y<=VERTICAL_PIXEL_WIDTH; y++){
for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE
if (y < VERTICAL_PIXEL_WIDTH)
pix = cam_fb[y*HORIZONTAL_PIXEL_WIDTH+x];
リスト 2: cam_fb[]のリード部分の変更点
if (y != 0)
lap_fb[(y-1)*HORIZONTAL_PIXEL_WIDTH+x] = lap;
リスト 3: lap_fb[]のライト部分の変更点
でVivado HLS のAXI4-Stream編として書きました。これが本命だと思っています。このようにデータのストリーミング中に処理を行うという方法がFPGA に合っていると言いますか、これができないようだとCPU やGPU に処理速度で負けちゃうんじゃないでしょうか?というのが私の意見です。第1章
【データが順番に流れるならストリームが最適】
無料のVivado HL WebPACK Edition 2016.4で高位合成にチャレンジしよう(AXI4ストリーム編)
AXI4ストリームによるラプラシアン・フィルタ処理回路の実装
第2章ではVivado HLS のAXI4 Master 編(FPGAマガジンNo.16)でアルゴリズムの通りに実装したラプラシアンフィルタ回路と最適化を行ったラプラシアンフィルタ回路の間の性能差は約 25 倍でしたが、約 80 倍の性能のAXI4 Master のラプラシアンフィルタ回路ができたので、ご紹介しました。もう原理的に限界の性能を出すことができました。記事のタイトルが誤解を招くかもしれませんが、これはAXI4-Stream インターフェースではなく、AXI4 インターフェースのMaster です。AXI4-Stream 版とい同じように書いたら最速になった、AXI4 インターフェースのMaster 版のラプラシアンフィルタ回路の話です。よろしくお願いします。第2章
【これ以上の高速化は難しい?!】
無料のVivado HL WebPACK Edition 2016.4で高位合成にチャレンジしよう(AXI4ストリーム編)
ついに80倍高速化!AXI4マスタ版ラプラシアン・フィルタの最適化
でGitHub からクローンした。git clone https://github.com/mnielsen/neural-networks-and-deep-learning.git
とタイプしてPython をインタプリタモードで起動した。python
import mnist_loader
training_data, validation_data, test_data = \
mnist_loader.load_data_wrapper()
import network
net = network.Network([784, 30, 10])
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
net = network.Network([784, 100, 10])
net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
net = network.Network([784, 100, 10])
net.SGD(training_data, 30, 10, 0.001, test_data=test_data)
net = network.Network([784, 100, 10])
net.SGD(training_data, 30, 10, 0.01, test_data=test_data)
net = network.Network([784, 100, 10])
net.SGD(training_data, 30, 10, 100, test_data=test_data)
ono@ono-VirtualBox:~/DNN$ cd DLNN_example/
ono@ono-VirtualBox:~/DNN/DLNN_example$ ls
README.md data fig requirements.txt src
ono@ono-VirtualBox:~/DNN/DLNN_example$ cd src
ono@ono-VirtualBox:~/DNN/DLNN_example/src$ python
Python 2.7.6 (default, Oct 26 2016, 20:30:19)
[GCC 4.8.4] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import mnist_loader
>>> import network
>>> net = network.Network([784, 30, 10])
>>> net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'training_data' is not defined
>>>
>>> q
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
NameError: name 'q' is not defined
>>> exit
Use exit() or Ctrl-D (i.e. EOF) to exit
>>>
ono@ono-VirtualBox:~/DNN/DLNN_example/src$ python
Python 2.7.6 (default, Oct 26 2016, 20:30:19)
[GCC 4.8.4] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> import mnist_loader
>>> training_data, validation_data, test_data = \
... mnist_loader.load_data_wrapper()
>>> import network
>>> net = network.Network([784, 30, 10])
>>> net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
Epoch 0: 7500 / 10000
Epoch 1: 8497 / 10000
Epoch 2: 8557 / 10000
Epoch 3: 8609 / 10000
Epoch 4: 8605 / 10000
Epoch 5: 8638 / 10000
Epoch 6: 8635 / 10000
Epoch 7: 8631 / 10000
Epoch 8: 8662 / 10000
Epoch 9: 8654 / 10000
Epoch 10: 8657 / 10000
Epoch 11: 8672 / 10000
Epoch 12: 8682 / 10000
Epoch 13: 8691 / 10000
Epoch 14: 8702 / 10000
Epoch 15: 8681 / 10000
Epoch 16: 8691 / 10000
Epoch 17: 8696 / 10000
Epoch 18: 8672 / 10000
Epoch 19: 8704 / 10000
Epoch 20: 8695 / 10000
Epoch 21: 8715 / 10000
Epoch 22: 8711 / 10000
Epoch 23: 8718 / 10000
Epoch 24: 8710 / 10000
Epoch 25: 8726 / 10000
Epoch 26: 8686 / 10000
Epoch 27: 8693 / 10000
Epoch 28: 8727 / 10000
Epoch 29: 8703 / 10000
>>> net = network.Network([784, 100, 10])
>>> net.SGD(training_data, 30, 10, 3.0, test_data=test_data)
Epoch 0: 8352 / 10000
Epoch 1: 8510 / 10000
Epoch 2: 8550 / 10000
Epoch 3: 8613 / 10000
Epoch 4: 8629 / 10000
Epoch 5: 8659 / 10000
Epoch 6: 8689 / 10000
Epoch 7: 8695 / 10000
Epoch 8: 8705 / 10000
Epoch 9: 8742 / 10000
Epoch 10: 9498 / 10000
Epoch 11: 9561 / 10000
Epoch 12: 9588 / 10000
Epoch 13: 9569 / 10000
Epoch 14: 9600 / 10000
Epoch 15: 9595 / 10000
Epoch 16: 9617 / 10000
Epoch 17: 9610 / 10000
Epoch 18: 9619 / 10000
Epoch 19: 9625 / 10000
Epoch 20: 9626 / 10000
Epoch 21: 9626 / 10000
Epoch 22: 9630 / 10000
Epoch 23: 9641 / 10000
Epoch 24: 9634 / 10000
Epoch 25: 9629 / 10000
Epoch 26: 9649 / 10000
Epoch 27: 9628 / 10000
Epoch 28: 9637 / 10000
Epoch 29: 9629 / 10000
>>> net = network.Network([784, 100, 10])
>>> net.SGD(training_data, 30, 10, 0.001, test_data=test_data)
Epoch 0: 1172 / 10000
Epoch 1: 1116 / 10000
Epoch 2: 1022 / 10000
Epoch 3: 1201 / 10000
Epoch 4: 1693 / 10000
Epoch 5: 1810 / 10000
Epoch 6: 1906 / 10000
Epoch 7: 1973 / 10000
Epoch 8: 2024 / 10000
Epoch 9: 2089 / 10000
Epoch 10: 2158 / 10000
Epoch 11: 2206 / 10000
Epoch 12: 2255 / 10000
Epoch 13: 2291 / 10000
Epoch 14: 2341 / 10000
Epoch 15: 2378 / 10000
Epoch 16: 2411 / 10000
Epoch 17: 2455 / 10000
Epoch 18: 2479 / 10000
Epoch 19: 2526 / 10000
Epoch 20: 2555 / 10000
Epoch 21: 2598 / 10000
Epoch 22: 2640 / 10000
Epoch 23: 2682 / 10000
Epoch 24: 2718 / 10000
Epoch 25: 2751 / 10000
Epoch 26: 2777 / 10000
Epoch 27: 2809 / 10000
Epoch 28: 2845 / 10000
Epoch 29: 2878 / 10000
>>> net = network.Network([784, 100, 10])
>>> net.SGD(training_data, 30, 10, 0.01, test_data=test_data)
Epoch 0: 1515 / 10000
Epoch 1: 1890 / 10000
Epoch 2: 2038 / 10000
Epoch 3: 2185 / 10000
Epoch 4: 2339 / 10000
Epoch 5: 2506 / 10000
Epoch 6: 2684 / 10000
Epoch 7: 2852 / 10000
Epoch 8: 2981 / 10000
Epoch 9: 3123 / 10000
Epoch 10: 3253 / 10000
Epoch 11: 3422 / 10000
Epoch 12: 3560 / 10000
Epoch 13: 3640 / 10000
Epoch 14: 3721 / 10000
Epoch 15: 3770 / 10000
Epoch 16: 3834 / 10000
Epoch 17: 3899 / 10000
Epoch 18: 3967 / 10000
Epoch 19: 4052 / 10000
Epoch 20: 4134 / 10000
Epoch 21: 4216 / 10000
Epoch 22: 4316 / 10000
Epoch 23: 4412 / 10000
Epoch 24: 4513 / 10000
Epoch 25: 4591 / 10000
Epoch 26: 4686 / 10000
Epoch 27: 4763 / 10000
Epoch 28: 4868 / 10000
Epoch 29: 4977 / 10000
>>> net = network.Network([784, 100, 10])
>>> net.SGD(training_data, 30, 10, 100, test_data=test_data)
Epoch 0: 894 / 10000
Epoch 1: 894 / 10000
Epoch 2: 894 / 10000
Epoch 3: 894 / 10000
Epoch 4: 894 / 10000
Epoch 5: 894 / 10000
Epoch 6: 894 / 10000
Epoch 7: 894 / 10000
Epoch 8: 894 / 10000
Epoch 9: 893 / 10000
Epoch 10: 893 / 10000
Epoch 11: 893 / 10000
Epoch 12: 893 / 10000
Epoch 13: 893 / 10000
Epoch 14: 893 / 10000
Epoch 15: 893 / 10000
Epoch 16: 893 / 10000
Epoch 17: 893 / 10000
Epoch 18: 893 / 10000
Epoch 19: 893 / 10000
Epoch 20: 892 / 10000
Epoch 21: 892 / 10000
Epoch 22: 892 / 10000
Epoch 23: 1262 / 10000
Epoch 24: 1262 / 10000
Epoch 25: 1262 / 10000
Epoch 26: 1262 / 10000
Epoch 27: 1262 / 10000
Epoch 28: 1262 / 10000
Epoch 29: 1262 / 10000
>>>
AXI Master VIP
AXI Slave VIP
AXI Pass-Through VIP
other traditional IP (modules in the static/physical world)
dynamic world
• User environment
• Master agent
• AXI master VIP
• Master write driver
• Master read driver
• Monitor
• Virtual interface
• User environment
• Slave agent without a memory model
• AXI slave VIP
• Slave write driver
• Slave read driver
• Monitor
• Virtual interface
・Vivado IP インテグーター
・ AXI 設計アシ ス タ ン ス:
° AXI Interconnect (古い) または AXI SmartConnect (新しい) か ら選択可能。
° 設計アシ ス タ ン スによ る自動化を AXI4-Stream インターフェイス、 CLK、 リ セ ッ ト に拡張。
• 検証 IP
° AXI Verification IP を導入
- SystemVerilog ベース、 ラ イセンス不要
- AXI3、 AXI4、 および AXI4-Lite をサポー ト 。
- すべてのプ ロ ト コル データ幅およびア ド レ ス幅、 転送タ イプ、 応答をサポー ト
- AXI プロ ト コル チェ ッ カーを完全にサポー ト
- 統合 ARM ラ イセンスのプロ ト コル アサーシ ョ ン
- パス スルー モー ド で合成を イネーブル (ワ イヤに合成)
- シ ミ ュレーシ ョ ン メ ッ セージを設定可能
- サンプル デザイ ンおよびテス ト ベンチを IP イ ンテグ レーターで提供
° Zynq-7000 VIP を導入 (上記の AXI VIP に基づ く )
- ラ イセン ス不要、 SystemVerilog ベース
- PS Configuration ウ ィ ザー ド の出力フ ァ イル と し て提供
- 既存の Zynq-7000 BFM 用 API と下位互換性あ り
Vivado デバッグ
• IP イ ンテグレーターでのシステム デバ ッ グで ラ ン タ イ ムの AXI ト ラ ンザ ク シ ョ ンの表示をサポー ト 。
° 波形に ト ラ ンザ ク シ ョ ン行を表示。
° IP イ ンテグ レーターか ら ビ ッ ト ス ト リ ーム生成のフ ローを改善。
- IP イ ンテグ レーターの設計アシ ス タ ン ス を向上。
- 合成後に [Debug] ウィンドウで System ILA を表示。
だそうです。やった~。。。これで私もパーシャル リコンフィ ギュ レーシ ョン 試せる。Xilinxさん太っ腹。。。System Edition および Design Edition でパーシャル リコンフィ ギュ レーシ ョン (PR) ラ イセン ス を追加コ ス ト な しで提供。
#!/bin/sh
# device tree overlay command (devtov)
# Be sure to run it as superuser
# $1 is device tree name
mkdir /config/device-tree/overlays/$1
cp $1.dtbo /config/device-tree/overlays/$1/dtbo
echo 1 > /config/device-tree/overlays/$1/status
sleep 0.1
chmod 666 /dev/uio*
ls -l /dev/uio*
if test -e /dev/udmabuf*; then
chmod 666 /dev/udmabuf*
ls -l /dev/udmabuf*
fi
if test -e /dev/fclk0; then
chmod 666 /dev/fclk*
ls -l /dev/fclk*
fi
/dts-v1/;
/ {
fragment@0 {
target-path = "/amba";
__overlay__ {
#address-cells = <0x1>;
#size-cells = <0x1>;
axi_iic_0@41600000 {
compatible = "generic-uio";
reg = <0x41600000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axi_vdma_0@43000000 {
compatible = "generic-uio";
reg = <0x43000000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axis_switch_0@43C10000 {
compatible = "generic-uio";
reg = <0x43C10000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axis_switch_1@43C20000 {
compatible = "generic-uio";
reg = <0x43C20000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
bitmap_disp_cntrler_axi_master_0@43C00000 {
compatible = "generic-uio";
reg = <0x43C00000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
fastx_corner_det_0@43C30000 {
compatible = "generic-uio";
reg = <0x43C30000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
lap_filter_axis_0@43C50000 {
compatible = "generic-uio";
reg = <0x43C50000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
mt9d111_inf_axis_0@43C40000 {
compatible = "generic-uio";
reg = <0x43C40000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
unsharp_mask_axis_0@43C60000 {
compatible = "generic-uio";
reg = <0x43C60000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
udmabuf4 {
compatible = "ikwzm,udmabuf-0.10.a";
minor-number = <4>;
size = <0x00600000>;
};
fclk0 {
compatible = "ikwzm,fclkcfg-0.10.a";
clocks = <1 15>;
};
fclk1 {
compatible = "ikwzm,fclkcfg-0.10.a";
clocks = <1 16>;
};
fclk2 {
compatible = "ikwzm,fclkcfg-0.10.a";
clocks = <1 17>;
};
};
};
};
// cam_disp.c
// 2017/04/08 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/mman.h>
#include <fcntl.h>
#define NUMBER_OF_WRITE_FRAMES 3 // Note: If not at least 3 or more, the image is not displayed in succession.
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
#define PIXEL_NUM_OF_BYTES 4
#define ALL_DISP_ADDRESS (HORIZONTAL_PIXELS*VERTICAL_LINES*PIXEL_NUM_OF_BYTES)
#define FASTX_THRESHOLD 20
void cam_i2c_init(volatile unsigned *mt9d111_i2c_axi_lites) {
mt9d111_i2c_axi_lites[64] = 0x2; // reset tx fifo ,address is 0x100, i2c_control_reg
mt9d111_i2c_axi_lites[64] = 0x1; // enable i2c
}
void cam_i2x_write_sync(void) {
// unsigned c;
// c = *cam_i2c_rx_fifo;
// while ((c & 0x84) != 0x80)
// c = *cam_i2c_rx_fifo; // No Bus Busy and TX_FIFO_Empty = 1
usleep(1000);
}
void cam_i2c_write(volatile unsigned *mt9d111_i2c_axi_lites, unsigned int device_addr, unsigned int write_addr, unsigned int write_data){
mt9d111_i2c_axi_lites[66] = 0x100 | (device_addr & 0xfe); // Slave IIC Write Address, address is 0x108, i2c_tx_fifo
mt9d111_i2c_axi_lites[66] = write_addr;
mt9d111_i2c_axi_lites[66] = (write_data >> 8)|0xff; // first data
mt9d111_i2c_axi_lites[66] = 0x200 | (write_data & 0xff); // second data
cam_i2x_write_sync();
}
int main()
{
int fd0, fd1, fd2, fd3, fd4, fd5, fd6, fd7, fd8;
int fd_udmabuf, fd_paddr;
volatile unsigned int *axi_iic_0, *axi_vdma_0, *axis_switch_0, *axis_switch_1;
volatile unsigned int *bitmap_disp_cntrler_axim_0, *fastx_corner_det_0;
volatile unsigned int *mt9d111_inf_axis_0;
volatile unsigned int *frame_buffer;
unsigned char attr[1024];
unsigned long phys_addr;
// axi_iic_0 (uio0)
fd0 = open("/dev/uio0", O_RDWR); // axi_iic_0
if (fd0 < 1){
fprintf(stderr, "/dev/uio0 (axi_iic_0) open errorn");
exit(-1);
}
axi_iic_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd0, 0);
if (axi_iic_0 == MAP_FAILED){
fprintf(stderr, "axi_iic_0 mmap errorn");
exit(-1);
}
// axi_vdma_0 (uio1)
fd1 = open("/dev/uio1", O_RDWR); // axi_vdma_0
if (fd1 < 1){
fprintf(stderr, "/dev/uio1 (axi_vdma_0) open errorn");
exit(-1);
}
axi_vdma_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0);
if (axi_vdma_0 == MAP_FAILED){
fprintf(stderr, "axi_vdma_0 mmap errorn");
exit(-1);
}
// axis_switch_0 (uio2)
fd2 = open("/dev/uio2", O_RDWR); // axis_switch_0
if (fd2 < 1){
fprintf(stderr, "/dev/uio2 (axis_switch_0) open errorn");
exit(-1);
}
axis_switch_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd2, 0);
if (axis_switch_0 == MAP_FAILED){
fprintf(stderr, "axis_switch_0 mmap errorn");
exit(-1);
}
// axis_switch_1 (uio3)
fd3 = open("/dev/uio3", O_RDWR); // axis_switch_1
if (fd3 < 1){
fprintf(stderr, "/dev/uio3 (axis_switch_1) open errorn");
exit(-1);
}
axis_switch_1 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd3, 0);
if (axis_switch_1 == MAP_FAILED){
fprintf(stderr, "axis_switch_1 mmap errorn");
exit(-1);
}
// bitmap_disp_cntrler_axim_0 (uio4)
fd4 = open("/dev/uio4", O_RDWR); // bitmap_disp_cntrler_axim_0
if (fd4 < 1){
fprintf(stderr, "/dev/uio4 (bitmap_disp_cntrler_axim_0) open errorn");
exit(-1);
}
bitmap_disp_cntrler_axim_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd4, 0);
if (bitmap_disp_cntrler_axim_0 == MAP_FAILED){
fprintf(stderr, "bitmap_disp_cntrler_axim_0 mmap errorn");
exit(-1);
}
// mt9d111_inf_axis_0 (uio7)
fd7 = open("/dev/uio7", O_RDWR); // mt9d111_inf_axis_0
if (fd7 < 1){
fprintf(stderr, "/dev/uio7 (mt9d111_inf_axis_0) open errorn");
exit(-1);
}
mt9d111_inf_axis_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd7, 0);
if (mt9d111_inf_axis_0 == MAP_FAILED){
fprintf(stderr, "mt9d111_inf_axis_0 mmap errorn");
exit(-1);
}
// udmabuf4
fd_udmabuf = open("/dev/udmabuf4", O_RDWR | O_SYNC); // frame_buffer, The chache is disabled.
if (fd_udmabuf == -1){
fprintf(stderr, "/dev/udmabuf4 open errorn");
exit(-1);
}
frame_buffer = (volatile unsigned int *)mmap(NULL, (ALL_DISP_ADDRESS*3), PROT_READ|PROT_WRITE, MAP_SHARED, fd_udmabuf, 0);
if (frame_buffer == MAP_FAILED){
fprintf(stderr, "frame_buffer mmap errorn");
exit(-1);
}
// phys_addr of udmabuf4
fd_paddr = open("/sys/devices/soc0/amba/amba:udmabuf4/udmabuf/udmabuf4/phys_addr", O_RDONLY);
if (fd_paddr == -1){
fprintf(stderr, "/sys/devices/soc0/amba/amba:udmabuf4/udmabuf/udmabuf4/phys_addr open errorn");
exit(-1);
}
read(fd_paddr, attr, 1024);
sscanf(attr, "%lx", &phys_addr);
close(fd_paddr);
printf("phys_addr = %x\n", (unsigned int)phys_addr);
// axis_switch_1, 1to2 ,Select M00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
axis_switch_1[16] = 0x0; // 0x40 = 0
axis_switch_1[17] = 0x80000000; // 0x44 = 0x80000000, disable
axis_switch_1[18] = 0x80000000; // 0x48 = 0x80000000, disable
axis_switch_1[19] = 0x80000000; // 0x4C = 0x80000000, disable
axis_switch_1[0] = 0x2; // Comit registers
// axis_switch_0, 2to1, Select S00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
axis_switch_0[16] = 0x0; // 0x40 = 0;
axis_switch_0[0] = 0x2; // Comit registers
// AXI VDMA Initialization sequence (axi_vdma_0)
axi_vdma_0[12] = 0x4; // S2MM_VDMACR (S2MM VDMA Control Register Offset 30h) is 0x4
while ((axi_vdma_0[12] & 0x4) == 0x4) ; // Reset is progress
axi_vdma_0[12] = 0x4; // S2MM_VDMACR (S2MM VDMA Control Register Offset 30h) is 0x4
while ((axi_vdma_0[12] & 0x4) == 0x4) ; // Reset is progress
axi_vdma_0[18] = NUMBER_OF_WRITE_FRAMES; // S2MM_FRMSTORE (0x48) register
axi_vdma_0[12] = 0x00010002; // S2MM_VDMACR(IRQFrameCount = 0x1, Circular_Park = 1)
axi_vdma_0[41] = HORIZONTAL_PIXELS*PIXEL_NUM_OF_BYTES; // S2MM Horizontal Size Register(S2MM_HSIZE)0xc80 = 3200dec = 800 x 4
axi_vdma_0[42] = HORIZONTAL_PIXELS*PIXEL_NUM_OF_BYTES; // S2MM Frame Delay and Stride Register(S2MM_FRMDLY_STRIDE)0xc80 = 3200dec = 800 x 4
axi_vdma_0[43] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 1
axi_vdma_0[44] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 2
axi_vdma_0[45] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 3
axi_vdma_0[12] = 0x00010003; // S2MM_VDMACR(IRQFrameCount = 0x1, Circular_Park = 1, Run/stop = 1)
while((axi_vdma_0[13] & 0x1) == 0x1) ; // Halt? (S2MM_VDMASR 0x34)
axi_vdma_0[40] = VERTICAL_LINES; // S2MM Vertical Size (S2MM_VSIZE Offset 0xA0) 0x258 = 600dec
// CMOS Camera initialize, MT9D111
cam_i2c_init(axi_iic_0);
cam_i2c_write(axi_iic_0, 0xba, 0xf0, 0x1); // Changed regster map to IFP page 1
cam_i2c_write(axi_iic_0, 0xba, 0x97, 0x20); // RGB Mode, RGB565
mt9d111_inf_axis_0[1] = 0;
mt9d111_inf_axis_0[0] = (unsigned int)phys_addr;
bitmap_disp_cntrler_axim_0[0] = (unsigned int)phys_addr;
munmap((void *)axi_iic_0, 0x10000);
munmap((void *)axi_vdma_0, 0x10000);
munmap((void *)axis_switch_0, 0x10000);
munmap((void *)axis_switch_1, 0x10000);
munmap((void *)bitmap_disp_cntrler_axim_0, 0x10000);
munmap((void *)mt9d111_inf_axis_0, 0x10000);
munmap((void *)frame_buffer, (ALL_DISP_ADDRESS*3));
close(fd0);
close(fd1);
close(fd2);
close(fd3);
close(fd4);
close(fd7);
close(fd_udmabuf);
}
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// 0x18 : Data signal of in_r
// bit 31~0 - in_r[31:0] (Read/Write)
// 0x1c : reserved
// 0x20 : Data signal of fb0_offset_addr
// bit 31~0 - fb0_offset_addr[31:0] (Read/Write)
// 0x24 : reserved
// 0x28 : Data signal of fb1_offset_addr
// bit 31~0 - fb1_offset_addr[31:0] (Read/Write)
// 0x2c : reserved
// 0x30 : Data signal of fb2_offset_addr
// bit 31~0 - fb2_offset_addr[31:0] (Read/Write)
// 0x34 : reserved
// 0x38 : Data signal of mode_V
// bit 0 - mode_V[0] (Read/Write)
// others - reserved
// 0x3c : reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
// DMA_Read.h
// 2016/07/14 by marsee
// 2016/09/18 : BURST_LENGTH を追加
//
#ifndef __DMA_READ_H__
#define __DMA_READ_H__
//#define HORIZONTAL_PIXEL_WIDTH 800
//#define VERTICAL_PIXEL_WIDTH 600
#define HORIZONTAL_PIXEL_WIDTH 64
#define VERTICAL_PIXEL_WIDTH 48
#define ALL_PIXEL_VALUE (HORIZONTAL_PIXEL_WIDTH*VERTICAL_PIXEL_WIDTH)
#define MAX_FRAME_NUMBER 3
#define DMA_WRITE_MODE 0
#define FREE_RUN_MODE 1
#define MEMCPY_LENGTH (HORIZONTAL_PIXEL_WIDTH*4)
#endif
// DMA_Read_offset.cpp
// 2017/04/17 by marsee
//
// fb0_offset_addr, fb1_offset_addr, fb2_offset_addr には3つのフレームバッファのオフセット・アドレスを入れる
// mode = 0 : DMA Write IP の active_frame を見て、その1つ前のフレームをDMA Readするモード(DMA_WRITE_MODE)
// mode = 1 : フリーラン モード(FREE_RUN_MODE)
//
// カメラのフレームレートの方が小さい場合に、フリーランモードで3つフレームバッファが別々だと画像がぶれてしまう。
// その場合は、3つのフレームバッファ・アドレスを1つのアドレスを入れて、シングル・フレームバッファとする必要がある。
// 1フレーム分のDMAを行う
//
#include <stdio.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "DMA_Read.h"
int DMA_Read_offset(volatile int *in, hls::stream<ap_axis<32,1,1,1> >& outs,
unsigned int fb0_offset_addr, unsigned int fb1_offset_addr,
unsigned int fb2_offset_addr, ap_uint<2> & active_frame,
ap_uint<1> mode){
#pragma HLS INTERFACE s_axilite port=mode
#pragma HLS INTERFACE ap_none register port=active_frame
#pragma HLS INTERFACE s_axilite port=fb0_offset_addr
#pragma HLS INTERFACE s_axilite port=fb1_offset_addr
#pragma HLS INTERFACE s_axilite port=fb2_offset_addr
#pragma HLS INTERFACE m_axi depth=9216 port=in offset=slave
#pragma HLS INTERFACE axis port=outs
#pragma HLS INTERFACE s_axilite port=return
ap_axis<32,1,1,1> pix;
int dma_index;
static int n = 0;
if (mode == DMA_WRITE_MODE){
n = (int)active_frame;
}else{
n++;
if (n > 2)
n = 0;
}
switch (n){ // 1つ前のフレームバッファを読みだす
case 0 :
dma_index = fb2_offset_addr/sizeof(int);
break;
case 1 :
dma_index = fb0_offset_addr/sizeof(int);
break;
case 2 :
dma_index = fb1_offset_addr/sizeof(int);
break;
default :
dma_index = fb0_offset_addr/sizeof(int);
break;
}
for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
pix.data = in[dma_index+(y*HORIZONTAL_PIXEL_WIDTH)+x];
if (y==0 && x==0)
pix.user = 1;
else
pix.user = 0;
if (x == (HORIZONTAL_PIXEL_WIDTH-1))
pix.last = 1;
else
pix.last = 0;
outs << pix;
}
}
return 0;
}
// DMA_Read_offset_tb.cpp
// 2016/07/15 by marsee
// 2017/03/16 : 3フレーム処理から1フレーム処理に変更
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <ap_axi_sdata.h>
#include "DMA_Read.h"
#include "bmp_header.h"
int DMA_Read_offset(volatile int *in, hls::stream<ap_axis<32,1,1,1> >& outs,
unsigned int fb0_offset_addr, unsigned int fb1_offset_addr,
unsigned int fb2_offset_addr, ap_uint<2> & active_frame,
ap_uint<1> mode);
int main()
{
using namespace std;
hls::stream<ap_axis<32,1,1,1> > outs;
ap_axis<32,1,1,1> pix;
ap_axis<32,1,1,1> vals;
BITMAPFILEHEADER bmpfhr; // BMPファイルのファイルヘッダ(for Read)
BITMAPINFOHEADER bmpihr; // BMPファイルのINFOヘッダ(for Read)
FILE *fbmpr, *fbmpw;
int *rd_bmp, *hw_lapd;
int blue, green, red;
ap_uint<2> active_frame = 0;
int *frame_buffer;
if ((fbmpr = fopen("test.bmp", "rb")) == NULL){ // test.bmp をオープン
//if ((fbmpr = fopen("road_1.bmp", "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open test.bmp by binary read mode\n");
exit(1);
}
// bmpヘッダの読み出し
fread(&bmpfhr.bfType, sizeof(char), 2, fbmpr);
fread(&bmpfhr.bfSize, sizeof(long), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// ピクセルを入れるメモリをアロケートする
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
int *buf;
if ((buf =(int *)malloc(MAX_FRAME_NUMBER * sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate buf memory\n");
exit(1);
}
// rd_bmp にBMPのピクセルを代入。その際に、行を逆転する必要がある
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
// frame buffer をアロケートする、3倍の領域を取ってそれを3つに分ける
if ((frame_buffer =(int *)malloc(MAX_FRAME_NUMBER * sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate frame_buffer0 ~ 2\n");
exit(1);
}
// 3 つのフレームバッファにそれぞれ'A' を入力する(1つに変更)
memcpy(frame_buffer, rd_bmp, bmpihr.biHeight * bmpihr.biWidth * sizeof(int));
memcpy((int *)((unsigned int)frame_buffer + bmpihr.biHeight * bmpihr.biWidth * sizeof(int)),
rd_bmp, bmpihr.biHeight * bmpihr.biWidth * sizeof(int));
memcpy((int *)((unsigned int)frame_buffer + 2 * bmpihr.biHeight * bmpihr.biWidth * sizeof(int)),
rd_bmp, bmpihr.biHeight * bmpihr.biWidth * sizeof(int));
active_frame = 1; // fb0_offset_addr
DMA_Read_offset((volatile int *)frame_buffer, outs, (unsigned int)0,
(unsigned int)(bmpihr.biWidth * bmpihr.biHeight * sizeof(int)),
(unsigned int)(2 * (bmpihr.biWidth * bmpihr.biHeight) * sizeof(int)),
active_frame, DMA_WRITE_MODE);
active_frame = 2; // fb1_offset_addr
DMA_Read_offset((volatile int *)frame_buffer, outs, (unsigned int)0,
(unsigned int)(bmpihr.biWidth * bmpihr.biHeight * sizeof(int)),
(unsigned int)(2 * (bmpihr.biWidth * bmpihr.biHeight) * sizeof(int)),
active_frame, DMA_WRITE_MODE);
active_frame = 0; // fb2_offset_addr
DMA_Read_offset((volatile int *)frame_buffer, outs, (unsigned int)0,
(unsigned int)(bmpihr.biWidth * bmpihr.biHeight * sizeof(int)),
(unsigned int)(2 * (bmpihr.biWidth * bmpihr.biHeight) * sizeof(int)),
active_frame, DMA_WRITE_MODE);
// outs ストリームのデータを buf に入力する
for (int k=0; k<3; k++){
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
outs >> vals;
ap_int<32> val = vals.data;
buf[(k*bmpihr.biWidth*bmpihr.biHeight)+(j*bmpihr.biWidth)+i] = (int)val;
if (val != frame_buffer[(k*bmpihr.biWidth*bmpihr.biHeight)+(j*bmpihr.biWidth)+i]){
printf("ERROR HW and SW results mismatch k = %d, i = %d, j = %d, HW = %08x, SW = %08x\n", k, i, j, (int)val, (int)frame_buffer[(k*bmpihr.biWidth*bmpihr.biHeight)+(j*bmpihr.biWidth)+i]);
return(1);
}
}
}
}
// DMAされたデータをBMPフィルに書き込む
char output_file[] = "dma_result0.bmp";
for (int i=0; i<MAX_FRAME_NUMBER; i++){
switch (i){
case 0:
strcpy(output_file,"dma_result0.bmp");
break;
case 1:
strcpy(output_file,"dma_result1.bmp");
break;
case 2:
strcpy(output_file,"dma_result2.bmp");
break;
}
if ((fbmpw=fopen(output_file, "wb")) == NULL){
fprintf(stderr, "Can't open %s by binary write mode\n", output_file);
exit(1);
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpw);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpw);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpw);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpw);
// RGB データの書き込み、逆順にする
int offset = i * bmpihr.biWidth * bmpihr.biHeight;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = buf[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = (buf[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] >> 8) & 0xff;
red = (buf[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x]>>16) & 0xff;
fputc(blue, fbmpw);
fputc(green, fbmpw);
fputc(red, fbmpw);
}
}
fclose(fbmpw);
}
free(rd_bmp);
free(frame_buffer);
return 0;
}
で、オプションにオフセット・アドレスを指定する offset がある。#pragma HLS INTERFACE m_axi port=<ポート名>
off : ベース・アドレス無し。アドレスは 0x00000000
direct : ベース・アドレスは32ビット幅のポートの値となる。
slave : ベース・アドレスを示すためにAXI4-Liteインターフェースに32ビット幅のレジスタが追加される。この場合は、ブロックレベルのインターフェースをAXI4-Liteにする必要がある。
(混同を避けるため、こちらの用語はベース・アドレスにした)
なのだが、指示子は以下のようになっている。int DMA_Write(hls::stream
>& ins, volatile int *out,
unsigned int frame_buffer0, unsigned int frame_buffer1,
unsigned int frame_buffer2, volatile ap_uint<2> & active_frame)
out ポートのAXI4マスタ・インターフェースのoffset オプションは off としている。つまり、このベース・アドレスは、0 で、frame_buffer0、frame_buffer1、frame_buffer2 を絶対アドレスとして設定して、それらのアドレスを指定してDMAすることになる。#pragma HLS INTERFACE ap_vld port=active_frame
#pragma HLS INTERFACE s_axilite port=frame_buffer0
#pragma HLS INTERFACE s_axilite port=frame_buffer1
#pragma HLS INTERFACE s_axilite port=frame_buffer2
#pragma HLS INTERFACE m_axi depth=5000000 port=out offset=off
#pragma HLS INTERFACE axis port=ins
#pragma HLS INTERFACE s_axilite port=return
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// 0x18 : Data signal of frame_buffer0
// bit 31~0 - frame_buffer0[31:0] (Read/Write)
// 0x1c : reserved
// 0x20 : Data signal of frame_buffer1
// bit 31~0 - frame_buffer1[31:0] (Read/Write)
// 0x24 : reserved
// 0x28 : Data signal of frame_buffer2
// bit 31~0 - frame_buffer2[31:0] (Read/Write)
// 0x2c : reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
DMA_Write(ins, (volatile int *)0, (unsigned int)frame_buffer,
(unsigned int)frame_buffer+(bmpihr.biWidth * bmpihr.biHeight * sizeof(unsigned int)),
(unsigned int)frame_buffer+(2 * (bmpihr.biWidth * bmpihr.biHeight) * sizeof(unsigned int)),
active_frame);
DMA_Write(ins, (volatile int *)frame_buffer, (unsigned int)0,
(unsigned int)(bmpihr.biWidth * bmpihr.biHeight * sizeof(int)),
(unsigned int)(2 * (bmpihr.biWidth * bmpihr.biHeight) * sizeof(int)),
active_frame);
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// 0x18 : Data signal of out_r
// bit 31~0 - out_r[31:0] (Read/Write)
// 0x1c : reserved
// 0x20 : Data signal of fb0_offset_addr
// bit 31~0 - fb0_offset_addr[31:0] (Read/Write)
// 0x24 : reserved
// 0x28 : Data signal of fb1_offset_addr
// bit 31~0 - fb1_offset_addr[31:0] (Read/Write)
// 0x2c : reserved
// 0x30 : Data signal of fb2_offset_addr
// bit 31~0 - fb2_offset_addr[31:0] (Read/Write)
// 0x34 : reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
// DMA_Write.cpp
// 2016/07/10 by marsee
//
// frame_buffer0, frame_buffer1, frame_buffer2 には3つのフレームバッファのアドレスを入れる
// 2017/04/15 : m_axiプラグマのdepthを変更した
//
#include <stdio.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <ap_axi_sdata.h>
#include "DMA_Write.h"
int DMA_Write(hls::stream<ap_axis<32,1,1,1> >& ins, volatile int *out,
unsigned int frame_buffer0, unsigned int frame_buffer1,
unsigned int frame_buffer2, volatile ap_uint<2> & active_frame){
#pragma HLS INTERFACE ap_vld port=active_frame
#pragma HLS INTERFACE s_axilite port=frame_buffer0
#pragma HLS INTERFACE s_axilite port=frame_buffer1
#pragma HLS INTERFACE s_axilite port=frame_buffer2
#pragma HLS INTERFACE m_axi depth=9216 port=out offset=off
//#pragma HLS INTERFACE m_axi depth=30000000 port=out offset=off
#pragma HLS INTERFACE axis port=ins
#pragma HLS INTERFACE s_axilite port=return
ap_axis<32,1,1,1> pix;
int dma_index;
for (int i=0; i<MAX_FRAME_NUMBER; i++){
switch (i){
case 0 :
dma_index = frame_buffer0/sizeof(int);
break;
case 1 :
dma_index = frame_buffer1/sizeof(int);
break;
case 2 :
dma_index = frame_buffer2/sizeof(int);
break;
}
active_frame = i;
do { // user が 1になった時にフレームがスタートする
#pragma HLS LOOP_TRIPCOUNT min=1 max=1 avg=1
ins >> pix;
} while(pix.user == 0);
for (int y=0; y<VERTICAL_PIXEL_WIDTH; y++){
for (int x=0; x<HORIZONTAL_PIXEL_WIDTH; x++){
#pragma HLS PIPELINE II=1
if (!(x==0 && y==0)) // 最初の入力はすでに入力されている
ins >> pix; // AXI4-Stream からの入力
out[dma_index+(y*HORIZONTAL_PIXEL_WIDTH)+x] = pix.data;
}
}
}
return 0;
}
// DMA_Write.h
// 2016/07/10 by marsee
//
#ifndef __DMA_WRITE_H__
#define __DMA_WRITE_H__
//#define HORIZONTAL_PIXEL_WIDTH 800
//#define VERTICAL_PIXEL_WIDTH 600
#define HORIZONTAL_PIXEL_WIDTH 64
#define VERTICAL_PIXEL_WIDTH 48
#define ALL_PIXEL_VALUE (HORIZONTAL_PIXEL_WIDTH*VERTICAL_PIXEL_WIDTH)
#define MAX_FRAME_NUMBER 3
#endif
// DMA_Write_tb.cpp
// 2016/07/10 by marsee
//
// 2017/04/15 : DMA_Write()を呼び出すときの引数を変更した
//
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ap_int.h>
#include <hls_stream.h>
#include <iostream>
#include <fstream>
#include <ap_axi_sdata.h>
#include "DMA_Write.h"
#include "bmp_header.h"
int DMA_Write(hls::stream<ap_axis<32,1,1,1> >& ins, volatile int *out,
unsigned int frame_buffer0, unsigned int frame_buffer1,
unsigned int frame_buffer2, volatile ap_uint<2> & active_frame);
int main()
{
using namespace std;
hls::stream<ap_axis<32,1,1,1> > ins;
ap_axis<32,1,1,1> pix;
BITMAPFILEHEADER bmpfhr; // BMPファイルのファイルヘッダ(for Read)
BITMAPINFOHEADER bmpihr; // BMPファイルのINFOヘッダ(for Read)
FILE *fbmpr, *fbmpw;
int *rd_bmp, *hw_lapd;
int blue, green, red;
ap_uint<2> active_frame;
int *frame_buffer;
if ((fbmpr = fopen("test.bmp", "rb")) == NULL){ // test.bmp をオープン
fprintf(stderr, "Can't open test.bmp by binary read mode\n");
exit(1);
}
// bmpヘッダの読み出し
fread(&bmpfhr.bfType, sizeof(char), 2, fbmpr);
fread(&bmpfhr.bfSize, sizeof(long), 1, fbmpr);
fread(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpr);
fread(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpr);
fread(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpr);
// ピクセルを入れるメモリをアロケートする
if ((rd_bmp =(int *)malloc(sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate rd_bmp memory\n");
exit(1);
}
// rd_bmp にBMPのピクセルを代入。その際に、行を逆転する必要がある
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = fgetc(fbmpr);
green = fgetc(fbmpr);
red = fgetc(fbmpr);
rd_bmp[((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] = (blue & 0xff) | ((green & 0xff)<<8) | ((red & 0xff)<<16);
}
}
fclose(fbmpr);
// ins に入力データを用意する
for(int i=0; i<5; i++){ // dummy data
pix.user = 0;
pix.data = i;
ins << pix;
}
for(int k=0; k<MAX_FRAME_NUMBER; k++){
for(int j=0; j < bmpihr.biHeight; j++){
for(int i=0; i < bmpihr.biWidth; i++){
pix.data = (ap_int<32>)rd_bmp[(j*bmpihr.biWidth)+i];
if (j==0 && i==0) // 最初のデータの時に TUSER を 1 にする
pix.user = 1;
else
pix.user = 0;
if (i == bmpihr.biWidth-1) // 行の最後でTLASTをアサートする
pix.last = 1;
else
pix.last = 0;
ins << pix;
}
}
}
// frame buffer をアロケートする、3倍の領域を取ってそれを3つに分ける
if ((frame_buffer =(int *)malloc(MAX_FRAME_NUMBER * sizeof(int) * (bmpihr.biWidth * bmpihr.biHeight))) == NULL){
fprintf(stderr, "Can't allocate frame_buffer0 ~ 2\n");
exit(1);
}
DMA_Write(ins, (volatile int *)0, (unsigned int)frame_buffer,
(unsigned int)frame_buffer+(bmpihr.biWidth * bmpihr.biHeight * sizeof(unsigned int)),
(unsigned int)frame_buffer+(2 * (bmpihr.biWidth * bmpihr.biHeight) * sizeof(unsigned int)),
active_frame);
// DMAされたデータをBMPフィルに書き込む
char output_file[] = "dma_result0.bmp";
for (int i=0; i<MAX_FRAME_NUMBER; i++){
switch (i){
case 0:
strcpy(output_file,"dma_result0.bmp");
break;
case 1:
strcpy(output_file,"dma_result1.bmp");
break;
case 2:
strcpy(output_file,"dma_result2.bmp");
break;
}
if ((fbmpw=fopen(output_file, "wb")) == NULL){
fprintf(stderr, "Can't open %s by binary write mode\n", output_file);
exit(1);
}
// BMPファイルヘッダの書き込み
fwrite(&bmpfhr.bfType, sizeof(char), 2, fbmpw);
fwrite(&bmpfhr.bfSize, sizeof(long), 1, fbmpw);
fwrite(&bmpfhr.bfReserved1, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfReserved2, sizeof(short), 1, fbmpw);
fwrite(&bmpfhr.bfOffBits, sizeof(long), 1, fbmpw);
fwrite(&bmpihr, sizeof(BITMAPINFOHEADER), 1, fbmpw);
// RGB データの書き込み、逆順にする
int offset = i * bmpihr.biWidth * bmpihr.biHeight;
for (int y=0; y<bmpihr.biHeight; y++){
for (int x=0; x<bmpihr.biWidth; x++){
blue = frame_buffer[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] & 0xff;
green = (frame_buffer[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x] >> 8) & 0xff;
red = (frame_buffer[offset+((bmpihr.biHeight-1)-y)*bmpihr.biWidth+x]>>16) & 0xff;
fputc(blue, fbmpw);
fputc(green, fbmpw);
fputc(red, fbmpw);
}
}
fclose(fbmpw);
}
free(rd_bmp);
free(frame_buffer);
return 0;
}
//------------------------Address Info-------------------
// 0x00 : Control signals
// bit 0 - ap_start (Read/Write/COH)
// bit 1 - ap_done (Read/COR)
// bit 2 - ap_idle (Read)
// bit 3 - ap_ready (Read)
// bit 7 - auto_restart (Read/Write)
// others - reserved
// 0x04 : Global Interrupt Enable Register
// bit 0 - Global Interrupt Enable (Read/Write)
// others - reserved
// 0x08 : IP Interrupt Enable Register (Read/Write)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x0c : IP Interrupt Status Register (Read/TOW)
// bit 0 - Channel 0 (ap_done)
// bit 1 - Channel 1 (ap_ready)
// others - reserved
// 0x10 : Data signal of ap_return
// bit 31~0 - ap_return[31:0] (Read)
// 0x18 : Data signal of frame_buffer0
// bit 31~0 - frame_buffer0[31:0] (Read/Write)
// 0x1c : reserved
// 0x20 : Data signal of frame_buffer1
// bit 31~0 - frame_buffer1[31:0] (Read/Write)
// 0x24 : reserved
// 0x28 : Data signal of frame_buffer2
// bit 31~0 - frame_buffer2[31:0] (Read/Write)
// 0x2c : reserved
// (SC = Self Clear, COR = Clear on Read, TOW = Toggle on Write, COH = Clear on Handshake)
のdepth だけを 30000000 に変更します。#pragma HLS INTERFACE m_axi depth=9216 port=out offset=off
/dts-v1/;
/ {
fragment@0 {
target-path = "/amba";
__overlay__ {
fclk0 {
compatible = "ikwzm,fclkcfg-0.10.a";
clocks = <1 15>;
};
};
};
};
// cam_disp.c
// 2017/04/08 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/mman.h>
#include <fcntl.h>
#define NUMBER_OF_WRITE_FRAMES 3 // Note: If not at least 3 or more, the image is not displayed in succession.
#define HORIZONTAL_PIXELS 800
#define VERTICAL_LINES 600
#define PIXEL_NUM_OF_BYTES 4
#define ALL_DISP_ADDRESS (HORIZONTAL_PIXELS*VERTICAL_LINES*PIXEL_NUM_OF_BYTES)
#define FASTX_THRESHOLD 20
void cam_i2c_init(volatile unsigned *mt9d111_i2c_axi_lites) {
mt9d111_i2c_axi_lites[64] = 0x2; // reset tx fifo ,address is 0x100, i2c_control_reg
mt9d111_i2c_axi_lites[64] = 0x1; // enable i2c
}
void cam_i2x_write_sync(void) {
// unsigned c;
// c = *cam_i2c_rx_fifo;
// while ((c & 0x84) != 0x80)
// c = *cam_i2c_rx_fifo; // No Bus Busy and TX_FIFO_Empty = 1
usleep(1000);
}
void cam_i2c_write(volatile unsigned *mt9d111_i2c_axi_lites, unsigned int device_addr, unsigned int write_addr, unsigned int write_data){
mt9d111_i2c_axi_lites[66] = 0x100 | (device_addr & 0xfe); // Slave IIC Write Address, address is 0x108, i2c_tx_fifo
mt9d111_i2c_axi_lites[66] = write_addr;
mt9d111_i2c_axi_lites[66] = (write_data >> 8)|0xff; // first data
mt9d111_i2c_axi_lites[66] = 0x200 | (write_data & 0xff); // second data
cam_i2x_write_sync();
}
int main()
{
int fd0, fd1, fd2, fd3, fd4, fd5, fd6, fd7, fd8;
int fd_udmabuf, fd_paddr;
volatile unsigned int *axi_iic_0, *axi_vdma_0, *axis_switch_0, *axis_switch_1;
volatile unsigned int *bitmap_disp_cntrler_axim_0, *fastx_corner_det_0;
volatile unsigned int *mt9d111_inf_axis_0;
volatile unsigned int *frame_buffer;
unsigned char attr[1024];
unsigned long phys_addr;
// axi_iic_0 (uio0)
fd0 = open("/dev/uio0", O_RDWR); // axi_iic_0
if (fd0 < 1){
fprintf(stderr, "/dev/uio0 (axi_iic_0) open errorn");
exit(-1);
}
axi_iic_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd0, 0);
if (axi_iic_0 == MAP_FAILED){
fprintf(stderr, "axi_iic_0 mmap errorn");
exit(-1);
}
// axi_vdma_0 (uio1)
fd1 = open("/dev/uio1", O_RDWR); // axi_vdma_0
if (fd1 < 1){
fprintf(stderr, "/dev/uio1 (axi_vdma_0) open errorn");
exit(-1);
}
axi_vdma_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd1, 0);
if (axi_vdma_0 == MAP_FAILED){
fprintf(stderr, "axi_vdma_0 mmap errorn");
exit(-1);
}
// axis_switch_0 (uio2)
fd2 = open("/dev/uio2", O_RDWR); // axis_switch_0
if (fd2 < 1){
fprintf(stderr, "/dev/uio2 (axis_switch_0) open errorn");
exit(-1);
}
axis_switch_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd2, 0);
if (axis_switch_0 == MAP_FAILED){
fprintf(stderr, "axis_switch_0 mmap errorn");
exit(-1);
}
// axis_switch_1 (uio3)
fd3 = open("/dev/uio3", O_RDWR); // axis_switch_1
if (fd3 < 1){
fprintf(stderr, "/dev/uio3 (axis_switch_1) open errorn");
exit(-1);
}
axis_switch_1 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd3, 0);
if (axis_switch_1 == MAP_FAILED){
fprintf(stderr, "axis_switch_1 mmap errorn");
exit(-1);
}
// bitmap_disp_cntrler_axim_0 (uio4)
fd4 = open("/dev/uio4", O_RDWR); // bitmap_disp_cntrler_axim_0
if (fd4 < 1){
fprintf(stderr, "/dev/uio4 (bitmap_disp_cntrler_axim_0) open errorn");
exit(-1);
}
bitmap_disp_cntrler_axim_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd4, 0);
if (bitmap_disp_cntrler_axim_0 == MAP_FAILED){
fprintf(stderr, "bitmap_disp_cntrler_axim_0 mmap errorn");
exit(-1);
}
// mt9d111_inf_axis_0 (uio7)
fd7 = open("/dev/uio7", O_RDWR); // mt9d111_inf_axis_0
if (fd7 < 1){
fprintf(stderr, "/dev/uio7 (mt9d111_inf_axis_0) open errorn");
exit(-1);
}
mt9d111_inf_axis_0 = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd7, 0);
if (mt9d111_inf_axis_0 == MAP_FAILED){
fprintf(stderr, "mt9d111_inf_axis_0 mmap errorn");
exit(-1);
}
// udmabuf4
fd_udmabuf = open("/dev/udmabuf4", O_RDWR | O_SYNC); // frame_buffer, The chache is disabled.
if (fd_udmabuf == -1){
fprintf(stderr, "/dev/udmabuf4 open errorn");
exit(-1);
}
frame_buffer = (volatile unsigned int *)mmap(NULL, (ALL_DISP_ADDRESS*3), PROT_READ|PROT_WRITE, MAP_SHARED, fd_udmabuf, 0);
if (frame_buffer == MAP_FAILED){
fprintf(stderr, "frame_buffer mmap errorn");
exit(-1);
}
// phys_addr of udmabuf4
fd_paddr = open("/sys/devices/soc0/amba/amba:udmabuf4/udmabuf/udmabuf4/phys_addr", O_RDONLY);
if (fd_paddr == -1){
fprintf(stderr, "/sys/devices/soc0/amba/amba:udmabuf4/udmabuf/udmabuf4/phys_addr open errorn");
exit(-1);
}
read(fd_paddr, attr, 1024);
sscanf(attr, "%lx", &phys_addr);
close(fd_paddr);
printf("phys_addr = %x\n", (unsigned int)phys_addr);
// axis_switch_1, 1to2 ,Select M00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
axis_switch_1[16] = 0x0; // 0x40 = 0
axis_switch_1[17] = 0x80000000; // 0x44 = 0x80000000, disable
axis_switch_1[18] = 0x80000000; // 0x48 = 0x80000000, disable
axis_switch_1[19] = 0x80000000; // 0x4C = 0x80000000, disable
axis_switch_1[0] = 0x2; // Comit registers
printf("axis_swtich_1[17] = %x\n", axis_switch_1[17]);
// axis_switch_0, 2to1, Select S00_AXIS
// Refer to http://marsee101.blog19.fc2.com/blog-entry-3177.html
axis_switch_0[16] = 0x0; // 0x40 = 0;
axis_switch_0[0] = 0x2; // Comit registers
printf("axis_swtich_0[16] = %x\n", axis_switch_0[16]);
// AXI VDMA Initialization sequence (axi_vdma_0)
axi_vdma_0[12] = 0x4; // S2MM_VDMACR (S2MM VDMA Control Register Offset 30h) is 0x4
while ((axi_vdma_0[12] & 0x4) == 0x4) ; // Reset is progress
printf("axi_vdma_0[12] = %x\n", axi_vdma_0[12]);
axi_vdma_0[12] = 0x4; // S2MM_VDMACR (S2MM VDMA Control Register Offset 30h) is 0x4
while ((axi_vdma_0[12] & 0x4) == 0x4) ; // Reset is progress
axi_vdma_0[18] = NUMBER_OF_WRITE_FRAMES; // S2MM_FRMSTORE (0x48) register
axi_vdma_0[12] = 0x00010002; // S2MM_VDMACR(IRQFrameCount = 0x1, Circular_Park = 1)
axi_vdma_0[41] = HORIZONTAL_PIXELS*PIXEL_NUM_OF_BYTES; // S2MM Horizontal Size RegisterS2MM_HSIZE0xc80 = 3200dec = 800 x 4
axi_vdma_0[42] = HORIZONTAL_PIXELS*PIXEL_NUM_OF_BYTES; // S2MM Frame Delay and Stride RegisterS2MM_FRMDLY_STRIDE0xc80 = 3200dec = 800 x 4
axi_vdma_0[43] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 1
axi_vdma_0[44] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 2
axi_vdma_0[45] = (unsigned)phys_addr; // S2MM Start Address (1 to 16) Start Address 3
axi_vdma_0[12] = 0x00010003; // S2MM_VDMACR(IRQFrameCount = 0x1, Circular_Park = 1, Run/stop = 1)
while((axi_vdma_0[13] & 0x1) == 0x1) ; // Halt? (S2MM_VDMASR 0x34)
axi_vdma_0[40] = VERTICAL_LINES; // S2MM Vertical Size (S2MM_VSIZE Offset 0xA0) 0x258 = 600dec
// CMOS Camera initialize, MT9D111
cam_i2c_init(axi_iic_0);
cam_i2c_write(axi_iic_0, 0xba, 0xf0, 0x1); // Changed regster map to IFP page 1
cam_i2c_write(axi_iic_0, 0xba, 0x97, 0x20); // RGB Mode, RGB565
mt9d111_inf_axis_0[1] = 0;
mt9d111_inf_axis_0[0] = (unsigned int)phys_addr;
printf("mt9d111_inf_axis_0[0] = %x\n", mt9d111_inf_axis_0[0]);
bitmap_disp_cntrler_axim_0[0] = (unsigned int)phys_addr;
printf("bitmap_disp_cntrler_axim_0[0] = %x\n", bitmap_disp_cntrler_axim_0[0]);
printf("frame_buffer[0] = %x\n", frame_buffer[0]);
printf("frame_buffer[1] = %x\n", frame_buffer[1]);
munmap((void *)axi_iic_0, 0x10000);
munmap((void *)axi_vdma_0, 0x10000);
munmap((void *)axis_switch_0, 0x10000);
munmap((void *)axis_switch_1, 0x10000);
munmap((void *)bitmap_disp_cntrler_axim_0, 0x10000);
munmap((void *)mt9d111_inf_axis_0, 0x10000);
munmap((void *)frame_buffer, (ALL_DISP_ADDRESS*3));
close(fd0);
close(fd1);
close(fd2);
close(fd3);
close(fd4);
close(fd7);
close(fd_udmabuf);
}
で「Vivado HLS で DMA Read IP を作る2(絶対アドレス指定版)」をIP として入れたVivado プロジェクトを論理シミュレーションすると動作するが、 Post-Synthesis Functional Simulation すると動作しない。しかし、Project Settings のTarget language を VHDL にする(DMA Read IPはVerilog HDL で IP化しているのだが)と Post-Synthesis Functional Simulation が動作するという現象があった。それを ikwzm さんが vivado-hls-axi-dma-read-failure でテストケースを作ってくれたのだが、Linux にVivado 2016.4 の環境が無かったためやっていなかった。今回は、環境を構築したためやってみた。「Vivado HLS で DMA Read IP を作る2(絶対アドレス指定版)」を使って合成後の機能シミュレーション
「Vivado HLS で DMA Read IP を作る2(絶対アドレス指定版)」を使って合成後の機能シミュレーション2
「Vivado HLS で DMA Read IP を作る2(絶対アドレス指定版)」を使って合成後の機能シミュレーション3
/dts-v1/;
/ {
fragment@0 {
target-path = "/amba";
__overlay__ {
#address-cells = <0x1>;
#size-cells = <0x1>;
axi_iic_0@41600000 {
compatible = "generic-uio";
reg = <0x41600000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axi_vdma_0@43000000 {
compatible = "generic-uio";
reg = <0x43000000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axis_switch_0@43C10000 {
compatible = "generic-uio";
reg = <0x43C10000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
axis_switch_1@43C20000 {
compatible = "generic-uio";
reg = <0x43C20000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
bitmap_disp_cntrler_axi_master_0@43C00000 {
compatible = "generic-uio";
reg = <0x43C00000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
fastx_corner_det_0@43C30000 {
compatible = "generic-uio";
reg = <0x43C30000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
lap_filter_axis_0@43C50000 {
compatible = "generic-uio";
reg = <0x43C50000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
mt9d111_inf_axis_0@43C40000 {
compatible = "generic-uio";
reg = <0x43C40000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
unsharp_mask_axis_0@43C60000 {
compatible = "generic-uio";
reg = <0x43C60000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
udmabuf4 {
compatible = "ikwzm,udmabuf-0.10.a";
minor-number = <4>;
size = <0x00600000>;
};
};
};
};
#!/bin/sh
# device tree overlay command (devtov)
# Be sure to run it as superuser
# $1 is device tree name
mkdir /config/device-tree/overlays/$1
cp $1.dtbo /config/device-tree/overlays/$1/dtbo
echo 1 > /config/device-tree/overlays/$1/status
sleep 0.1
chmod 666 /dev/uio*
ls -l /dev/uio*
if test -e /dev/udmabuf*; then
chmod 666 /dev/udmabuf*
ls -l /dev/udmabuf*
fi
#!/bin/sh
# Remove device tree overlay command (rmdevtov)
# Be sure to run it as superuser
# $1 is device tree name
rmdir /config/device-tree/overlays/$1
#!/bin/sh
# fpga manager command (fpgamag)
# Be sure to run it as superuser
# $1 is bit file name
echo 1 > /sys/class/fpgacfg/fpgacfg0/data_format
echo 1 > /sys/class/fpgacfg/fpgacfg0/load_start
cp $1 /dev/fpgacfg0
// pynq_led_test.c
// 2017/04/06 by marsee
//
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <assert.h>
#include <sys/mman.h>
#include <fcntl.h>
int main() {
int fd;
volatile unsigned int *led_gpio;
fd = open("/dev/uio0", O_RDWR);
if (fd < 1){
fprintf(stderr, "/dev/uio0 open error\n");
exit(-1);
}
led_gpio = (volatile unsigned int *)mmap(NULL, 0x10000, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
if (led_gpio == MAP_FAILED){
fprintf(stderr, "mmap error\n");
exit(-1);
}
unsigned int i = 0;
while(i<17){
led_gpio[0] = i++;
sleep(1);
}
munmap((void *)led_gpio, 0x10000);
return(0);
}
/dts-v1/;
/ {
fragment@0 {
target-path = "/amba";
__overlay__ {
#address-cells = <0x1>;
#size-cells = <0x1>;
uio_gpio_0@41200000 {
compatible = "generic-uio";
reg = <0x41200000 0x10000>;
#interrupts = <0x0 0x1d 0x4>;
};
};
};
};
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | - | - | - | - | 1 |
2 | 3 | 4 | 5 | 6 | 7 | 8 |
9 | 10 | 11 | 12 | 13 | 14 | 15 |
16 | 17 | 18 | 19 | 20 | 21 | 22 |
23 | 24 | 25 | 26 | 27 | 28 | 29 |
30 | - | - | - | - | - | - |