// square_host.cpp
// 2019/12/11 by marsee
//
// Vitis-Tutorials/docs/mixing-c-rtl-kernels/reference-files/src/host/host_step1.cpp のコードを引用します
// https://github.com/Xilinx/Vitis-Tutorials/blob/master/docs/mixing-c-rtl-kernels/reference-files/src/host/host_step1.cpp
#define CL_HPP_CL_1_2_DEFAULT_BUILD
#define CL_HPP_TARGET_OPENCL_VERSION 120
#define CL_HPP_MINIMUM_OPENCL_VERSION 120
#define CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY 1
#define CL_USE_DEPRECATED_OPENCL_1_2_APIS
#include <vector>
#include <CL/cl2.hpp>
#include <iostream>
#include <fstream>
#include <CL/cl_ext_xilinx.h>
#include <unistd.h>
#include <limits.h>
#include <sys/stat.h>
// printf-style template for reporting a result mismatch; it takes three %d
// arguments: the element index, the CPU result and the device result.
static const std::string error_message{
    "Error: Result mismatch:\ni = %d CPU result = %d Device result = %d\n"};
//Some Library functions to be used.
// Minimal standard-library allocator that hands out storage aligned to a
// 4096-byte boundary (via posix_memalign). main() uses it for the host vectors
// that are wrapped by cl::Buffer objects created with CL_MEM_USE_HOST_PTR.
//
// All instances are stateless and interchangeable, so any instance may free
// memory obtained from any other instance.
template <typename T>
struct aligned_allocator
{
    using value_type = T;

    aligned_allocator() noexcept = default;

    // Converting constructor: part of the Allocator requirements, it lets a
    // container rebind the allocator to a different element type.
    template <typename U>
    aligned_allocator(const aligned_allocator<U>&) noexcept {}

    // Allocates uninitialised storage for `num` objects of type T.
    // Throws std::bad_alloc if the byte count would overflow size_t or if
    // posix_memalign reports a failure.
    T* allocate(std::size_t num)
    {
        // num * sizeof(T) must not wrap around, otherwise a far too small
        // buffer would be returned for a huge request.
        if (num > static_cast<std::size_t>(-1) / sizeof(T))
            throw std::bad_alloc();
        void* ptr = nullptr;
        if (posix_memalign(&ptr, 4096, num * sizeof(T)))
            throw std::bad_alloc();
        return static_cast<T*>(ptr);
    }

    // Releases storage previously returned by allocate(); the element count is
    // not needed because free() tracks the allocation size itself.
    void deallocate(T* p, std::size_t /*num*/) noexcept
    {
        free(p);
    }
};

// Stateless allocators always compare equal (required by the Allocator
// requirements for container move/swap operations).
template <typename T, typename U>
bool operator==(const aligned_allocator<T>&, const aligned_allocator<U>&) noexcept
{
    return true;
}

template <typename T, typename U>
bool operator!=(const aligned_allocator<T>&, const aligned_allocator<U>&) noexcept
{
    return false;
}
// OCL_CHECK(error, call): executes `call`, then tests `error`; if it is not
// CL_SUCCESS, prints the file, line, the stringified call and the error code
// and terminates the process with EXIT_FAILURE. `call` is expected to store
// the OpenCL status in `error` (e.g. `err = f(...)` or `T x(..., &err)`).
//
// The expansion is NOT wrapped in do { } while (0): get_devices() passes a
// variable declaration as `call` and uses that variable after the macro, which
// an enclosing scope would break. As a consequence the macro expands to two
// statements and must not be used as the unbraced body of an if/else/loop.
// Because `call` is pasted into the printf format string, it must not contain
// a '%' character.
#define OCL_CHECK(error,call) \
call; \
if (error != CL_SUCCESS) { \
printf("%s:%d Error calling " #call ", error code is: %d\n", \
__FILE__,__LINE__, error); \
exit(EXIT_FAILURE); \
}
namespace xcl {
std::vector<cl::Device> get_devices(const std::string& vendor_name) {
size_t i;
cl_int err;
std::vector<cl::Platform> platforms;
OCL_CHECK(err, err = cl::Platform::get(&platforms));
cl::Platform platform;
for (i = 0 ; i < platforms.size(); i++){
platform = platforms[i];
OCL_CHECK(err, std::string platformName = platform.getInfo<CL_PLATFORM_NAME>(&err));
if (platformName == vendor_name){
std::cout << "Found Platform" << std::endl;
std::cout << "Platform Name: " << platformName.c_str() << std::endl;
break;
}
}
if (i == platforms.size()) {
std::cout << "Error: Failed to find Xilinx platform" << std::endl;
exit(EXIT_FAILURE);
}
//Getting ACCELERATOR Devices and selecting 1st such device
std::vector<cl::Device> devices;
OCL_CHECK(err, err = platform.getDevices(CL_DEVICE_TYPE_ACCELERATOR, &devices));
return devices;
}
// Convenience wrapper: returns the accelerator devices of the platform named
// "Xilinx" (see get_devices for the failure behaviour).
std::vector<cl::Device> get_xil_devices() {
    const std::string vendor{"Xilinx"};
    return get_devices(vendor);
}
// Reads the complete file `xclbin_file_name` into a freshly allocated buffer.
//
//   xclbin_file_name : path of the binary (xclbin) file to load
//   nb               : receives the file size in bytes
//   returns          : pointer to a `new char[nb]` buffer with the file
//                      contents; the caller owns it and must `delete[]` it
//
// The process is terminated with EXIT_FAILURE when the file is not readable,
// cannot be opened, its size cannot be determined or does not fit into
// `unsigned`, or the contents cannot be read completely.
char* read_binary_file(const std::string &xclbin_file_name, unsigned &nb)
{
    std::cout << "INFO: Reading " << xclbin_file_name << std::endl;
    if(access(xclbin_file_name.c_str(), R_OK) != 0) {
        printf("ERROR: %s xclbin not available please build\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }
    //Loading XCL Bin into char buffer
    std::cout << "Loading: '" << xclbin_file_name.c_str() << "'\n";
    std::ifstream bin_file(xclbin_file_name.c_str(), std::ifstream::binary);
    if (!bin_file.is_open()) {
        printf("ERROR: failed to open %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }

    // Determine the file size; tellg() yields -1 on failure, which must not be
    // converted blindly into a (huge) unsigned byte count.
    bin_file.seekg(0, bin_file.end);
    const std::streamoff file_size = bin_file.tellg();
    if (file_size < 0 || static_cast<unsigned long long>(file_size) > UINT_MAX) {
        printf("ERROR: cannot determine a usable size for %s\n", xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }
    bin_file.seekg(0, bin_file.beg);

    nb = static_cast<unsigned>(file_size);
    char *buf = new char[nb];
    if (!bin_file.read(buf, nb)) {
        delete[] buf;
        printf("ERROR: failed to read %u bytes from %s\n", nb, xclbin_file_name.c_str());
        exit(EXIT_FAILURE);
    }
    return buf;
}
};
// Vitis-Tutorials/docs/mixing-c-rtl-kernels/reference-files/src/host/host_step1.cpp のコードを引用終了
// Number of int elements processed per run: main() sizes both host vectors
// with it and passes it to the kernel as argument 2. The kernel's
// LOOP_TRIPCOUNT pragmas use the same value (10).
#define DATA_SIZE 10
// Vitis-Tutorials/docs/mixing-c-rtl-kernels/reference-files/src/host/host_step1.cpp のコードを自分用に変更して引用します
int main(int argc, char* argv[])
{
const char* xclbinFilename;
if (argc==2) {
xclbinFilename = argv[1];
std::cout <<"Using FPGA binary file specfied through the command line: " << xclbinFilename << std::endl;
}
else {
xclbinFilename = "../lap_filter_axim.xclbin";
std::cout << "No FPGA binary file specified through the command line, using:" << xclbinFilename <<std::endl;
}
std::vector<int,aligned_allocator<int>> in_data(DATA_SIZE);
std::vector<int,aligned_allocator<int>> square_data(DATA_SIZE);
size_t size_in_bytes = (DATA_SIZE) * sizeof(int);
// input data
for(int i=0; i<DATA_SIZE; i++){
in_data[i] = i;
square_data[i] = 0;
}
std::vector<cl::Device> devices = xcl::get_xil_devices();
cl::Device device = devices[0];
devices.resize(1);
// Creating Context and Command Queue for selected device
cl::Context context(device);
cl::CommandQueue q(context, device, CL_QUEUE_PROFILING_ENABLE);
// Load xclbin
std::cout << "Loading: '" << xclbinFilename << "'\n";
std::ifstream bin_file(xclbinFilename, std::ifstream::binary);
bin_file.seekg (0, bin_file.end);
unsigned nb = bin_file.tellg();
bin_file.seekg (0, bin_file.beg);
char *buf = new char [nb];
bin_file.read(buf, nb);
// Creating Program from Binary File
cl::Program::Binaries bins;
bins.push_back({buf,nb});
cl::Program program(context, devices, bins);
// This call will get the kernel object from program. A kernel is an
// OpenCL function that is executed on the FPGA.
cl::Kernel krnl_square(program,"square");
// These commands will allocate memory on the Device. The cl::Buffer objects can
// be used to reference the memory locations on the device.
cl::Buffer ind_buf(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_ONLY,
size_in_bytes, in_data.data());
cl::Buffer squared_buf(context, CL_MEM_USE_HOST_PTR | CL_MEM_READ_WRITE,
size_in_bytes, square_data.data());
// Data will be transferred from system memory over PCIe to the FPGA on-board
// DDR memory.
q.enqueueMigrateMemObjects({ind_buf},0/* 0 means from host*/);
//set the kernel Arguments
krnl_square.setArg(0,ind_buf);
krnl_square.setArg(1,squared_buf);
krnl_square.setArg(2,DATA_SIZE);
//Launch the Kernel
q.enqueueTask(krnl_square);
// The result of the previous kernel execution will need to be retrieved in
// order to view the results. This call will transfer the data from FPGA to
// source_results vector
q.enqueueMigrateMemObjects({squared_buf},CL_MIGRATE_MEM_OBJECT_HOST);
q.finish();
// Compare the results
int error = 0;
for(int i=0; i<DATA_SIZE; i++){
if(square_data[i] != i*i){
std::cout << "Error: i = " << i << " i^2 = " << i*i << " square_data = " << square_data[i] << std::endl;
error = 1;
}else{
//std::cout << "i = " << i << " i^2 = " << i*i << " square_data = " << square_data[i] << std::endl;
}
}
std::cout << "TEST " << (error ? "FAILED" : "PASSED") << std::endl;
return (error ? EXIT_FAILURE : EXIT_SUCCESS);
}
// square.cpp
// 2019/12/11 by marsee
//
// Vitis_Accel_Examples/cpp_kernels/dataflow_stream/src/adder.cpp を参考にしています
// https://github.com/Xilinx/Vitis_Accel_Examples/blob/master/cpp_kernels/dataflow_stream/src/adder.cpp
#include <hls_stream.h>
// Copies `size` ints from the array `inm` into the stream `outs`, one element
// per loop iteration, in ascending index order.
//   inm  : source array, read at indices 0 .. size-1
//   outs : stream that receives the elements
//   size : number of elements to transfer
static void read_dma(int *inm, hls::stream<int> &outs, int size){
LOOP_RDMA: for(int i=0; i<size; i++){
// Ask the HLS tool to pipeline this loop with an initiation interval of 1.
#pragma HLS PIPELINE II=1
// Trip-count hint of exactly 10 iterations (the host's DATA_SIZE is 10).
#pragma HLS LOOP_TRIPCOUNT min=10 max=10
outs.write(inm[i]);
}
}
// Reads `size` ints from the stream `ins`, squares each one and writes the
// product to the stream `outs` in the same order.
//   ins  : input stream (exactly `size` reads are performed)
//   outs : output stream receiving tmp * tmp for each value read
//   size : number of elements to process
// The multiplication is done in plain `int`, so the caller has to keep the
// values small enough that the square fits.
static void square_stream(hls::stream<int> &ins, hls::stream<int> &outs, int size){
LOOP_SQAURE_ST: for(int i=0; i<size; i++){
// Ask the HLS tool to pipeline this loop with an initiation interval of 1.
#pragma HLS PIPELINE II=1
// Trip-count hint of exactly 10 iterations (the host's DATA_SIZE is 10).
#pragma HLS LOOP_TRIPCOUNT min=10 max=10
int tmp = ins.read();
outs.write(tmp * tmp);
}
}
// Drains `size` ints from the stream `ins` into the array `outm`, storing the
// i-th value read at outm[i].
//   ins  : input stream (exactly `size` reads are performed)
//   outm : destination array, written at indices 0 .. size-1
//   size : number of elements to transfer
static void write_dma(hls::stream<int> &ins, int *outm, int size){
LOOP_WDMA: for(int i=0; i<size; i++){
// Ask the HLS tool to pipeline this loop with an initiation interval of 1.
#pragma HLS PIPELINE II=1
// Trip-count hint of exactly 10 iterations (the host's DATA_SIZE is 10).
#pragma HLS LOOP_TRIPCOUNT min=10 max=10
outm[i] = ins.read();
}
}
// C linkage so that the kernel keeps the unmangled name "square", which is
// the name the host passes to cl::Kernel.
extern "C" {
// Top-level kernel: writes outm[i] = inm[i] * inm[i] for i in 0 .. size-1.
//   inm  : input array of `size` ints
//   outm : output array of `size` ints
//   size : number of elements to process
// The work is split into three stages connected by streams:
// read_dma -> square_stream -> write_dma.
void square(int *inm, int *outm, int size){
// Both pointer arguments are AXI master ports sharing the bundle "gmem";
// `size` and the block-level return/control use the AXI4-Lite bundle "control".
// NOTE(review): no s_axilite pragma is given for inm/outm themselves - confirm
// that the tool version in use places their offset registers in "control".
#pragma HLS INTERFACE m_axi port = inm offset = slave bundle = gmem
#pragma HLS INTERFACE m_axi port = outm offset = slave bundle = gmem
#pragma HLS INTERFACE s_axilite port = size bundle = control
#pragma HLS INTERFACE s_axilite port = return bundle = control
// Streams linking the stages: `ins` carries the raw input values, `outs` the
// squared values. Each is declared with a depth of 32.
static hls::stream<int> ins;
static hls::stream<int> outs;
#pragma HLS STREAM variable = ins depth = 32
#pragma HLS STREAM variable = outs depth = 32
// Marks the three calls below as a dataflow region.
#pragma HLS dataflow
read_dma(inm, ins, size);
square_stream(ins, outs, size);
write_dma(outs, outm, size);
}
}
日 | 月 | 火 | 水 | 木 | 金 | 土 |
---|---|---|---|---|---|---|
- | - | - | - | - | 1 | 2 |
3 | 4 | 5 | 6 | 7 | 8 | 9 |
10 | 11 | 12 | 13 | 14 | 15 | 16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 |
24 | 25 | 26 | 27 | 28 | 29 | 30 |