Weitere ähnliche Inhalte Ähnlich wie Altera SDK for OpenCL解体新書 : ホストとデバイスの関係 (20) Mehr von Mr. Vengineer (20) Altera SDK for OpenCL解体新書 : ホストとデバイスの関係3. Altera SDK for OpenCLでのBSPとは?
CPU
ホスト側の
メモリ
Kernel−A
Kernel−B
Kernel−C
DMA
PCIe PCIe
DMA
BSPに相当する部分
デバイス側の
メモリ
MIF
6. デバイス側のOpenCLコード
const char *kernelSource =
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
/*
 * vecAdd - element-wise vector addition: c[i] = a[i] + b[i].
 *
 * a, b : input vectors in global memory
 * c    : output vector in global memory
 * n    : number of elements; work-items whose ID is >= n do nothing
 *
 * The cl_khr_fp64 extension is enabled above because the operands are double.
 */
__kernel void vecAdd(__global double *a, __global double *b,
                     __global double *c, const unsigned int n)
{
    // Get our global thread ID (size_t is what get_global_id returns,
    // and it avoids a signed/unsigned comparison against n)
    size_t id = get_global_id(0);

    // Make sure we do not go out of bounds
    if (id < n) {
        c[id] = a[id] + b[id];
    }
}
7. ホスト側のCコード(メモリの割当)
/* Host-side arrays: two inputs and one result, each `bytes` long. */
h_a = malloc(bytes);
h_b = malloc(bytes);
h_c = malloc(bytes);

/*
 * Device-side buffers of the same size. d_a and d_b are created read-only
 * and d_c write-only (access as seen from the kernel). No host pointer is
 * supplied and no error code is requested (both trailing arguments NULL).
 */
d_a = clCreateBuffer(context, CL_MEM_READ_ONLY,  bytes, NULL, NULL);
d_b = clCreateBuffer(context, CL_MEM_READ_ONLY,  bytes, NULL, NULL);
d_c = clCreateBuffer(context, CL_MEM_WRITE_ONLY, bytes, NULL, NULL);
8. ホスト側のCコード(実行部)
/*
 * Copy both input vectors from host memory to the device buffers.
 * CL_TRUE makes each write blocking.
 *
 * Every status below is OR-ed into err. CL_SUCCESS is 0, so err stays 0 only
 * if all calls succeeded; a non-zero value means "something failed" and is
 * not necessarily a single valid error code.
 */
err  = clEnqueueWriteBuffer(queue, d_a, CL_TRUE, 0, bytes, h_a, 0, NULL,
                            NULL);
err |= clEnqueueWriteBuffer(queue, d_b, CL_TRUE, 0, bytes, h_b, 0, NULL,
                            NULL);

/* Bind the kernel arguments; keep accumulating so the status of the
 * buffer writes above is not overwritten. */
err |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &n);

/* Launch the kernel over a 1-dimensional range. */
err |= clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize,
                              &localSize,
                              0, NULL,
                              NULL);

/* Wait for all queued commands to complete. */
err |= clFinish(queue);
18. ホスト側のCコード(メモリの割当)
cl_mem d_a = clCreateBuffer(context,CL_MEM_ALLOC_HOST_PTR, bytes, NULL, NULL);
cl_mem d_b = clCreateBuffer(context,CL_MEM_ALLOC_HOST_PTR, bytes, NULL, NULL);
cl_mem d_c = clCreateBuffer(context,CL_MEM_ALLOC_HOST_PTR, bytes, NULL, NULL);
// clEnqueuWriteBuffer/clEnqueueReadBufferの代わりに
clEnqueueMapBufferが必要(ホスト側のメモリをマップする)
double *h_a = (double*)clEnqueueMapBuffer (queue, d_a, CL_TRUE, CL_MAP_READ,
0, bytes, 0, NULL, NULL );
double *h_b = (double*)clEnqueueMapBuffer (queue, d_b, CL_TRUE, CL_MAP_READ,
0, bytes, 0, NULL, NULL );
double *h_c = (double*)clEnqueueMapBuffer (queue, d_c, CL_TRUE, CL_MAP_WRITE,
0, bytes, 0, NULL, NULL );
19. ホスト側のCコード(実行部)
/*
 * Bind the kernel arguments. Every status is OR-ed into err: CL_SUCCESS is 0,
 * so err stays 0 only if all calls succeeded.
 */
err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &d_a);
err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &d_b);
err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &d_c);
err |= clSetKernelArg(kernel, 3, sizeof(unsigned int), &n);

/* Launch the kernel over a 1-dimensional range; accumulate rather than
 * assign so a clSetKernelArg failure is not overwritten. */
err |= clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &globalSize,
                              &localSize,
                              0, NULL,
                              NULL);

/* Wait for all queued commands to complete. */
err |= clFinish(queue);
20. ホスト側のCコード(メモリの開放)
// clEnqueueUnMapMemObjectが必要
clEnqueueUnmapMemObject(queue, d_a, h_a, 0, NULL, NULL );
clEnqueueUnmapMemObject(queue, d_b, h_b, 0, NULL, NULL );
clEnqueueUnmapMemObject(queue, d_c, h_c, 0, NULL, NULL );
clReleaseMemObject(d_a);
clReleaseMemObject(d_b);
clReleaseMemObject(d_c);
// ホスト側のメモリ開放は必要なし