/* OpenCL setup helpers: platform/context initialisation, program build, kernel lookup and clean-up. */
|
#include <stdlib.h>
|
||
|
#include <stdio.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#include "logging.h"
|
||
|
|
||
|
#include "cl_setup.h"
|
||
|
#define CL_DEBUG
|
||
|
|
||
|
cl_int
|
||
|
cl_init(struct cl_uctx *puctx)
|
||
|
{
|
||
|
/* OpenCL specific variables */
|
||
|
size_t dataBytes;
|
||
|
|
||
|
cl_int result = CL_SUCCESS;
|
||
|
cl_uint nplat = 0, ndevices = 0, i = 0, matched_plat;
|
||
|
cl_platform_id *platforms = NULL, platform = NULL;
|
||
|
cl_device_id *devices = NULL;
|
||
|
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
|
||
|
cl_context ctx = NULL;
|
||
|
|
||
|
|
||
|
if (puctx == NULL || puctx->platform_name == NULL) {
|
||
|
return CL_INVALID_VALUE;
|
||
|
}
|
||
|
if (puctx->device_type > CL_DEVICE_TYPE_GPU) {
|
||
|
puctx->device_type = CL_DEVICE_TYPE_ALL;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Initialize OpenCL.
|
||
|
*/
|
||
|
result = clGetPlatformIDs(0, NULL, &nplat);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
log_warn("cl", "Failed getting the number of platforms");
|
||
|
goto err;
|
||
|
}
|
||
|
if (nplat < 0) {
|
||
|
log_warn("cl", "No platforms found");
|
||
|
goto err;
|
||
|
}
|
||
|
platforms = calloc(nplat, sizeof platforms[0]);
|
||
|
if (platforms == NULL) {
|
||
|
log_warn("cl", "Failed to allocate platforms");
|
||
|
goto err;
|
||
|
}
|
||
|
result = clGetPlatformIDs(nplat, platforms, NULL);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
log_warn("cl", "Failed fetching the platforms");
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
log_debug("cl", "Found %d platforms", nplat);
|
||
|
for (i = 0; i < nplat; i++) {
|
||
|
char platname[100];
|
||
|
result = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
|
||
|
sizeof(platname), platname, NULL);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
log_warn("cl", "Failed fetching platform info");
|
||
|
goto err;
|
||
|
}
|
||
|
log_debug("cl", "Checking %s == %s",
|
||
|
puctx->platform_name, platname);
|
||
|
if (!strcmp(platname, puctx->platform_name)) {
|
||
|
platform = platforms[i];
|
||
|
matched_plat = i;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
if (platform == NULL) {
|
||
|
log_warn("cl", "No matching platform found");
|
||
|
result = CL_DEVICE_NOT_FOUND;
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
#ifdef CL_DEBUG
|
||
|
printf("-----------------------------------------------------------\n");
|
||
|
printf(" PLATFORM INFORMATION (the number of platforms = %d) \n",
|
||
|
nplat);
|
||
|
for(i = 0 ; i < nplat; i++)
|
||
|
{
|
||
|
char *long_str;
|
||
|
char str[1024];
|
||
|
size_t str_size;
|
||
|
|
||
|
printf("-------------------------------------------------------"
|
||
|
"----\n");
|
||
|
printf( " PLATFORM ID : %d "
|
||
|
"\n", i);
|
||
|
printf("-------------------------------------------------------"
|
||
|
"----\n");
|
||
|
|
||
|
clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str),
|
||
|
str, NULL);
|
||
|
printf("Platform name : %s\n", str);
|
||
|
|
||
|
clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION,
|
||
|
sizeof(str), str, NULL);
|
||
|
printf("Platform version : %s\n", str);
|
||
|
|
||
|
clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0,
|
||
|
NULL, &str_size);
|
||
|
long_str = (char *)malloc(str_size);
|
||
|
clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS,
|
||
|
str_size, long_str, NULL);
|
||
|
printf("Platform extensions : %s\n", long_str);
|
||
|
|
||
|
printf("-------------------------------------------------------"
|
||
|
"----\n\n");
|
||
|
|
||
|
free(long_str);
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
props[1] = (cl_context_properties)platform;
|
||
|
|
||
|
ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_ALL, NULL, NULL,
|
||
|
&result);
|
||
|
if(result != CL_SUCCESS) {
|
||
|
log_warn("cl", "Failed to create context");
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
/* fetch the list of devices associated with context */
|
||
|
result = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL,
|
||
|
&dataBytes);
|
||
|
if(result != CL_SUCCESS) {
|
||
|
log_warn("cl", "Failed to fetch devices size!");
|
||
|
goto err;
|
||
|
}
|
||
|
devices = (cl_device_id *)malloc(dataBytes);
|
||
|
if (devices == NULL) {
|
||
|
log_warn("cl", "devices malloc() failed!");
|
||
|
goto err;
|
||
|
}
|
||
|
result |= clGetContextInfo(ctx, CL_CONTEXT_DEVICES, dataBytes,
|
||
|
devices, NULL);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
log_warn("cl", "clGetContextInfo() failed with %d!", result);
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
|
||
|
#ifdef CL_DEBUG
|
||
|
result = clGetDeviceIDs(platforms[matched_plat], CL_DEVICE_TYPE_ALL,
|
||
|
0, NULL, &ndevices);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
log_warn("cl", "clGetDeviceIDs() failed!");
|
||
|
goto err;
|
||
|
}
|
||
|
printf("-----------------------------------------------------------\n");
|
||
|
printf(" DEVICE INFORMATION (the number of devices = %d) \n",
|
||
|
ndevices);
|
||
|
for(i = 0 ; i < ndevices ; i++)
|
||
|
{
|
||
|
char str[1024];
|
||
|
size_t int_info;
|
||
|
|
||
|
printf("-------------------------------------------------------"
|
||
|
"----\n");
|
||
|
printf(" DEVICE ID : %d "
|
||
|
"\n",i);
|
||
|
printf("-------------------------------------------------------"
|
||
|
"----\n");
|
||
|
|
||
|
clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(str), str,
|
||
|
NULL);
|
||
|
printf("Device Name : %s\n",str);
|
||
|
|
||
|
clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(str), str,
|
||
|
NULL);
|
||
|
printf("Device Version : %s\n",str);
|
||
|
|
||
|
clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE,
|
||
|
sizeof(int_info), &int_info, NULL);
|
||
|
printf("Size of global memory : %lu (MB) \n",
|
||
|
int_info/1024/1024);
|
||
|
|
||
|
clGetDeviceInfo(devices[i], CL_DEVICE_LOCAL_MEM_SIZE,
|
||
|
sizeof(int_info), &int_info, NULL);
|
||
|
printf("Size of local memory : %lu (KB) \n", int_info/1024);
|
||
|
|
||
|
clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
||
|
sizeof(int_info), &int_info, NULL);
|
||
|
printf("Max clock frequency : %4.2lf (GHz) \n",
|
||
|
int_info/1024.0);
|
||
|
|
||
|
printf("-------------------------------------------------------"
|
||
|
"----\n\n");
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
/*
|
||
|
* XXX: Very AMD-centric, should make it more flexible...
|
||
|
* Intel has the first device as the CPU and no GPU support
|
||
|
*/
|
||
|
|
||
|
/*
|
||
|
* GPU
|
||
|
*/
|
||
|
puctx->gpu_queue = clCreateCommandQueueWithProperties(ctx, devices[0], 0,
|
||
|
&result);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
log_warn("cl", "GPU: clGetContextInfo() failed!");
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* CPU
|
||
|
*/
|
||
|
#if 0
|
||
|
puctx->cpu_queue = clCreateCommandQueue(ctx, devices[1], 0,
|
||
|
&result);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
log_warn("cl", "CPU: clGetContextInfo() failed!");
|
||
|
goto err;
|
||
|
}
|
||
|
#endif
|
||
|
|
||
|
puctx->devices = devices;
|
||
|
puctx->ctx = ctx;
|
||
|
|
||
|
free(platforms);
|
||
|
|
||
|
return CL_SUCCESS;
|
||
|
|
||
|
err:
|
||
|
if (platforms)
|
||
|
free(platforms);
|
||
|
if (devices)
|
||
|
free(devices);
|
||
|
if (puctx->cpu_queue)
|
||
|
clReleaseCommandQueue(puctx->cpu_queue);
|
||
|
if (puctx->gpu_queue)
|
||
|
clReleaseCommandQueue(puctx->gpu_queue);
|
||
|
if (ctx)
|
||
|
clReleaseContext(ctx);
|
||
|
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
cl_clean_up(struct cl_uctx uctx)
|
||
|
{
|
||
|
if (uctx.devices) {
|
||
|
free(uctx.devices);
|
||
|
}
|
||
|
if (uctx.gpu_queue) {
|
||
|
clReleaseCommandQueue(uctx.gpu_queue);
|
||
|
}
|
||
|
if (uctx.cpu_queue) {
|
||
|
clReleaseCommandQueue(uctx.cpu_queue);
|
||
|
}
|
||
|
if (uctx.ctx) {
|
||
|
clReleaseContext(uctx.ctx);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
cl_int
|
||
|
cl_build(struct cl_uctx uctx, cl_device_type dev,
|
||
|
char *kern_fname, cl_program *pprogram)
|
||
|
{
|
||
|
cl_program program = NULL;
|
||
|
cl_int result = CL_SUCCESS;
|
||
|
|
||
|
FILE *kern_file = NULL;
|
||
|
char *kern_src = NULL;
|
||
|
size_t srcsz = 0;
|
||
|
|
||
|
int type;
|
||
|
|
||
|
#ifdef INTEL_KERNEL_DEBUG
|
||
|
char build_options[100] = "-g -s F:\\obj\\vs\\debug\\Bin\\Debug\\";
|
||
|
strcat(&build_options[32], kern_fname);
|
||
|
#else
|
||
|
char *build_options = NULL;
|
||
|
#endif
|
||
|
|
||
|
if (kern_fname == NULL || pprogram == NULL)
|
||
|
return CL_INVALID_VALUE;
|
||
|
|
||
|
/* XXX: AMD-centric, should probably be passed as param */
|
||
|
/* Decide the target based on device type */
|
||
|
if (dev == CL_DEVICE_TYPE_GPU) {
|
||
|
type = 0;
|
||
|
} else {
|
||
|
type = 1;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* Compile and link the OpenCL kernel.
|
||
|
*/
|
||
|
|
||
|
/* Read-in the source code */
|
||
|
kern_file = fopen(kern_fname, "rb");
|
||
|
if (kern_file == NULL) {
|
||
|
log_warn("cl", "Failed to open kernel source file %s!",
|
||
|
kern_fname);
|
||
|
result = CL_INVALID_VALUE;
|
||
|
goto err;
|
||
|
}
|
||
|
fseek(kern_file, 0, SEEK_END);
|
||
|
srcsz = ftell(kern_file);
|
||
|
fseek(kern_file, 0, SEEK_SET);
|
||
|
kern_src = (char *)malloc(srcsz + 1);
|
||
|
if (kern_src == NULL) {
|
||
|
log_warn("cl", "kern_src malloc() failed!");
|
||
|
result = CL_INVALID_VALUE;
|
||
|
goto err;
|
||
|
}
|
||
|
fread(kern_src, 1, srcsz, kern_file);
|
||
|
kern_src[srcsz] = 0;
|
||
|
|
||
|
log_info("cl", "FILE DUMP BEGINS");
|
||
|
log_info("cl", "%s", kern_src);
|
||
|
log_info("cl", "FILE DUMP ENDS");
|
||
|
|
||
|
program = clCreateProgramWithSource(uctx.ctx, 1,
|
||
|
(const char **)&kern_src, &srcsz, &result);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
log_warn("cl", "clCreateProgamWithSource() failed!");
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
/* Build the kernel */
|
||
|
result = clBuildProgram(program, 1, &uctx.devices[type],
|
||
|
build_options, NULL, NULL);
|
||
|
if (result != CL_SUCCESS) {
|
||
|
/* Print out the build log in case of failure */
|
||
|
char programLog[10000] = {0};
|
||
|
log_warn("cl", "clBuildProgram() failed!");
|
||
|
clGetProgramBuildInfo(program, uctx.devices[type],
|
||
|
CL_PROGRAM_BUILD_LOG, 10000, programLog, 0);
|
||
|
log_warn("cl", "%s\n", programLog);
|
||
|
goto err;
|
||
|
}
|
||
|
|
||
|
*pprogram = program;
|
||
|
|
||
|
err:
|
||
|
if (kern_file)
|
||
|
fclose(kern_file);
|
||
|
if (kern_src)
|
||
|
free(kern_src);
|
||
|
if (result != CL_SUCCESS && program)
|
||
|
clReleaseProgram(program);
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
cl_int
|
||
|
cl_get_kern(cl_program program, char *kname, cl_kernel *pkern)
|
||
|
{
|
||
|
cl_int result = CL_SUCCESS;
|
||
|
|
||
|
if (program == NULL || kname == NULL || pkern == NULL)
|
||
|
return CL_INVALID_VALUE;
|
||
|
|
||
|
*pkern = clCreateKernel(program, kname, &result);
|
||
|
|
||
|
#if 0
|
||
|
clGetKernelWorkGroupInfo(ckKernel[0], cdDevice[did],
|
||
|
CL_KERNEL_WORK_GROUP_SIZE, sizeof(int_info), &int_info, NULL);
|
||
|
printf("GPU Maximum Work group size : %d\n", int_info);
|
||
|
clGetKernelWorkGroupInfo(ckKernel[0], cdDevice[did],
|
||
|
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(int_info),
|
||
|
&int_info, NULL);
|
||
|
printf("GPU Preferred Work group size : %d\n", int_info);
|
||
|
#endif
|
||
|
|
||
|
return result;
|
||
|
}
|