/*
 * ccubes-cl/cl_setup.c
 *
 * OpenCL platform, context, command-queue and program setup helpers.
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "logging.h"
#include "cl_setup.h"
#define CL_DEBUG
cl_int
cl_init(struct cl_uctx *puctx)
{
/* OpenCL specific variables */
size_t dataBytes;
cl_int result = CL_SUCCESS;
cl_uint nplat = 0, ndevices = 0, i = 0, matched_plat;
cl_platform_id *platforms = NULL, platform = NULL;
cl_device_id *devices = NULL;
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
cl_context ctx = NULL;
if (puctx == NULL || puctx->platform_name == NULL) {
return CL_INVALID_VALUE;
}
if (puctx->device_type > CL_DEVICE_TYPE_GPU) {
puctx->device_type = CL_DEVICE_TYPE_ALL;
}
/*
* Initialize OpenCL.
*/
result = clGetPlatformIDs(0, NULL, &nplat);
if (result != CL_SUCCESS) {
log_warn("cl", "Failed getting the number of platforms");
goto err;
}
if (nplat < 0) {
log_warn("cl", "No platforms found");
goto err;
}
platforms = calloc(nplat, sizeof platforms[0]);
if (platforms == NULL) {
log_warn("cl", "Failed to allocate platforms");
goto err;
}
result = clGetPlatformIDs(nplat, platforms, NULL);
if (result != CL_SUCCESS) {
log_warn("cl", "Failed fetching the platforms");
goto err;
}
log_debug("cl", "Found %d platforms", nplat);
for (i = 0; i < nplat; i++) {
char platname[100];
result = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
sizeof(platname), platname, NULL);
if (result != CL_SUCCESS) {
log_warn("cl", "Failed fetching platform info");
goto err;
}
log_debug("cl", "Checking %s == %s",
puctx->platform_name, platname);
if (!strcmp(platname, puctx->platform_name)) {
platform = platforms[i];
matched_plat = i;
break;
}
}
if (platform == NULL) {
log_warn("cl", "No matching platform found");
result = CL_DEVICE_NOT_FOUND;
goto err;
}
#ifdef CL_DEBUG
printf("-----------------------------------------------------------\n");
printf(" PLATFORM INFORMATION (the number of platforms = %d) \n",
nplat);
for(i = 0 ; i < nplat; i++)
{
char *long_str;
char str[1024];
size_t str_size;
printf("-------------------------------------------------------"
"----\n");
printf( " PLATFORM ID : %d "
"\n", i);
printf("-------------------------------------------------------"
"----\n");
clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str),
str, NULL);
printf("Platform name : %s\n", str);
clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION,
sizeof(str), str, NULL);
printf("Platform version : %s\n", str);
clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0,
NULL, &str_size);
long_str = (char *)malloc(str_size);
clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS,
str_size, long_str, NULL);
printf("Platform extensions : %s\n", long_str);
printf("-------------------------------------------------------"
"----\n\n");
free(long_str);
}
#endif
props[1] = (cl_context_properties)platform;
ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_ALL, NULL, NULL,
&result);
if(result != CL_SUCCESS) {
log_warn("cl", "Failed to create context");
goto err;
}
/* fetch the list of devices associated with context */
result = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL,
&dataBytes);
if(result != CL_SUCCESS) {
log_warn("cl", "Failed to fetch devices size!");
goto err;
}
devices = (cl_device_id *)malloc(dataBytes);
if (devices == NULL) {
log_warn("cl", "devices malloc() failed!");
goto err;
}
result |= clGetContextInfo(ctx, CL_CONTEXT_DEVICES, dataBytes,
devices, NULL);
if (result != CL_SUCCESS) {
log_warn("cl", "clGetContextInfo() failed with %d!", result);
goto err;
}
#ifdef CL_DEBUG
result = clGetDeviceIDs(platforms[matched_plat], CL_DEVICE_TYPE_ALL,
0, NULL, &ndevices);
if (result != CL_SUCCESS) {
log_warn("cl", "clGetDeviceIDs() failed!");
goto err;
}
printf("-----------------------------------------------------------\n");
printf(" DEVICE INFORMATION (the number of devices = %d) \n",
ndevices);
for(i = 0 ; i < ndevices ; i++)
{
char str[1024];
size_t int_info;
printf("-------------------------------------------------------"
"----\n");
printf(" DEVICE ID : %d "
"\n",i);
printf("-------------------------------------------------------"
"----\n");
clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(str), str,
NULL);
printf("Device Name : %s\n",str);
clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(str), str,
NULL);
printf("Device Version : %s\n",str);
clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE,
sizeof(int_info), &int_info, NULL);
printf("Size of global memory : %lu (MB) \n",
int_info/1024/1024);
clGetDeviceInfo(devices[i], CL_DEVICE_LOCAL_MEM_SIZE,
sizeof(int_info), &int_info, NULL);
printf("Size of local memory : %lu (KB) \n", int_info/1024);
clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY,
sizeof(int_info), &int_info, NULL);
printf("Max clock frequency : %4.2lf (GHz) \n",
int_info/1024.0);
printf("-------------------------------------------------------"
"----\n\n");
}
#endif
/*
* XXX: Very AMD-centric, should make it more flexible...
* Intel has the first device as the CPU and no GPU support
*/
/*
* GPU
*/
puctx->gpu_queue = clCreateCommandQueueWithProperties(ctx, devices[0], 0,
&result);
if (result != CL_SUCCESS) {
log_warn("cl", "GPU: clGetContextInfo() failed!");
goto err;
}
/*
* CPU
*/
#if 0
puctx->cpu_queue = clCreateCommandQueue(ctx, devices[1], 0,
&result);
if (result != CL_SUCCESS) {
log_warn("cl", "CPU: clGetContextInfo() failed!");
goto err;
}
#endif
puctx->devices = devices;
puctx->ctx = ctx;
free(platforms);
return CL_SUCCESS;
err:
if (platforms)
free(platforms);
if (devices)
free(devices);
if (puctx->cpu_queue)
clReleaseCommandQueue(puctx->cpu_queue);
if (puctx->gpu_queue)
clReleaseCommandQueue(puctx->gpu_queue);
if (ctx)
clReleaseContext(ctx);
return result;
}
/*
 * Release everything held by `uctx`: the device array, both command
 * queues and the context, in that order. Members that are NULL are
 * skipped. The structure is passed by value, so the caller's copy still
 * holds the (now stale) handles afterwards.
 */
void
cl_clean_up(struct cl_uctx uctx)
{
    /* free() accepts NULL, so the device array needs no guard. */
    free(uctx.devices);

    if (uctx.gpu_queue != NULL)
        clReleaseCommandQueue(uctx.gpu_queue);

    if (uctx.cpu_queue != NULL)
        clReleaseCommandQueue(uctx.cpu_queue);

    if (uctx.ctx != NULL)
        clReleaseContext(uctx.ctx);
}
/*
 * Read the OpenCL source file `kern_fname`, create a program from it in
 * uctx.ctx and build it for a single device of uctx.devices.
 *
 * uctx       - context state; uctx.ctx and uctx.devices are used.
 * dev        - CL_DEVICE_TYPE_GPU selects uctx.devices[0]; any other
 *              value selects uctx.devices[1].
 * kern_fname - path of the kernel source file.
 * pprogram   - receives the built program on success; left untouched on
 *              failure. The caller owns the returned program.
 *
 * Returns CL_SUCCESS, CL_INVALID_VALUE (bad argument, file I/O problem
 * or host allocation failure) or the error code of the failing OpenCL
 * call. On a build failure the device's build log is written through
 * log_warn().
 */
cl_int
cl_build(struct cl_uctx uctx, cl_device_type dev,
    char *kern_fname, cl_program *pprogram)
{
    cl_program program = NULL;
    cl_int result = CL_SUCCESS;
    FILE *kern_file = NULL;
    char *kern_src = NULL;
    size_t srcsz = 0;
    long fsize = -1;
    int type;
#ifdef INTEL_KERNEL_DEBUG
    char build_options[100];
    int optlen;
#else
    char *build_options = NULL;
#endif

    if (kern_fname == NULL || pprogram == NULL || uctx.devices == NULL)
        return CL_INVALID_VALUE;

#ifdef INTEL_KERNEL_DEBUG
    /*
     * Compose the debug options only after kern_fname is known to be
     * non-NULL, and refuse names that do not fit the buffer.
     */
    optlen = snprintf(build_options, sizeof build_options,
        "-g -s F:\\obj\\vs\\debug\\Bin\\Debug\\%s", kern_fname);
    if (optlen < 0 || (size_t)optlen >= sizeof build_options) {
        log_warn("cl", "Build options too long for kernel %s!",
            kern_fname);
        return CL_INVALID_VALUE;
    }
#endif

    /* XXX: AMD-centric, should probably be passed as param */
    /* Decide the target based on device type */
    /*
     * NOTE(review): index 1 assumes uctx.devices holds at least two
     * entries; the element count is not visible from here - confirm
     * against the caller.
     */
    type = (dev == CL_DEVICE_TYPE_GPU) ? 0 : 1;

    /*
     * Compile and link the OpenCL kernel.
     */
    /* Read-in the source code */
    kern_file = fopen(kern_fname, "rb");
    if (kern_file == NULL) {
        log_warn("cl", "Failed to open kernel source file %s!",
            kern_fname);
        result = CL_INVALID_VALUE;
        goto out;
    }

    /* ftell() reports failure as -1; never let that become a size. */
    if (fseek(kern_file, 0, SEEK_END) == 0)
        fsize = ftell(kern_file);
    if (fsize < 0 || fseek(kern_file, 0, SEEK_SET) != 0) {
        log_warn("cl", "Failed to get size of kernel source file %s!",
            kern_fname);
        result = CL_INVALID_VALUE;
        goto out;
    }
    srcsz = (size_t)fsize;

    kern_src = malloc(srcsz + 1);
    if (kern_src == NULL) {
        log_warn("cl", "kern_src malloc() failed!");
        result = CL_INVALID_VALUE;
        goto out;
    }
    /* A short read would leave uninitialised bytes in the source. */
    if (fread(kern_src, 1, srcsz, kern_file) != srcsz) {
        log_warn("cl", "Failed to read kernel source file %s!",
            kern_fname);
        result = CL_INVALID_VALUE;
        goto out;
    }
    kern_src[srcsz] = 0;

    log_info("cl", "FILE DUMP BEGINS");
    log_info("cl", "%s", kern_src);
    log_info("cl", "FILE DUMP ENDS");

    program = clCreateProgramWithSource(uctx.ctx, 1,
        (const char **)&kern_src, &srcsz, &result);
    if (result != CL_SUCCESS) {
        log_warn("cl", "clCreateProgamWithSource() failed!");
        goto out;
    }

    /* Build the kernel */
    result = clBuildProgram(program, 1, &uctx.devices[type],
        build_options, NULL, NULL);
    if (result != CL_SUCCESS) {
        /*
         * Print out the build log in case of failure. The log is sized
         * first: a too-small buffer makes the query fail and yields no
         * log at all. The build error in `result` is kept as is.
         */
        size_t logsz = 0;
        char *build_log = NULL;

        log_warn("cl", "clBuildProgram() failed!");
        if (clGetProgramBuildInfo(program, uctx.devices[type],
            CL_PROGRAM_BUILD_LOG, 0, NULL, &logsz) == CL_SUCCESS &&
            logsz > 0) {
            build_log = malloc(logsz);
        }
        if (build_log != NULL &&
            clGetProgramBuildInfo(program, uctx.devices[type],
            CL_PROGRAM_BUILD_LOG, logsz, build_log, NULL) ==
            CL_SUCCESS) {
            build_log[logsz - 1] = '\0';
            log_warn("cl", "%s\n", build_log);
        }
        free(build_log);
        goto out;
    }

    *pprogram = program;

out:
    /* Shared exit: the program is released only on failure. */
    if (kern_file != NULL)
        fclose(kern_file);
    free(kern_src);
    if (result != CL_SUCCESS && program != NULL)
        clReleaseProgram(program);
    return result;
}
/*
 * Create the kernel named `kname` from `program`.
 *
 * The value returned by clCreateKernel() is stored in *pkern, and the
 * error code it reports is returned. CL_INVALID_VALUE is returned,
 * without touching *pkern, when any argument is NULL.
 */
cl_int
cl_get_kern(cl_program program, char *kname, cl_kernel *pkern)
{
    cl_int status = CL_SUCCESS;

    if (!program || !kname || !pkern) {
        return CL_INVALID_VALUE;
    }

    *pkern = clCreateKernel(program, kname, &status);
#if 0
    clGetKernelWorkGroupInfo(ckKernel[0], cdDevice[did],
        CL_KERNEL_WORK_GROUP_SIZE, sizeof(int_info), &int_info, NULL);
    printf("GPU Maximum Work group size : %d\n", int_info);
    clGetKernelWorkGroupInfo(ckKernel[0], cdDevice[did],
        CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(int_info),
        &int_info, NULL);
    printf("GPU Preferred Work group size : %d\n", int_info);
#endif

    return status;
}