2025-03-26 11:50:07 +02:00
|
|
|
/*
 * Copyright (c) 2011-2016, 2025 Paul Irofti <paul@irofti.net>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
|
|
|
|
|
2025-03-20 19:32:25 +02:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <string.h>
|
|
|
|
|
|
|
|
#include "logging.h"
|
|
|
|
|
|
|
|
#include "cl_setup.h"
|
|
|
|
#define CL_DEBUG
|
|
|
|
|
|
|
|
cl_int
|
|
|
|
cl_init(struct cl_uctx *puctx)
|
|
|
|
{
|
|
|
|
/* OpenCL specific variables */
|
|
|
|
size_t dataBytes;
|
|
|
|
|
|
|
|
cl_int result = CL_SUCCESS;
|
|
|
|
cl_uint nplat = 0, ndevices = 0, i = 0, matched_plat;
|
|
|
|
cl_platform_id *platforms = NULL, platform = NULL;
|
|
|
|
cl_device_id *devices = NULL;
|
|
|
|
cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
|
|
|
|
cl_context ctx = NULL;
|
|
|
|
|
|
|
|
|
|
|
|
if (puctx == NULL || puctx->platform_name == NULL) {
|
|
|
|
return CL_INVALID_VALUE;
|
|
|
|
}
|
|
|
|
if (puctx->device_type > CL_DEVICE_TYPE_GPU) {
|
|
|
|
puctx->device_type = CL_DEVICE_TYPE_ALL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Initialize OpenCL.
|
|
|
|
*/
|
|
|
|
result = clGetPlatformIDs(0, NULL, &nplat);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "Failed getting the number of platforms");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
if (nplat < 0) {
|
|
|
|
log_warn("cl", "No platforms found");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
platforms = calloc(nplat, sizeof platforms[0]);
|
|
|
|
if (platforms == NULL) {
|
|
|
|
log_warn("cl", "Failed to allocate platforms");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
result = clGetPlatformIDs(nplat, platforms, NULL);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "Failed fetching the platforms");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
log_debug("cl", "Found %d platforms", nplat);
|
|
|
|
for (i = 0; i < nplat; i++) {
|
|
|
|
char platname[100];
|
|
|
|
result = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
|
|
|
|
sizeof(platname), platname, NULL);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "Failed fetching platform info");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
log_debug("cl", "Checking %s == %s",
|
|
|
|
puctx->platform_name, platname);
|
|
|
|
if (!strcmp(platname, puctx->platform_name)) {
|
|
|
|
platform = platforms[i];
|
|
|
|
matched_plat = i;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (platform == NULL) {
|
|
|
|
log_warn("cl", "No matching platform found");
|
|
|
|
result = CL_DEVICE_NOT_FOUND;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
#ifdef CL_DEBUG
|
|
|
|
printf("-----------------------------------------------------------\n");
|
|
|
|
printf(" PLATFORM INFORMATION (the number of platforms = %d) \n",
|
|
|
|
nplat);
|
|
|
|
for(i = 0 ; i < nplat; i++)
|
|
|
|
{
|
|
|
|
char *long_str;
|
|
|
|
char str[1024];
|
|
|
|
size_t str_size;
|
|
|
|
|
|
|
|
printf("-------------------------------------------------------"
|
|
|
|
"----\n");
|
|
|
|
printf( " PLATFORM ID : %d "
|
|
|
|
"\n", i);
|
|
|
|
printf("-------------------------------------------------------"
|
|
|
|
"----\n");
|
|
|
|
|
|
|
|
clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str),
|
|
|
|
str, NULL);
|
|
|
|
printf("Platform name : %s\n", str);
|
|
|
|
|
|
|
|
clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION,
|
|
|
|
sizeof(str), str, NULL);
|
|
|
|
printf("Platform version : %s\n", str);
|
|
|
|
|
|
|
|
clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0,
|
|
|
|
NULL, &str_size);
|
|
|
|
long_str = (char *)malloc(str_size);
|
|
|
|
clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS,
|
|
|
|
str_size, long_str, NULL);
|
|
|
|
printf("Platform extensions : %s\n", long_str);
|
|
|
|
|
|
|
|
printf("-------------------------------------------------------"
|
|
|
|
"----\n\n");
|
|
|
|
|
|
|
|
free(long_str);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
props[1] = (cl_context_properties)platform;
|
|
|
|
|
|
|
|
ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_ALL, NULL, NULL,
|
|
|
|
&result);
|
|
|
|
if(result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "Failed to create context");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* fetch the list of devices associated with context */
|
|
|
|
result = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL,
|
|
|
|
&dataBytes);
|
|
|
|
if(result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "Failed to fetch devices size!");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
devices = (cl_device_id *)malloc(dataBytes);
|
|
|
|
if (devices == NULL) {
|
|
|
|
log_warn("cl", "devices malloc() failed!");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
result |= clGetContextInfo(ctx, CL_CONTEXT_DEVICES, dataBytes,
|
|
|
|
devices, NULL);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "clGetContextInfo() failed with %d!", result);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef CL_DEBUG
|
|
|
|
result = clGetDeviceIDs(platforms[matched_plat], CL_DEVICE_TYPE_ALL,
|
|
|
|
0, NULL, &ndevices);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "clGetDeviceIDs() failed!");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
printf("-----------------------------------------------------------\n");
|
|
|
|
printf(" DEVICE INFORMATION (the number of devices = %d) \n",
|
|
|
|
ndevices);
|
|
|
|
for(i = 0 ; i < ndevices ; i++)
|
|
|
|
{
|
|
|
|
char str[1024];
|
|
|
|
size_t int_info;
|
|
|
|
|
|
|
|
printf("-------------------------------------------------------"
|
|
|
|
"----\n");
|
|
|
|
printf(" DEVICE ID : %d "
|
|
|
|
"\n",i);
|
|
|
|
printf("-------------------------------------------------------"
|
|
|
|
"----\n");
|
|
|
|
|
|
|
|
clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(str), str,
|
|
|
|
NULL);
|
|
|
|
printf("Device Name : %s\n",str);
|
|
|
|
|
|
|
|
clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(str), str,
|
|
|
|
NULL);
|
|
|
|
printf("Device Version : %s\n",str);
|
|
|
|
|
|
|
|
clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE,
|
|
|
|
sizeof(int_info), &int_info, NULL);
|
|
|
|
printf("Size of global memory : %lu (MB) \n",
|
|
|
|
int_info/1024/1024);
|
|
|
|
|
|
|
|
clGetDeviceInfo(devices[i], CL_DEVICE_LOCAL_MEM_SIZE,
|
|
|
|
sizeof(int_info), &int_info, NULL);
|
|
|
|
printf("Size of local memory : %lu (KB) \n", int_info/1024);
|
|
|
|
|
|
|
|
clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY,
|
|
|
|
sizeof(int_info), &int_info, NULL);
|
|
|
|
printf("Max clock frequency : %4.2lf (GHz) \n",
|
|
|
|
int_info/1024.0);
|
|
|
|
|
|
|
|
printf("-------------------------------------------------------"
|
|
|
|
"----\n\n");
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/*
|
|
|
|
* XXX: Very AMD-centric, should make it more flexible...
|
|
|
|
* Intel has the first device as the CPU and no GPU support
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* GPU
|
|
|
|
*/
|
|
|
|
puctx->gpu_queue = clCreateCommandQueueWithProperties(ctx, devices[0], 0,
|
|
|
|
&result);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "GPU: clGetContextInfo() failed!");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* CPU
|
|
|
|
*/
|
|
|
|
#if 0
|
|
|
|
puctx->cpu_queue = clCreateCommandQueue(ctx, devices[1], 0,
|
|
|
|
&result);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "CPU: clGetContextInfo() failed!");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
puctx->devices = devices;
|
|
|
|
puctx->ctx = ctx;
|
|
|
|
|
|
|
|
free(platforms);
|
|
|
|
|
|
|
|
return CL_SUCCESS;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (platforms)
|
|
|
|
free(platforms);
|
|
|
|
if (devices)
|
|
|
|
free(devices);
|
|
|
|
if (puctx->cpu_queue)
|
|
|
|
clReleaseCommandQueue(puctx->cpu_queue);
|
|
|
|
if (puctx->gpu_queue)
|
|
|
|
clReleaseCommandQueue(puctx->gpu_queue);
|
|
|
|
if (ctx)
|
|
|
|
clReleaseContext(ctx);
|
|
|
|
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
cl_clean_up(struct cl_uctx uctx)
|
|
|
|
{
|
|
|
|
if (uctx.devices) {
|
|
|
|
free(uctx.devices);
|
|
|
|
}
|
|
|
|
if (uctx.gpu_queue) {
|
|
|
|
clReleaseCommandQueue(uctx.gpu_queue);
|
|
|
|
}
|
|
|
|
if (uctx.cpu_queue) {
|
|
|
|
clReleaseCommandQueue(uctx.cpu_queue);
|
|
|
|
}
|
|
|
|
if (uctx.ctx) {
|
|
|
|
clReleaseContext(uctx.ctx);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cl_int
|
|
|
|
cl_build(struct cl_uctx uctx, cl_device_type dev,
|
|
|
|
char *kern_fname, cl_program *pprogram)
|
|
|
|
{
|
|
|
|
cl_program program = NULL;
|
|
|
|
cl_int result = CL_SUCCESS;
|
|
|
|
|
|
|
|
FILE *kern_file = NULL;
|
|
|
|
char *kern_src = NULL;
|
|
|
|
size_t srcsz = 0;
|
2025-03-26 15:15:57 +02:00
|
|
|
size_t ret;
|
2025-03-20 19:32:25 +02:00
|
|
|
|
|
|
|
int type;
|
|
|
|
|
|
|
|
#ifdef INTEL_KERNEL_DEBUG
|
|
|
|
char build_options[100] = "-g -s F:\\obj\\vs\\debug\\Bin\\Debug\\";
|
|
|
|
strcat(&build_options[32], kern_fname);
|
|
|
|
#else
|
|
|
|
char *build_options = NULL;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
if (kern_fname == NULL || pprogram == NULL)
|
|
|
|
return CL_INVALID_VALUE;
|
|
|
|
|
|
|
|
/* XXX: AMD-centric, should probably be passed as param */
|
|
|
|
/* Decide the target based on device type */
|
|
|
|
if (dev == CL_DEVICE_TYPE_GPU) {
|
|
|
|
type = 0;
|
|
|
|
} else {
|
|
|
|
type = 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Compile and link the OpenCL kernel.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* Read-in the source code */
|
|
|
|
kern_file = fopen(kern_fname, "rb");
|
|
|
|
if (kern_file == NULL) {
|
|
|
|
log_warn("cl", "Failed to open kernel source file %s!",
|
|
|
|
kern_fname);
|
|
|
|
result = CL_INVALID_VALUE;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
fseek(kern_file, 0, SEEK_END);
|
|
|
|
srcsz = ftell(kern_file);
|
|
|
|
fseek(kern_file, 0, SEEK_SET);
|
|
|
|
kern_src = (char *)malloc(srcsz + 1);
|
|
|
|
if (kern_src == NULL) {
|
|
|
|
log_warn("cl", "kern_src malloc() failed!");
|
|
|
|
result = CL_INVALID_VALUE;
|
|
|
|
goto err;
|
|
|
|
}
|
2025-03-26 15:15:57 +02:00
|
|
|
ret = fread(kern_src, 1, srcsz, kern_file);
|
|
|
|
if (ret != srcsz) {
|
|
|
|
log_warn("cl", "fread() failed!");
|
|
|
|
goto err;
|
|
|
|
}
|
2025-03-20 19:32:25 +02:00
|
|
|
kern_src[srcsz] = 0;
|
|
|
|
|
|
|
|
log_info("cl", "FILE DUMP BEGINS");
|
|
|
|
log_info("cl", "%s", kern_src);
|
|
|
|
log_info("cl", "FILE DUMP ENDS");
|
|
|
|
|
|
|
|
program = clCreateProgramWithSource(uctx.ctx, 1,
|
|
|
|
(const char **)&kern_src, &srcsz, &result);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
log_warn("cl", "clCreateProgamWithSource() failed!");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Build the kernel */
|
|
|
|
result = clBuildProgram(program, 1, &uctx.devices[type],
|
|
|
|
build_options, NULL, NULL);
|
|
|
|
if (result != CL_SUCCESS) {
|
|
|
|
/* Print out the build log in case of failure */
|
|
|
|
char programLog[10000] = {0};
|
|
|
|
log_warn("cl", "clBuildProgram() failed!");
|
|
|
|
clGetProgramBuildInfo(program, uctx.devices[type],
|
|
|
|
CL_PROGRAM_BUILD_LOG, 10000, programLog, 0);
|
|
|
|
log_warn("cl", "%s\n", programLog);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
*pprogram = program;
|
|
|
|
|
|
|
|
err:
|
|
|
|
if (kern_file)
|
|
|
|
fclose(kern_file);
|
|
|
|
if (kern_src)
|
|
|
|
free(kern_src);
|
|
|
|
if (result != CL_SUCCESS && program)
|
|
|
|
clReleaseProgram(program);
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
cl_int
|
|
|
|
cl_get_kern(cl_program program, char *kname, cl_kernel *pkern)
|
|
|
|
{
|
|
|
|
cl_int result = CL_SUCCESS;
|
|
|
|
|
|
|
|
if (program == NULL || kname == NULL || pkern == NULL)
|
|
|
|
return CL_INVALID_VALUE;
|
|
|
|
|
|
|
|
*pkern = clCreateKernel(program, kname, &result);
|
|
|
|
|
|
|
|
#if 0
|
|
|
|
clGetKernelWorkGroupInfo(ckKernel[0], cdDevice[did],
|
|
|
|
CL_KERNEL_WORK_GROUP_SIZE, sizeof(int_info), &int_info, NULL);
|
|
|
|
printf("GPU Maximum Work group size : %d\n", int_info);
|
|
|
|
clGetKernelWorkGroupInfo(ckKernel[0], cdDevice[did],
|
|
|
|
CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(int_info),
|
|
|
|
&int_info, NULL);
|
|
|
|
printf("GPU Preferred Work group size : %d\n", int_info);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
return result;
|
|
|
|
}
|