/* * Copyright (c) 2011-2016, 2025 Paul Irofti * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include #include "logging.h" #include "cl_setup.h" #define CL_DEBUG cl_int cl_init(struct cl_uctx *puctx) { /* OpenCL specific variables */ size_t dataBytes; cl_int result = CL_SUCCESS; cl_uint nplat = 0, ndevices = 0, i = 0, matched_plat; cl_platform_id *platforms = NULL, platform = NULL; cl_device_id *devices = NULL; cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0}; cl_context ctx = NULL; if (puctx == NULL || puctx->platform_name == NULL) { return CL_INVALID_VALUE; } if (puctx->device_type > CL_DEVICE_TYPE_GPU) { puctx->device_type = CL_DEVICE_TYPE_ALL; } /* * Initialize OpenCL. */ result = clGetPlatformIDs(0, NULL, &nplat); if (result != CL_SUCCESS) { log_warn("cl", "Failed getting the number of platforms"); goto err; } if (nplat < 0) { log_warn("cl", "No platforms found"); goto err; } platforms = calloc(nplat, sizeof platforms[0]); if (platforms == NULL) { log_warn("cl", "Failed to allocate platforms"); goto err; } result = clGetPlatformIDs(nplat, platforms, NULL); if (result != CL_SUCCESS) { log_warn("cl", "Failed fetching the platforms"); goto err; } log_debug("cl", "Found %d platforms", nplat); for (i = 0; i < nplat; i++) { char platname[100]; result = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(platname), platname, NULL); if (result != CL_SUCCESS) { log_warn("cl", "Failed fetching platform info"); goto err; } log_debug("cl", "Checking %s == %s", puctx->platform_name, platname); if (!strcmp(platname, puctx->platform_name)) { platform = platforms[i]; matched_plat = i; break; } } if (platform == NULL) { log_warn("cl", "No matching platform found"); result = CL_DEVICE_NOT_FOUND; goto err; } #ifdef CL_DEBUG printf("-----------------------------------------------------------\n"); printf(" PLATFORM INFORMATION (the number of platforms = %d) \n", nplat); for(i = 0 ; i < nplat; i++) { char *long_str; char str[1024]; size_t str_size; printf("-------------------------------------------------------" "----\n"); printf( " PLATFORM ID : %d " "\n", i); printf("-------------------------------------------------------" "----\n"); clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(str), str, NULL); printf("Platform name : %s\n", str); clGetPlatformInfo(platforms[i], CL_PLATFORM_VERSION, sizeof(str), str, NULL); printf("Platform version : %s\n", str); clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &str_size); long_str = (char *)malloc(str_size); clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, str_size, long_str, NULL); printf("Platform extensions : %s\n", long_str); printf("-------------------------------------------------------" "----\n\n"); free(long_str); } #endif props[1] = (cl_context_properties)platform; ctx = clCreateContextFromType(props, CL_DEVICE_TYPE_ALL, NULL, NULL, &result); if(result != CL_SUCCESS) { log_warn("cl", "Failed to create context"); goto err; } /* fetch the list of devices associated with context */ result = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, 0, NULL, &dataBytes); if(result != CL_SUCCESS) { log_warn("cl", "Failed to fetch devices size!"); goto err; } devices = (cl_device_id *)malloc(dataBytes); if (devices == NULL) { log_warn("cl", "devices malloc() failed!"); goto err; } result |= clGetContextInfo(ctx, CL_CONTEXT_DEVICES, dataBytes, devices, NULL); if (result != CL_SUCCESS) { log_warn("cl", "clGetContextInfo() failed with %d!", result); goto err; } #ifdef CL_DEBUG result = clGetDeviceIDs(platforms[matched_plat], CL_DEVICE_TYPE_ALL, 0, NULL, &ndevices); if (result != CL_SUCCESS) { log_warn("cl", "clGetDeviceIDs() failed!"); goto err; } printf("-----------------------------------------------------------\n"); printf(" DEVICE INFORMATION (the number of devices = %d) \n", ndevices); for(i = 0 ; i < ndevices ; i++) { char str[1024]; size_t int_info; printf("-------------------------------------------------------" "----\n"); printf(" DEVICE ID : %d " "\n",i); printf("-------------------------------------------------------" "----\n"); clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(str), str, NULL); printf("Device Name : %s\n",str); clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(str), str, NULL); printf("Device Version : %s\n",str); clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(int_info), &int_info, NULL); printf("Size of global memory : %lu (MB) \n", int_info/1024/1024); clGetDeviceInfo(devices[i], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(int_info), &int_info, NULL); printf("Size of local memory : %lu (KB) \n", int_info/1024); clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(int_info), &int_info, NULL); printf("Max clock frequency : %4.2lf (GHz) \n", int_info/1024.0); printf("-------------------------------------------------------" "----\n\n"); } #endif /* * XXX: Very AMD-centric, should make it more flexible... * Intel has the first device as the CPU and no GPU support */ /* * GPU */ puctx->gpu_queue = clCreateCommandQueueWithProperties(ctx, devices[0], 0, &result); if (result != CL_SUCCESS) { log_warn("cl", "GPU: clGetContextInfo() failed!"); goto err; } /* * CPU */ #if 0 puctx->cpu_queue = clCreateCommandQueue(ctx, devices[1], 0, &result); if (result != CL_SUCCESS) { log_warn("cl", "CPU: clGetContextInfo() failed!"); goto err; } #endif puctx->devices = devices; puctx->ctx = ctx; free(platforms); return CL_SUCCESS; err: if (platforms) free(platforms); if (devices) free(devices); if (puctx->cpu_queue) clReleaseCommandQueue(puctx->cpu_queue); if (puctx->gpu_queue) clReleaseCommandQueue(puctx->gpu_queue); if (ctx) clReleaseContext(ctx); return result; } void cl_clean_up(struct cl_uctx uctx) { if (uctx.devices) { free(uctx.devices); } if (uctx.gpu_queue) { clReleaseCommandQueue(uctx.gpu_queue); } if (uctx.cpu_queue) { clReleaseCommandQueue(uctx.cpu_queue); } if (uctx.ctx) { clReleaseContext(uctx.ctx); } } cl_int cl_build(struct cl_uctx uctx, cl_device_type dev, char *kern_fname, cl_program *pprogram) { cl_program program = NULL; cl_int result = CL_SUCCESS; FILE *kern_file = NULL; char *kern_src = NULL; size_t srcsz = 0; int type; #ifdef INTEL_KERNEL_DEBUG char build_options[100] = "-g -s F:\\obj\\vs\\debug\\Bin\\Debug\\"; strcat(&build_options[32], kern_fname); #else char *build_options = NULL; #endif if (kern_fname == NULL || pprogram == NULL) return CL_INVALID_VALUE; /* XXX: AMD-centric, should probably be passed as param */ /* Decide the target based on device type */ if (dev == CL_DEVICE_TYPE_GPU) { type = 0; } else { type = 1; } /* * Compile and link the OpenCL kernel. */ /* Read-in the source code */ kern_file = fopen(kern_fname, "rb"); if (kern_file == NULL) { log_warn("cl", "Failed to open kernel source file %s!", kern_fname); result = CL_INVALID_VALUE; goto err; } fseek(kern_file, 0, SEEK_END); srcsz = ftell(kern_file); fseek(kern_file, 0, SEEK_SET); kern_src = (char *)malloc(srcsz + 1); if (kern_src == NULL) { log_warn("cl", "kern_src malloc() failed!"); result = CL_INVALID_VALUE; goto err; } fread(kern_src, 1, srcsz, kern_file); kern_src[srcsz] = 0; log_info("cl", "FILE DUMP BEGINS"); log_info("cl", "%s", kern_src); log_info("cl", "FILE DUMP ENDS"); program = clCreateProgramWithSource(uctx.ctx, 1, (const char **)&kern_src, &srcsz, &result); if (result != CL_SUCCESS) { log_warn("cl", "clCreateProgamWithSource() failed!"); goto err; } /* Build the kernel */ result = clBuildProgram(program, 1, &uctx.devices[type], build_options, NULL, NULL); if (result != CL_SUCCESS) { /* Print out the build log in case of failure */ char programLog[10000] = {0}; log_warn("cl", "clBuildProgram() failed!"); clGetProgramBuildInfo(program, uctx.devices[type], CL_PROGRAM_BUILD_LOG, 10000, programLog, 0); log_warn("cl", "%s\n", programLog); goto err; } *pprogram = program; err: if (kern_file) fclose(kern_file); if (kern_src) free(kern_src); if (result != CL_SUCCESS && program) clReleaseProgram(program); return result; } cl_int cl_get_kern(cl_program program, char *kname, cl_kernel *pkern) { cl_int result = CL_SUCCESS; if (program == NULL || kname == NULL || pkern == NULL) return CL_INVALID_VALUE; *pkern = clCreateKernel(program, kname, &result); #if 0 clGetKernelWorkGroupInfo(ckKernel[0], cdDevice[did], CL_KERNEL_WORK_GROUP_SIZE, sizeof(int_info), &int_info, NULL); printf("GPU Maximum Work group size : %d\n", int_info); clGetKernelWorkGroupInfo(ckKernel[0], cdDevice[did], CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, sizeof(int_info), &int_info, NULL); printf("GPU Preferred Work group size : %d\n", int_info); #endif return result; }