/* * Copyright (c) 2025 Paul Irofti * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include "real.h" #include "cl_setup.h" #include "clccubes.h" #include "config.h" #include "logging.h" int ccubes_create_alloc(struct ccubes_context *ctx, int *nofvalues, /* IN: RC */ int *ON_set, /* IN: RC */ int *OFF_set /* IN: RC */ ) { int rc = 0; /* __global const int *nofvalues, IN: RC */ ctx->nofvalues = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->ninputs * sizeof(int), nofvalues, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *ON_set, IN: RC */ ctx->ON_set = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->posrows * ctx->ninputs * sizeof(int), ON_set, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *OFF_set, IN: RC */ ctx->OFF_set = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->ninputs * ctx->negrows * sizeof(int), OFF_set, &rc); if (rc != CL_SUCCESS) { goto err; } err: return rc; } struct ccubes_context * ccubes_create(const char *ccubes_kernel_file, int ninputs, int posrows, int negrows, int *nofvalues, /* IN: RC */ int *ON_set, /* IN: RC */ int *OFF_set /* IN: RC */ ) { struct ccubes_context *ctx = NULL; int rc = 0; ctx = malloc(sizeof *ctx); if (ctx == NULL) { log_error("clccubes", "ctx malloc failed"); goto err; } ctx->clctx = malloc(sizeof *ctx->clctx); if (ctx->clctx == NULL) { log_error("clccubes", "clctx malloc failed"); goto err; } ctx->clctx->device_type = CL_DEVICE_TYPE_GPU; strcpy(ctx->clctx->platform_name, "NVIDIA Corporation\0"); /* strcpy(ctx->clctx->platform_name, "Advanced Micro Devices, Inc.\0"); */ /* strcpy(ctx->clctx->platform_name, "Intel(R) Corporation\0"); */ rc = cl_init(ctx->clctx); if (rc != CL_SUCCESS) { log_error("clccubes", "[%d] Failed to initialize the OpenCL framework", rc); goto err; } if (ccubes_kernel_file == NULL) { log_error("clccubes", "Kernel file pointer is NULL", ccubes_kernel_file); goto err; } ctx->ccubes_kernel_file = strdup(ccubes_kernel_file); if (ctx->ccubes_kernel_file == NULL) { log_error("clccubes", "Failed to copy kernel file path %s", ccubes_kernel_file); goto err; } ctx->ninputs = ninputs; ctx->posrows = posrows; ctx->negrows = negrows; rc = ccubes_create_alloc(ctx, nofvalues, ON_set, OFF_set); if (rc != CL_SUCCESS) { log_error("clccubes", "Failed to allocate initial buffers"); goto err; } err: return ctx; } int ccubes_init(struct ccubes_context *ctx, int n_tasks, int n_tasks_off, int k, int prevfoundPI, int implicant_words, int value_bit_width, int value_bit_mask, int pichart_words, int estimPI ) { int rc = 0; ctx->k = k; ctx->prevfoundPI = prevfoundPI; ctx->implicant_words = implicant_words; ctx->value_bit_width = value_bit_width; ctx->value_bit_mask = value_bit_mask; ctx->pichart_words = pichart_words; ctx->estimPI = estimPI; ctx->gws = n_tasks; ctx->goff = n_tasks_off; err: return rc; } int ccubes_preprocess(struct ccubes_context *ctx) { int rc = 0; FILE *fp; fp = fopen("ccubes_generated.h", "w"); if (fp == NULL) { rc = -1; log_error("clccubes", "preprocess fopen failed (%d)", rc); goto err; } fprintf(fp, "#ifndef CCUBES_GENERATED_H__\n"); fprintf(fp, "#define CCUBES_GENERATED_H__\n"); fprintf(fp, "#define NINPUTS %d\n", ctx->ninputs); fprintf(fp, "#define POSROWS %d\n", ctx->posrows); fprintf(fp, "#define NEGROWS %d\n", ctx->negrows); fprintf(fp, "#define IMPLICANT_WORDS %d\n", ctx->implicant_words); fprintf(fp, "#define VALUE_BIT_WIDTH %d\n", ctx->value_bit_width); fprintf(fp, "#define VALUE_BIT_MASK %d\n", ctx->value_bit_mask); fprintf(fp, "#define PICHART_WORDS %d\n", ctx->pichart_words); fprintf(fp, "#endif\n"); fclose(fp); err: return rc; } int ccubes_build(struct ccubes_context *ctx) { int rc = 0; rc = cl_build(*ctx->clctx, CL_DEVICE_TYPE_GPU, ctx->ccubes_kernel_file, &ctx->ccubes_program); if (rc != CL_SUCCESS) { log_warn("clccubes", "Failed building ccubes.cl (%d)", rc); goto err; } rc = cl_get_kern(ctx->ccubes_program, "ccubes_task", &ctx->ccubes_task); if (rc != CL_SUCCESS) { log_warn("clccubes", "Failed fetching ccubes_task (%d)", rc); goto err; } err: return rc; } int ccubes_alloc(struct ccubes_context *ctx, unsigned int *p_implicants_pos, /* IN: RC */ unsigned int *p_implicants_val, /* IN: RC */ int *last_index, /* IN: RC */ int *p_covered, /* IN: RC */ int *p_pichart_pos, /* IN: RC */ bool *redundant, /* OUT: RW */ bool *coverage, /* OUT: RW */ int *taskpis, /* OUT: RW */ unsigned int *fixed_bits, /* OUT: RW */ unsigned int *value_bits, /* OUT: RW */ unsigned int *pichart_values /* OUT: RW */ ) { int rc = 0; /* * Save outputs for mapping */ ctx->h_redundant = redundant; ctx->h_coverage = coverage; ctx->h_taskpis = taskpis; ctx->h_fixed_bits = fixed_bits; ctx->h_value_bits = value_bits; ctx->h_pichart_values = pichart_values; /* * INPUTS */ /* __global const unsigned int *p_implicants_pos, IN: RC */ ctx->p_implicants_pos = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_pos, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const unsigned int *p_implicants_val, IN: RC */ ctx->p_implicants_val = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_val, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *last_index, IN: RC */ ctx->last_index = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->posrows * sizeof(int), last_index, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *p_covered, IN: RC */ ctx->p_covered = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->estimPI * sizeof(int), p_covered, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *p_pichart_pos, IN: RC */ ctx->p_pichart_pos = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->estimPI * ctx->pichart_words * sizeof(int), p_pichart_pos, &rc); if (rc != CL_SUCCESS) { goto err; } /* * OUTPUTS */ /* __global bool *redundant, OUT: RW */ ctx->redundant = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->posrows * sizeof(bool), NULL, &rc); /* __global bool *coverage, OUT: RW */ ctx->coverage = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->posrows * ctx->posrows * sizeof(bool), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global int *taskpis, OUT: RW */ ctx->taskpis = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * sizeof(int), NULL, &rc); /* __global unsigned int *fixed_bits, OUT: RW */ ctx->fixed_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->posrows * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global unsigned int *value_bits, OUT: RW */ ctx->value_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->posrows * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global unsigned int *pichart_values, OUT: RW */ ctx->pichart_values = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->posrows * ctx->pichart_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } err: return rc; } int ccubes_run(struct ccubes_context *ctx) { int rc = 0; int arg = 0; /* INPUTS */ arg = 0; rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(int), (void *)&ctx->k); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(int), (void *)&ctx->prevfoundPI); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->nofvalues); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->ON_set); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->OFF_set); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->p_implicants_pos); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->p_implicants_val); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->last_index); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->p_covered); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->p_pichart_pos); /* OUTPUTS */ rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->redundant); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->coverage); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->taskpis); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->fixed_bits); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->value_bits); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->pichart_values); if (rc != CL_SUCCESS) { log_error("clccubes", "Kernel arguments failed (%d)", rc); goto err; } cl_event event; log_debug("clccubes", "NDRange gws %d, goff %d", ctx->gws, ctx-> goff); rc = clEnqueueNDRangeKernel(ctx->clctx->gpu_queue, ctx->ccubes_task, 1, &ctx->goff, &ctx->gws, NULL, 0, NULL, &event); if (rc != CL_SUCCESS) { log_error("clccubes", "NDRange failed (%d)", rc); goto err; } clWaitForEvents(1, &event); err: return rc; } int ccubes_map(struct ccubes_context *ctx) { int rc = 0; ctx->h_redundant = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->redundant, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "redundant mapping failed (%d)", rc); goto err; } ctx->h_coverage = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->coverage, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->posrows * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "coverage mapping failed (%d)", rc); goto err; } ctx->h_taskpis = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->taskpis, CL_TRUE, CL_MAP_READ, 0, ctx->gws * sizeof(int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "taskpis mapping failed (%d)", rc); goto err; } ctx->h_fixed_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->fixed_bits, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->posrows * ctx->implicant_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "fixed_bits mapping failed (%d)", rc); goto err; } ctx->h_value_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->value_bits, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->posrows * ctx->implicant_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "value_bits mapping failed (%d)", rc); goto err; } ctx->h_pichart_values = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->pichart_values, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->posrows * ctx->pichart_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "pichart_values mapping failed (%d)", rc); goto err; } err: return rc; } int ccubes_unmap(struct ccubes_context *ctx) { int rc = 0; /* * UNMAP */ rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->redundant, ctx->h_redundant, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "redundant unmapping failed (%d)", rc); } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->coverage, ctx->h_coverage, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "coverage unmapping failed (%d)", rc); } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->taskpis, ctx->h_taskpis, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "taskpis unmapping failed (%d)", rc); } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->fixed_bits, ctx->h_fixed_bits, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "fixed_bits unmapping failed (%d)", rc); } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->value_bits, ctx->h_value_bits, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "value_bits unmapping failed (%d)", rc); } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->pichart_values, ctx->h_pichart_values, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "pichart_values unmapping failed (%d)", rc); } err: return rc; } int ccubes_do_tasks(struct ccubes_context *ccubesctx, int n_tasks, int n_tasks_off, int k, int prevfoundPI, int implicant_words, int value_bit_width, int value_bit_mask, int pichart_words, int estimPI, unsigned int *p_implicants_pos, /* IN: RC */ unsigned int *p_implicants_val, /* IN: RC */ int *last_index, /* IN: RC */ int *p_covered, /* IN: RC */ int *p_pichart_pos, /* IN: RC */ bool *redundant, /* OUT: RW */ bool *coverage, /* OUT: RW */ int *taskpis, /* OUT: RW */ unsigned int *fixed_bits, /* OUT: RW */ unsigned int *value_bits, /* OUT: RW */ unsigned int *pichart_values /* OUT: RW */ ) { int rc = 0; rc = ccubes_init(ccubesctx, n_tasks, n_tasks_off, k, prevfoundPI, implicant_words, value_bit_width, value_bit_mask, pichart_words, estimPI); if (ccubesctx == NULL) { log_error("clccubes", "ccubes_init failed (%d)", rc); goto err; } rc = ccubes_preprocess(ccubesctx); if (rc != 0) { log_error("clccubes", "ccubes_preprocess failed (%d)", rc); goto err; } rc = ccubes_build(ccubesctx); if (rc != CL_SUCCESS) { log_error("clccubes", "ccubes_build failed (%d)", rc); goto err; } rc = ccubes_alloc(ccubesctx, p_implicants_pos, p_implicants_val, last_index, p_covered, p_pichart_pos, redundant, coverage, taskpis, fixed_bits, value_bits, pichart_values); if (rc != CL_SUCCESS) { log_error("clccubes", "ccubes_alloc failed (%d)", rc); goto err; } rc = ccubes_run(ccubesctx); if (rc != CL_SUCCESS) { log_error("clccubes", "ccubes_run failed (%d)", rc); goto err; } rc = ccubes_map(ccubesctx); if (rc != CL_SUCCESS) { log_error("clccubes", "ccubes_map failed (%d)", rc); goto err; } err: return rc; } void ccubes_clean_up(struct ccubes_context *ctx) { clReleaseProgram(ctx->ccubes_program); log_debug("clccubes", "clReleaseProgram ccubes_program"); ccubes_unmap(ctx); log_debug("clccubes", "ccubes_unmap"); /* INPUTS */ clReleaseMemObject(ctx->p_implicants_pos); clReleaseMemObject(ctx->p_implicants_val); clReleaseMemObject(ctx->last_index); clReleaseMemObject(ctx->p_covered); clReleaseMemObject(ctx->p_pichart_pos); log_debug("clccubes", "clReleaseMemObject INPUTS"); /* OUTPUTS */ clReleaseMemObject(ctx->redundant); clReleaseMemObject(ctx->coverage); clReleaseMemObject(ctx->taskpis); clReleaseMemObject(ctx->fixed_bits); clReleaseMemObject(ctx->value_bits); clReleaseMemObject(ctx->pichart_values); log_debug("clccubes", "clReleaseMemObject OUTPUTS"); return; } void ccubes_destroy(struct ccubes_context *ctx) { clReleaseMemObject(ctx->nofvalues); clReleaseMemObject(ctx->ON_set); clReleaseMemObject(ctx->OFF_set); cl_clean_up(*ctx->clctx); log_debug("clccubes", "cl_clean_up"); free(ctx->ccubes_kernel_file); free(ctx); return; }