/* * Copyright (c) 2025 Paul Irofti * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ #include #include #include "real.h" #include "cl_setup.h" #include "clccubes.h" #include "config.h" #include "logging.h" int ccubes_init(struct ccubes_context *ctx, int n_tasks, int n_tasks_off, int k, int ninputs, int posrows, int negrows, int implicant_words, int value_bit_width, int pichart_words, int estimPI ) { int rc = 0; ctx->clctx = malloc(sizeof *ctx->clctx); if (ctx->clctx == NULL) { log_error("clccubes", "clctx malloc failed (%d)", rc); goto err; } ctx->k = k; ctx->ninputs = ninputs; ctx->posrows = posrows; ctx->negrows = negrows; ctx->implicant_words = implicant_words; ctx->value_bit_width = value_bit_width; ctx->pichart_words = pichart_words; ctx->estimPI = estimPI; ctx->gws = n_tasks; ctx->goff = n_tasks_off; err: return rc; } int ccubes_preprocess(struct ccubes_context *ctx) { int rc = 0; FILE *fp; fp = fopen("ccubes_generated.h", "w"); if (fp == NULL) { rc = -1; log_error("clccubes", "preprocess fopen failed (%d)", rc); goto err; } fprintf(fp, "#ifndef CCUBES_GENERATED_H__\n"); fprintf(fp, "#define CCUBES_GENERATED_H__\n"); fprintf(fp, "#define NINPUTS %d\n", ctx->ninputs); fprintf(fp, "#define POSROWS %d\n", ctx->posrows); fprintf(fp, "#define NEGROWS %d\n", ctx->negrows); fprintf(fp, "#define IMPLICANT_WORDS %d\n", ctx->implicant_words); fprintf(fp, "#define VALUE_BIT_WIDTH %d\n", ctx->value_bit_width); fprintf(fp, "#define PICHART_WORDS %d\n", ctx->pichart_words); fprintf(fp, "#endif"); fclose(fp); err: return rc; } int ccubes_build(struct ccubes_context *ctx) { int rc = 0; ctx->clctx->device_type = CL_DEVICE_TYPE_GPU; strcpy(ctx->clctx->platform_name, "NVIDIA Corporation\0"); /* strcpy(ctx->clctx->platform_name, "Advanced Micro Devices, Inc.\0"); */ /*strcpy(ctx->clctx->platform_name, "Intel(R) Corporation\0");*/ rc = cl_init(ctx->clctx); if (rc != CL_SUCCESS) { printf("[%d] Failed to initialize the OpenCL framework\n", rc); goto err; } rc = cl_build(*ctx->clctx, CL_DEVICE_TYPE_GPU, "ccubes.cl", &ctx->ccubes_program); if (rc != CL_SUCCESS) { log_warn("clccubes", "Failed building ccubes.cl (%d)", rc); goto err; } rc = cl_get_kern(ctx->ccubes_program, "ccubes_task", &ctx->ccubes_task); if (rc != CL_SUCCESS) { log_warn("clccubes", "Failed fetching ccubes_task (%d)", rc); goto err; } err: return rc; } int ccubes_alloc(struct ccubes_context *ctx, int *nofvalues, /* IN: RC */ int *ON_set, /* IN: RC */ int *OFF_set, /* IN: RC */ unsigned int *p_implicants_pos, /* IN: RC */ unsigned int *p_implicants_val, /* IN: RC */ int *last_index, /* IN: RC */ int *p_covered, /* IN: RC */ int *p_pichart_pos, /* IN: RC */ bool *coverage, /* OUT: RW */ unsigned int *fixed_bits, /* OUT: RW */ unsigned int *value_bits, /* OUT: RW */ unsigned int *pichart_values /* OUT: RW */ ) { int rc = 0; /* * Save outputs for mapping */ ctx->h_coverage = coverage; ctx->h_fixed_bits = fixed_bits; ctx->h_value_bits = value_bits; ctx->h_pichart_values = pichart_values; /* * INPUTS */ /* __global const int *nofvalues, IN: RC */ ctx->nofvalues = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->ninputs * sizeof(int), nofvalues, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *ON_set, IN: RC */ ctx->ON_set = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->posrows * ctx->ninputs * sizeof(int), ON_set, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *OFF_set, IN: RC */ ctx->OFF_set = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->ninputs * ctx->negrows * sizeof(int), OFF_set, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const unsigned int *p_implicants_pos, IN: RC */ ctx->p_implicants_pos = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_pos, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const unsigned int *p_implicants_val, IN: RC */ ctx->p_implicants_val = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_val, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *last_index, IN: RC */ ctx->last_index = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->posrows * sizeof(int), last_index, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *p_covered, IN: RC */ ctx->p_covered = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->estimPI * sizeof(int), p_covered, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *p_pichart_pos, IN: RC */ ctx->p_pichart_pos = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, ctx->estimPI * ctx->pichart_words * sizeof(int), p_pichart_pos, &rc); if (rc != CL_SUCCESS) { goto err; } /* * OUTPUTS */ /* __global bool *coverage, OUT: RW */ ctx->coverage = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->posrows * sizeof(bool), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global unsigned int *fixed_bits, OUT: RW */ ctx->fixed_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global unsigned int *value_bits, OUT: RW */ ctx->value_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global unsigned int *pichart_values, OUT: RW */ ctx->pichart_values = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, ctx->gws * ctx->pichart_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } err: return rc; } int ccubes_run(struct ccubes_context *ctx) { int rc = 0; int arg = 0; /* INPUTS */ arg = 0; rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(int), (void *)&ctx->k); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->ON_set); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->OFF_set); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->p_implicants_pos); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->p_implicants_val); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->last_index); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->p_covered); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->p_pichart_pos); /* OUTPUTS */ rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->coverage); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->fixed_bits); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->value_bits); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->pichart_values); if (rc != CL_SUCCESS) { log_error("clccubes", "Kernel arguments failed (%d)", rc); goto err; } rc = clEnqueueNDRangeKernel(ctx->clctx->gpu_queue, ctx->ccubes_task, 1, &ctx->goff, &ctx->gws, NULL, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "NDRange failed (%d)", rc); goto err; } err: return rc; } int ccubes_map(struct ccubes_context *ctx) { int rc = 0; ctx->h_coverage = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->coverage, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "coverage mapping failed (%d)", rc); goto err; } ctx->h_fixed_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->fixed_bits, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->implicant_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "fixed_bits mapping failed (%d)", rc); goto err; } ctx->h_value_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->value_bits, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->implicant_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "value_bits mapping failed (%d)", rc); goto err; } ctx->h_pichart_values = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->pichart_values, CL_TRUE, CL_MAP_READ, 0, ctx->gws * ctx->pichart_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "pichart_values mapping failed (%d)", rc); goto err; } err: return rc; } int ccubes_unmap(struct ccubes_context *ctx) { int rc = 0; /* * UNMAP */ rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->coverage, ctx->h_coverage, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "coverage unmapping failed (%d)", rc); } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->fixed_bits, ctx->h_fixed_bits, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "fixed_bits unmapping failed (%d)", rc); } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->value_bits, ctx->h_value_bits, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "value_bits unmapping failed (%d)", rc); } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->pichart_values, ctx->h_pichart_values, 0, NULL, NULL); if (rc != CL_SUCCESS) { log_error("clccubes", "pichart_values unmapping failed (%d)", rc); } err: return rc; } int ccubes_do_tasks(int n_tasks, int n_tasks_off, int k, int ninputs, int posrows, int negrows, int implicant_words, int value_bit_width, int pichart_words, int estimPI, int *nofvalues, /* IN: RC */ int *ON_set, /* IN: RC */ int *OFF_set, /* IN: RC */ unsigned int *p_implicants_pos, /* IN: RC */ unsigned int *p_implicants_val, /* IN: RC */ int *last_index, /* IN: RC */ int *p_covered, /* IN: RC */ int *p_pichart_pos, /* IN: RC */ bool *coverage, /* OUT: RW */ unsigned int *fixed_bits, /* OUT: RW */ unsigned int *value_bits, /* OUT: RW */ unsigned int *pichart_values /* OUT: RW */ ) { int rc = 0; struct ccubes_context ccubesctx; rc = ccubes_init(&ccubesctx, n_tasks, n_tasks_off, k, ninputs, posrows, negrows, implicant_words, value_bit_width, pichart_words, estimPI); if (rc != 0) { log_error("clccubes", "ccubes_init failed (%d)", rc); goto err; } rc = ccubes_preprocess(&ccubesctx); if (rc != 0) { log_error("clccubes", "ccubes_preprocess failed (%d)", rc); goto err; } rc = ccubes_build(&ccubesctx); if (rc != CL_SUCCESS) { log_error("clccubes", "ccubes_build failed (%d)", rc); goto err; } rc = ccubes_alloc(&ccubesctx, nofvalues, ON_set, OFF_set, p_implicants_pos, p_implicants_val, last_index, p_covered, p_pichart_pos, coverage, fixed_bits, value_bits, pichart_values); if (rc != CL_SUCCESS) { log_error("clccubes", "ccubes_alloc failed (%d)", rc); goto err; } rc = ccubes_run(&ccubesctx); if (rc != CL_SUCCESS) { log_error("clccubes", "ccubes_run failed (%d)", rc); goto err; } rc = ccubes_map(&ccubesctx); if (rc != CL_SUCCESS) { log_error("clccubes", "ccubes_map failed (%d)", rc); goto err; } err: return rc; } void ccubes_clean_up(struct ccubes_context *ctx) { clReleaseProgram(ctx->ccubes_program); ccubes_unmap(ctx); /* INPUTS */ clReleaseMemObject(ctx->nofvalues); clReleaseMemObject(ctx->ON_set); clReleaseMemObject(ctx->OFF_set); clReleaseMemObject(ctx->p_implicants_pos); clReleaseMemObject(ctx->p_implicants_val); clReleaseMemObject(ctx->last_index); clReleaseMemObject(ctx->p_covered); clReleaseMemObject(ctx->p_pichart_pos); /* OUTPUTS */ clReleaseMemObject(ctx->coverage); clReleaseMemObject(ctx->fixed_bits); clReleaseMemObject(ctx->value_bits); clReleaseMemObject(ctx->pichart_values); cl_clean_up(*ctx->clctx); return; }