ccubes-cl/src/clccubes.c

518 lines
14 KiB
C
Raw Normal View History

2025-03-26 11:50:07 +02:00
/*
* Copyright (c) 2025 Paul Irofti <paul@irofti.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
2025-03-21 18:52:41 +02:00
#include <stdio.h>
#include <string.h>
#include "real.h"
#include "cl_setup.h"
#include "clccubes.h"
#include "config.h"
#include "logging.h"
struct ccubes_context *
ccubes_init(int n_tasks,
2025-03-26 12:59:48 +02:00
int n_tasks_off,
int k,
int ninputs,
int posrows,
int negrows,
int implicant_words,
int value_bit_width,
int pichart_words,
int estimPI
)
2025-03-21 18:52:41 +02:00
{
int rc = 0;
struct ccubes_context *ctx = NULL;
ctx = malloc(sizeof *ctx);
if (ctx == NULL) {
log_error("clccubes", "ctx malloc failed");
goto err;
}
2025-03-21 18:52:41 +02:00
ctx->clctx = malloc(sizeof *ctx->clctx);
if (ctx->clctx == NULL) {
log_error("clccubes", "clctx malloc failed");
goto err;
}
ctx->k = k;
ctx->ninputs = ninputs;
ctx->posrows = posrows;
ctx->negrows = negrows;
ctx->implicant_words = implicant_words;
ctx->value_bit_width = value_bit_width;
ctx->pichart_words = pichart_words;
ctx->estimPI = estimPI;
2025-03-26 12:59:48 +02:00
ctx->gws = n_tasks;
ctx->goff = n_tasks_off;
err:
return ctx;
}
int
ccubes_preprocess(struct ccubes_context *ctx)
{
int rc = 0;
FILE *fp;
fp = fopen("ccubes_generated.h", "w");
if (fp == NULL) {
rc = -1;
log_error("clccubes", "preprocess fopen failed (%d)", rc);
goto err;
}
fprintf(fp, "#ifndef CCUBES_GENERATED_H__\n");
fprintf(fp, "#define CCUBES_GENERATED_H__\n");
fprintf(fp, "#define NINPUTS %d\n", ctx->ninputs);
fprintf(fp, "#define POSROWS %d\n", ctx->posrows);
fprintf(fp, "#define NEGROWS %d\n", ctx->negrows);
fprintf(fp, "#define IMPLICANT_WORDS %d\n", ctx->implicant_words);
fprintf(fp, "#define VALUE_BIT_WIDTH %d\n", ctx->value_bit_width);
fprintf(fp, "#define PICHART_WORDS %d\n", ctx->pichart_words);
2025-03-26 15:42:34 +02:00
fprintf(fp, "#endif\n");
fclose(fp);
err:
return rc;
}
int
ccubes_build(struct ccubes_context *ctx)
{
int rc = 0;
2025-03-21 18:52:41 +02:00
ctx->clctx->device_type = CL_DEVICE_TYPE_GPU;
strcpy(ctx->clctx->platform_name, "NVIDIA Corporation\0");
/* strcpy(ctx->clctx->platform_name, "Advanced Micro Devices, Inc.\0"); */
/*strcpy(ctx->clctx->platform_name, "Intel(R) Corporation\0");*/
rc = cl_init(ctx->clctx);
if (rc != CL_SUCCESS) {
printf("[%d] Failed to initialize the OpenCL framework\n",
rc);
goto err;
}
rc = cl_build(*ctx->clctx, CL_DEVICE_TYPE_GPU,
"ccubes.cl", &ctx->ccubes_program);
if (rc != CL_SUCCESS) {
log_warn("clccubes", "Failed building ccubes.cl (%d)", rc);
2025-03-21 18:52:41 +02:00
goto err;
}
rc = cl_get_kern(ctx->ccubes_program, "ccubes_task",
&ctx->ccubes_task);
if (rc != CL_SUCCESS) {
log_warn("clccubes", "Failed fetching ccubes_task (%d)", rc);
2025-03-21 18:52:41 +02:00
goto err;
}
err:
return rc;
}
int
ccubes_alloc(struct ccubes_context *ctx,
2025-03-26 15:15:57 +02:00
int *nofvalues, /* IN: RC */
int *ON_set, /* IN: RC */
int *OFF_set, /* IN: RC */
2025-03-26 11:41:28 +02:00
unsigned int *p_implicants_pos, /* IN: RC */
unsigned int *p_implicants_val, /* IN: RC */
int *last_index, /* IN: RC */
int *p_covered, /* IN: RC */
int *p_pichart_pos, /* IN: RC */
bool *coverage, /* OUT: RW */
unsigned int *fixed_bits, /* OUT: RW */
unsigned int *value_bits, /* OUT: RW */
unsigned int *pichart_values /* OUT: RW */
)
{
int rc = 0;
/*
* Save outputs for mapping
*/
ctx->h_coverage = coverage;
ctx->h_fixed_bits = fixed_bits;
ctx->h_value_bits = value_bits;
ctx->h_pichart_values = pichart_values;
/*
* INPUTS
*/
2025-03-26 15:15:57 +02:00
/* __global const int *nofvalues, IN: RC */
ctx->nofvalues = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
2025-03-26 15:15:57 +02:00
ctx->ninputs * sizeof(int), nofvalues, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
2025-03-26 15:15:57 +02:00
/* __global const int *ON_set, IN: RC */
ctx->ON_set = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
2025-03-26 15:15:57 +02:00
ctx->posrows * ctx->ninputs * sizeof(int), ON_set, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
2025-03-26 15:15:57 +02:00
/* __global const int *OFF_set, IN: RC */
ctx->OFF_set = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
2025-03-26 15:15:57 +02:00
ctx->ninputs * ctx->negrows * sizeof(int), OFF_set, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const unsigned int *p_implicants_pos, IN: RC */
ctx->p_implicants_pos = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_pos, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const unsigned int *p_implicants_val, IN: RC */
ctx->p_implicants_val = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_val, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const int *last_index, IN: RC */
ctx->last_index = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->posrows * sizeof(int), last_index, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const int *p_covered, IN: RC */
ctx->p_covered = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->estimPI * sizeof(int), p_covered, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const int *p_pichart_pos, IN: RC */
ctx->p_pichart_pos = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->estimPI * ctx->pichart_words * sizeof(int), p_pichart_pos, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/*
* OUTPUTS
*/
/* __global bool *coverage, OUT: RW */
ctx->coverage = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE,
ctx->gws * ctx->posrows * sizeof(bool), NULL, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global unsigned int *fixed_bits, OUT: RW */
ctx->fixed_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE,
ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global unsigned int *value_bits, OUT: RW */
ctx->value_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE,
ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global unsigned int *pichart_values, OUT: RW */
ctx->pichart_values = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE,
ctx->gws * ctx->pichart_words * sizeof(unsigned int), NULL, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
2025-03-21 18:52:41 +02:00
err:
return rc;
}
int
ccubes_run(struct ccubes_context *ctx)
2025-03-21 18:52:41 +02:00
{
int rc = 0;
int arg = 0;
2025-03-21 18:52:41 +02:00
/* INPUTS */
arg = 0;
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(int), (void *)&ctx->k);
2025-03-26 15:51:20 +02:00
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->nofvalues);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->ON_set);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->OFF_set);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->p_implicants_pos);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->p_implicants_val);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->last_index);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->p_covered);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->p_pichart_pos);
/* OUTPUTS */
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->coverage);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->fixed_bits);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->value_bits);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->pichart_values);
if (rc != CL_SUCCESS) {
log_error("clccubes", "Kernel arguments failed (%d)", rc);
goto err;
}
2025-03-27 19:57:16 +02:00
cl_event event;
log_debug("clccubes", "NDRange gws %d, goff %d", ctx->gws, ctx-> goff);
rc = clEnqueueNDRangeKernel(ctx->clctx->gpu_queue, ctx->ccubes_task,
2025-03-26 12:59:48 +02:00
1, &ctx->goff, &ctx->gws, NULL,
2025-03-27 19:57:16 +02:00
0, NULL, &event);
if (rc != CL_SUCCESS) {
log_error("clccubes", "NDRange failed (%d)", rc);
goto err;
}
2025-03-27 19:57:16 +02:00
clWaitForEvents(1, &event);
err:
return rc;
2025-03-21 18:52:41 +02:00
}
int
ccubes_map(struct ccubes_context *ctx)
2025-03-21 18:52:41 +02:00
{
int rc = 0;
ctx->h_coverage = clEnqueueMapBuffer(ctx->clctx->gpu_queue,
ctx->coverage, CL_TRUE, CL_MAP_READ, 0,
ctx->gws * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc);
if (rc != CL_SUCCESS) {
log_error("clccubes", "coverage mapping failed (%d)", rc);
goto err;
}
ctx->h_fixed_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue,
ctx->fixed_bits, CL_TRUE, CL_MAP_READ, 0,
ctx->gws * ctx->implicant_words * sizeof(unsigned int),
0, NULL, NULL, &rc);
if (rc != CL_SUCCESS) {
log_error("clccubes", "fixed_bits mapping failed (%d)", rc);
goto err;
}
ctx->h_value_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue,
ctx->value_bits, CL_TRUE, CL_MAP_READ, 0,
ctx->gws * ctx->implicant_words * sizeof(unsigned int),
0, NULL, NULL, &rc);
if (rc != CL_SUCCESS) {
log_error("clccubes", "value_bits mapping failed (%d)", rc);
goto err;
}
ctx->h_pichart_values = clEnqueueMapBuffer(ctx->clctx->gpu_queue,
ctx->pichart_values, CL_TRUE, CL_MAP_READ, 0,
ctx->gws * ctx->pichart_words * sizeof(unsigned int),
0, NULL, NULL, &rc);
if (rc != CL_SUCCESS) {
log_error("clccubes", "pichart_values mapping failed (%d)", rc);
goto err;
}
err:
return rc;
}
2025-03-26 11:41:28 +02:00
int
ccubes_unmap(struct ccubes_context *ctx)
{
int rc = 0;
/*
* UNMAP
*/
rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->coverage,
ctx->h_coverage, 0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "coverage unmapping failed (%d)", rc);
}
rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->fixed_bits,
ctx->h_fixed_bits, 0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "fixed_bits unmapping failed (%d)", rc);
}
rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->value_bits,
ctx->h_value_bits, 0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "value_bits unmapping failed (%d)", rc);
}
rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->pichart_values,
ctx->h_pichart_values, 0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "pichart_values unmapping failed (%d)", rc);
}
err:
return rc;
}
struct ccubes_context *
2025-03-26 15:15:57 +02:00
ccubes_do_tasks(int n_tasks,
2025-03-26 12:59:48 +02:00
int n_tasks_off,
int k,
int ninputs,
int posrows,
int negrows,
int implicant_words,
int value_bit_width,
int pichart_words,
int estimPI,
2025-03-26 15:15:57 +02:00
int *nofvalues, /* IN: RC */
int *ON_set, /* IN: RC */
int *OFF_set, /* IN: RC */
2025-03-26 11:41:28 +02:00
unsigned int *p_implicants_pos, /* IN: RC */
unsigned int *p_implicants_val, /* IN: RC */
int *last_index, /* IN: RC */
int *p_covered, /* IN: RC */
int *p_pichart_pos, /* IN: RC */
bool *coverage, /* OUT: RW */
unsigned int *fixed_bits, /* OUT: RW */
unsigned int *value_bits, /* OUT: RW */
unsigned int *pichart_values /* OUT: RW */
)
{
int rc = 0;
struct ccubes_context *ccubesctx = NULL;
2025-03-21 18:52:41 +02:00
ccubesctx = ccubes_init(n_tasks,
2025-03-26 12:59:48 +02:00
n_tasks_off,
k,
ninputs,
posrows,
negrows,
implicant_words,
value_bit_width,
pichart_words,
estimPI);
if (ccubesctx == NULL) {
log_error("clccubes", "ccubes_init failed (%d)", rc);
goto err;
}
rc = ccubes_preprocess(ccubesctx);
if (rc != 0) {
log_error("clccubes", "ccubes_preprocess failed (%d)", rc);
goto err;
}
rc = ccubes_build(ccubesctx);
if (rc != CL_SUCCESS) {
log_error("clccubes", "ccubes_build failed (%d)", rc);
goto err;
}
rc = ccubes_alloc(ccubesctx,
nofvalues,
ON_set,
OFF_set,
p_implicants_pos,
p_implicants_val,
last_index,
p_covered,
p_pichart_pos,
coverage,
fixed_bits,
value_bits,
2025-03-26 11:41:28 +02:00
pichart_values);
if (rc != CL_SUCCESS) {
log_error("clccubes", "ccubes_alloc failed (%d)", rc);
goto err;
}
rc = ccubes_run(ccubesctx);
if (rc != CL_SUCCESS) {
log_error("clccubes", "ccubes_run failed (%d)", rc);
goto err;
}
2025-03-21 18:52:41 +02:00
rc = ccubes_map(ccubesctx);
if (rc != CL_SUCCESS) {
log_error("clccubes", "ccubes_map failed (%d)", rc);
goto err;
}
2025-03-21 18:52:41 +02:00
err:
return ccubesctx;
2025-03-21 18:52:41 +02:00
}
void
ccubes_clean_up(struct ccubes_context *ctx)
{
clReleaseProgram(ctx->ccubes_program);
log_debug("clccubes", "clReleaseProgram ccubes_program");
ccubes_unmap(ctx);
log_debug("clccubes", "ccubes_unmap");
/* INPUTS */
clReleaseMemObject(ctx->nofvalues);
clReleaseMemObject(ctx->ON_set);
clReleaseMemObject(ctx->OFF_set);
clReleaseMemObject(ctx->p_implicants_pos);
clReleaseMemObject(ctx->p_implicants_val);
clReleaseMemObject(ctx->last_index);
clReleaseMemObject(ctx->p_covered);
clReleaseMemObject(ctx->p_pichart_pos);
log_debug("clccubes", "clReleaseMemObject INPUTS");
/* OUTPUTS */
clReleaseMemObject(ctx->coverage);
clReleaseMemObject(ctx->fixed_bits);
clReleaseMemObject(ctx->value_bits);
clReleaseMemObject(ctx->pichart_values);
log_debug("clccubes", "clReleaseMemObject OUTPUTS");
cl_clean_up(*ctx->clctx);
log_debug("clccubes", "cl_clean_up");
free(ctx);
return;
}