ccubes-cl/clccubes.c

512 lines
13 KiB
C
Raw Normal View History

2025-03-26 11:50:07 +02:00
/*
* Copyright (c) 2025 Paul Irofti <paul@irofti.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
2025-03-21 18:52:41 +02:00
#include <stdio.h>
#include <string.h>
#include "real.h"
#include "cl_setup.h"
#include "clccubes.h"
#include "config.h"
#include "logging.h"
2025-03-26 11:41:28 +02:00
unsigned long int
nchoosek(int n, int k)
{
if (k == 0 || k == n) return 1;
if (k == 1) return n;
unsigned long int result = 1;
if (k > n - k) {
k = n - k;
}
for (int i = 0; i < k; i++) {
result = result * (n - i) / (i + 1);
}
return result;
}
2025-03-21 18:52:41 +02:00
int
ccubes_init(struct ccubes_context *ctx,
int k,
int ninputs,
int posrows,
int negrows,
int implicant_words,
int value_bit_width,
int pichart_words,
int estimPI
)
2025-03-21 18:52:41 +02:00
{
int rc = 0;
ctx->clctx = malloc(sizeof *ctx->clctx);
if (ctx->clctx == NULL) {
log_error("clccubes", "clctx malloc failed (%d)", rc);
goto err;
}
ctx->k = k;
ctx->ninputs = ninputs;
ctx->posrows = posrows;
ctx->negrows = negrows;
ctx->implicant_words = implicant_words;
ctx->value_bit_width = value_bit_width;
ctx->pichart_words = pichart_words;
ctx->estimPI = estimPI;
ctx->gws = nchoosek(ninputs, k);
err:
return rc;
}
int
ccubes_preprocess(struct ccubes_context *ctx)
{
int rc = 0;
FILE *fp;
fp = fopen("ccubes_generated.h", "w");
if (fp == NULL) {
rc = -1;
log_error("clccubes", "preprocess fopen failed (%d)", rc);
goto err;
}
fprintf(fp, "#ifndef CCUBES_GENERATED_H__\n");
fprintf(fp, "#define CCUBES_GENERATED_H__\n");
fprintf(fp, "#define NINPUTS %d\n", ctx->ninputs);
fprintf(fp, "#define POSROWS %d\n", ctx->posrows);
fprintf(fp, "#define NEGROWS %d\n", ctx->negrows);
fprintf(fp, "#define IMPLICANT_WORDS %d\n", ctx->implicant_words);
fprintf(fp, "#define VALUE_BIT_WIDTH %d\n", ctx->value_bit_width);
fprintf(fp, "#define PICHART_WORDS %d\n", ctx->pichart_words);
fprintf(fp, "#endif");
fclose(fp);
err:
return rc;
}
int
ccubes_build(struct ccubes_context *ctx)
{
int rc = 0;
2025-03-21 18:52:41 +02:00
ctx->clctx->device_type = CL_DEVICE_TYPE_GPU;
strcpy(ctx->clctx->platform_name, "NVIDIA Corporation\0");
/* strcpy(ctx->clctx->platform_name, "Advanced Micro Devices, Inc.\0"); */
/*strcpy(ctx->clctx->platform_name, "Intel(R) Corporation\0");*/
rc = cl_init(ctx->clctx);
if (rc != CL_SUCCESS) {
printf("[%d] Failed to initialize the OpenCL framework\n",
rc);
goto err;
}
rc = cl_build(*ctx->clctx, CL_DEVICE_TYPE_GPU,
"ccubes.cl", &ctx->ccubes_program);
if (rc != CL_SUCCESS) {
log_warn("clccubes", "Failed building ccubes.cl (%d)", rc);
2025-03-21 18:52:41 +02:00
goto err;
}
rc = cl_get_kern(ctx->ccubes_program, "ccubes_task",
&ctx->ccubes_task);
if (rc != CL_SUCCESS) {
log_warn("clccubes", "Failed fetching ccubes_task (%d)", rc);
2025-03-21 18:52:41 +02:00
goto err;
}
err:
return rc;
}
int
ccubes_alloc(struct ccubes_context *ctx,
2025-03-26 11:41:28 +02:00
real *nofvalues, /* IN: RC */
real *ON_set, /* IN: RC */
real *OFF_set, /* IN: RC */
unsigned int *p_implicants_pos, /* IN: RC */
unsigned int *p_implicants_val, /* IN: RC */
int *last_index, /* IN: RC */
int *p_covered, /* IN: RC */
int *p_pichart_pos, /* IN: RC */
bool *coverage, /* OUT: RW */
unsigned int *fixed_bits, /* OUT: RW */
unsigned int *value_bits, /* OUT: RW */
unsigned int *pichart_values /* OUT: RW */
)
{
int rc = 0;
/*
* Save outputs for mapping
*/
ctx->h_coverage = coverage;
ctx->h_fixed_bits = fixed_bits;
ctx->h_value_bits = value_bits;
ctx->h_pichart_values = pichart_values;
/*
* INPUTS
*/
/* __global const real *nofvalues, IN: RC */
ctx->nofvalues = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->ninputs * sizeof(real), nofvalues, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const real *ON_set, IN: RC */
ctx->ON_set = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->posrows * ctx->ninputs * sizeof(real), ON_set, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const real *OFF_set, IN: RC */
ctx->OFF_set = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->ninputs * ctx->negrows * sizeof(real), OFF_set, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const unsigned int *p_implicants_pos, IN: RC */
ctx->p_implicants_pos = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_pos, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const unsigned int *p_implicants_val, IN: RC */
ctx->p_implicants_val = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_val, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const int *last_index, IN: RC */
ctx->last_index = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->posrows * sizeof(int), last_index, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const int *p_covered, IN: RC */
ctx->p_covered = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->estimPI * sizeof(int), p_covered, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global const int *p_pichart_pos, IN: RC */
ctx->p_pichart_pos = clCreateBuffer(ctx->clctx->ctx,
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
ctx->estimPI * ctx->pichart_words * sizeof(int), p_pichart_pos, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/*
* OUTPUTS
*/
/* __global bool *coverage, OUT: RW */
ctx->coverage = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE,
ctx->gws * ctx->posrows * sizeof(bool), NULL, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global unsigned int *fixed_bits, OUT: RW */
ctx->fixed_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE,
ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global unsigned int *value_bits, OUT: RW */
ctx->value_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE,
ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
/* __global unsigned int *pichart_values, OUT: RW */
ctx->pichart_values = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE,
ctx->gws * ctx->pichart_words * sizeof(unsigned int), NULL, &rc);
if (rc != CL_SUCCESS) {
goto err;
}
2025-03-21 18:52:41 +02:00
err:
return rc;
}
int
ccubes_run(struct ccubes_context *ctx)
2025-03-21 18:52:41 +02:00
{
int rc = 0;
int arg = 0;
2025-03-21 18:52:41 +02:00
/* INPUTS */
arg = 0;
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(int), (void *)&ctx->k);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->ON_set);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->OFF_set);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->p_implicants_pos);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->p_implicants_val);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->last_index);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->p_covered);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->p_pichart_pos);
/* OUTPUTS */
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->coverage);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->fixed_bits);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->value_bits);
rc |= clSetKernelArg(ctx->ccubes_task, arg++,
sizeof(cl_mem), (void *)&ctx->pichart_values);
if (rc != CL_SUCCESS) {
log_error("clccubes", "Kernel arguments failed (%d)", rc);
goto err;
}
rc = clEnqueueNDRangeKernel(ctx->clctx->gpu_queue, ctx->ccubes_task,
2025-03-26 11:41:28 +02:00
1, NULL, &ctx->gws, NULL,
0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "NDRange failed (%d)", rc);
goto err;
}
err:
return rc;
2025-03-21 18:52:41 +02:00
}
int
ccubes_map(struct ccubes_context *ctx)
2025-03-21 18:52:41 +02:00
{
int rc = 0;
ctx->h_coverage = clEnqueueMapBuffer(ctx->clctx->gpu_queue,
ctx->coverage, CL_TRUE, CL_MAP_READ, 0,
ctx->gws * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc);
if (rc != CL_SUCCESS) {
log_error("clccubes", "coverage mapping failed (%d)", rc);
goto err;
}
ctx->h_fixed_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue,
ctx->fixed_bits, CL_TRUE, CL_MAP_READ, 0,
ctx->gws * ctx->implicant_words * sizeof(unsigned int),
0, NULL, NULL, &rc);
if (rc != CL_SUCCESS) {
log_error("clccubes", "fixed_bits mapping failed (%d)", rc);
goto err;
}
ctx->h_value_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue,
ctx->value_bits, CL_TRUE, CL_MAP_READ, 0,
ctx->gws * ctx->implicant_words * sizeof(unsigned int),
0, NULL, NULL, &rc);
if (rc != CL_SUCCESS) {
log_error("clccubes", "value_bits mapping failed (%d)", rc);
goto err;
}
ctx->h_pichart_values = clEnqueueMapBuffer(ctx->clctx->gpu_queue,
ctx->pichart_values, CL_TRUE, CL_MAP_READ, 0,
ctx->gws * ctx->pichart_words * sizeof(unsigned int),
0, NULL, NULL, &rc);
if (rc != CL_SUCCESS) {
log_error("clccubes", "pichart_values mapping failed (%d)", rc);
goto err;
}
err:
return rc;
}
2025-03-26 11:41:28 +02:00
int
ccubes_unmap(struct ccubes_context *ctx)
{
int rc = 0;
/*
* UNMAP
*/
rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->coverage,
ctx->h_coverage, 0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "coverage unmapping failed (%d)", rc);
}
rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->fixed_bits,
ctx->h_fixed_bits, 0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "fixed_bits unmapping failed (%d)", rc);
}
rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->value_bits,
ctx->h_value_bits, 0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "value_bits unmapping failed (%d)", rc);
}
rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->pichart_values,
ctx->h_pichart_values, 0, NULL, NULL);
if (rc != CL_SUCCESS) {
log_error("clccubes", "pichart_values unmapping failed (%d)", rc);
}
err:
return rc;
}
int
ccubes(int k,
int ninputs,
int posrows,
int negrows,
int implicant_words,
int value_bit_width,
int pichart_words,
int estimPI,
2025-03-26 11:41:28 +02:00
real *nofvalues, /* IN: RC */
real *ON_set, /* IN: RC */
real *OFF_set, /* IN: RC */
unsigned int *p_implicants_pos, /* IN: RC */
unsigned int *p_implicants_val, /* IN: RC */
int *last_index, /* IN: RC */
int *p_covered, /* IN: RC */
int *p_pichart_pos, /* IN: RC */
bool *coverage, /* OUT: RW */
unsigned int *fixed_bits, /* OUT: RW */
unsigned int *value_bits, /* OUT: RW */
unsigned int *pichart_values /* OUT: RW */
)
{
int rc = 0;
2025-03-21 18:52:41 +02:00
struct ccubes_context ccubesctx;
rc = ccubes_init(&ccubesctx,
k,
ninputs,
posrows,
negrows,
implicant_words,
value_bit_width,
pichart_words,
estimPI);
if (rc != 0) {
log_error("clccubes", "ccubes_init failed (%d)", rc);
goto err;
}
rc = ccubes_preprocess(&ccubesctx);
if (rc != 0) {
log_error("clccubes", "ccubes_preprocess failed (%d)", rc);
goto err;
}
rc = ccubes_build(&ccubesctx);
if (rc != CL_SUCCESS) {
log_error("clccubes", "ccubes_build failed (%d)", rc);
goto err;
}
rc = ccubes_alloc(&ccubesctx,
nofvalues,
ON_set,
OFF_set,
p_implicants_pos,
p_implicants_val,
last_index,
p_covered,
p_pichart_pos,
coverage,
fixed_bits,
value_bits,
2025-03-26 11:41:28 +02:00
pichart_values);
if (rc != CL_SUCCESS) {
log_error("clccubes", "ccubes_alloc failed (%d)", rc);
goto err;
}
rc = ccubes_run(&ccubesctx);
if (rc != CL_SUCCESS) {
log_error("clccubes", "ccubes_run failed (%d)", rc);
goto err;
}
2025-03-21 18:52:41 +02:00
rc = ccubes_map(&ccubesctx);
if (rc != CL_SUCCESS) {
log_error("clccubes", "ccubes_map failed (%d)", rc);
goto err;
}
2025-03-21 18:52:41 +02:00
err:
return rc;
2025-03-21 18:52:41 +02:00
}
void
ccubes_clean_up(struct ccubes_context *ctx)
{
clReleaseProgram(ctx->ccubes_program);
ccubes_unmap(ctx);
/* INPUTS */
clReleaseMemObject(ctx->nofvalues);
clReleaseMemObject(ctx->ON_set);
clReleaseMemObject(ctx->OFF_set);
clReleaseMemObject(ctx->p_implicants_pos);
clReleaseMemObject(ctx->p_implicants_val);
clReleaseMemObject(ctx->last_index);
clReleaseMemObject(ctx->p_covered);
clReleaseMemObject(ctx->p_pichart_pos);
/* OUTPUTS */
clReleaseMemObject(ctx->coverage);
clReleaseMemObject(ctx->fixed_bits);
clReleaseMemObject(ctx->value_bits);
clReleaseMemObject(ctx->pichart_values);
cl_clean_up(*ctx->clctx);
return;
}