From 8d915c7984ed2ab23c33dbaf2a569ed5a7a3d4fd Mon Sep 17 00:00:00 2001 From: Paul Irofti Date: Wed, 26 Mar 2025 11:34:01 +0200 Subject: [PATCH] Finished initial draft. Untested. Write-only. --- ccubes.cl | 16 ++- clccubes.c | 398 ++++++++++++++++++++++++++++++++++++++++++++++++----- clccubes.h | 20 ++- 3 files changed, 395 insertions(+), 39 deletions(-) diff --git a/ccubes.cl b/ccubes.cl index 89a1f90..d354bf1 100644 --- a/ccubes.cl +++ b/ccubes.cl @@ -1,3 +1,5 @@ +#include "ccubes_generated.h" + #ifdef USE_DOUBLE #pragma OPENCL EXTENSION cl_khr_fp64 : enable typedef double real; @@ -80,12 +82,14 @@ nchoosek(int n, int k) * * NOTE: Both input and output must be allocated before calling this funciton. */ +#if 0 #define NINPUTS 64 #define POSROWS 128 #define NEGROWS 128 #define IMPLICANT_WORDS 64 #define VALUE_BIT_WIDTH 32 #define PICHART_WORDS 8 +#endif __kernel void ccubes_task(int k, __global const real *nofvalues, /* IN: RC */ @@ -96,10 +100,10 @@ ccubes_task(int k, __global const int *last_index, /* IN: RC */ __global const int *p_covered, /* IN: RC */ __global const int *p_pichart_pos, /* IN: RC */ - __global bool *coverage, /* OUT: RW */ - __global unsigned int *fixed_bits, /* OUT: RW */ - __global unsigned int *value_bits, /* OUT: RW */ - __global unsigned int *pichart_values /* OUT: RW */ + __global bool *g_coverage, /* OUT: RW */ + __global unsigned int *g_fixed_bits, /* OUT: RW */ + __global unsigned int *g_value_bits, /* OUT: RW */ + __global unsigned int *g_pichart_values /* OUT: RW */ ) { /* work-item?: task in nchoosek(ninputs, k) */ @@ -107,6 +111,10 @@ ccubes_task(int k, /* total work: tasks in nchoosek for k in 1 to ninputs */ size_t task = get_global_id(0); + bool *coverage = &g_coverage[task * POSROWS]; + unsigned int *fixed_bits = &g_fixed_bits[task * IMPLICANT_WORDS]; + unsigned int *value_bits = &g_value_bits[task * IMPLICANT_WORDS]; + unsigned int *pichart_values = &g_pichart_values[task * PICHART_WORDS]; int prevfoundPI = 0; diff --git a/clccubes.c b/clccubes.c index d60d7c8..9622de6 100644 --- a/clccubes.c +++ b/clccubes.c @@ -11,11 +11,76 @@ int -ccubes_build(struct ccubes_context *ctx) +ccubes_init(struct ccubes_context *ctx, + int k, + int ninputs, + int posrows, + int negrows, + int implicant_words, + int value_bit_width, + int pichart_words, + int estimPI + ) { int rc = 0; ctx->clctx = malloc(sizeof *ctx->clctx); + if (ctx->clctx == NULL) { + log_error("clccubes", "clctx malloc failed (%d)", rc); + goto err; + } + + ctx->k = k; + ctx->ninputs = ninputs; + ctx->posrows = posrows; + ctx->negrows = negrows; + ctx->implicant_words = implicant_words; + ctx->value_bit_width = value_bit_width; + ctx->pichart_words = pichart_words; + ctx->estimPI = estimPI; + + ctx->gws = nchoosek(ninputs, k); + +err: + return rc; +} + +int +ccubes_preprocess(struct ccubes_context *ctx) +{ + int rc = 0; + FILE *fp; + + fp = fopen("ccubes_generated.h", "w"); + if (fp == NULL) { + rc = -1; + log_error("clccubes", "preprocess fopen failed (%d)", rc); + goto err; + } + + fprintf(fp, "#ifndef CCUBES_GENERATED_H__\n"); + fprintf(fp, "#define CCUBES_GENERATED_H__\n"); + + fprintf(fp, "#define NINPUTS %d\n", ctx->ninputs); + fprintf(fp, "#define POSROWS %d\n", ctx->posrows); + fprintf(fp, "#define NEGROWS %d\n", ctx->negrows); + fprintf(fp, "#define IMPLICANT_WORDS %d\n", ctx->implicant_words); + fprintf(fp, "#define VALUE_BIT_WIDTH %d\n", ctx->value_bit_width); + fprintf(fp, "#define PICHART_WORDS %d\n", ctx->pichart_words); + + fprintf(fp, "#endif"); + + fclose(fp); + +err: + return rc; +} + + +int +ccubes_build(struct ccubes_context *ctx) +{ + int rc = 0; ctx->clctx->device_type = CL_DEVICE_TYPE_GPU; strcpy(ctx->clctx->platform_name, "NVIDIA Corporation\0"); @@ -32,17 +97,47 @@ ccubes_build(struct ccubes_context *ctx) rc = cl_build(*ctx->clctx, CL_DEVICE_TYPE_GPU, "ccubes.cl", &ctx->ccubes_program); if (rc != CL_SUCCESS) { - log_warn("test", "Failed building ccubes.cl (%d)", rc); + log_warn("clccubes", "Failed building ccubes.cl (%d)", rc); goto err; } rc = cl_get_kern(ctx->ccubes_program, "ccubes_task", &ctx->ccubes_task); if (rc != CL_SUCCESS) { - log_warn("test", "Failed fetching ccubes_task (%d)", rc); + log_warn("clccubes", "Failed fetching ccubes_task (%d)", rc); goto err; } +err: + return rc; +} + +int +ccubes_alloc(struct ccubes_context *ctx, + const real *nofvalues, /* IN: RC */ + const real *ON_set, /* IN: RC */ + const real *OFF_set, /* IN: RC */ + const unsigned int *p_implicants_pos, /* IN: RC */ + const unsigned int *p_implicants_val, /* IN: RC */ + const int *last_index, /* IN: RC */ + const int *p_covered, /* IN: RC */ + const int *p_pichart_pos, /* IN: RC */ + bool *coverage, /* OUT: RW */ + unsigned int *fixed_bits, /* OUT: RW */ + unsigned int *value_bits, /* OUT: RW */ + unsigned int *pichart_values /* OUT: RW */ + ) +{ + int rc = 0; + + /* + * Save outputs for mapping + */ + ctx->h_coverage = coverage; + ctx->h_fixed_bits = fixed_bits; + ctx->h_value_bits = value_bits; + ctx->h_pichart_values = pichart_values; + /* * INPUTS */ @@ -50,61 +145,304 @@ ccubes_build(struct ccubes_context *ctx) /* __global const real *nofvalues, IN: RC */ ctx->nofvalues = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - NINPUTS * sizeof(real), nofvalues, &rc); + ctx->ninputs * sizeof(real), nofvalues, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const real *ON_set, IN: RC */ ctx->ON_set = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - POSROWS * NINPUTS * sizeof(real), ON_set, &rc); + ctx->posrows * ctx->ninputs * sizeof(real), ON_set, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const real *OFF_set, IN: RC */ ctx->OFF_set = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - NINPUTS * NEGROWS * sizeof(real), OFF_set, &rc); + ctx->ninputs * ctx->negrows * sizeof(real), OFF_set, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const unsigned int *p_implicants_pos, IN: RC */ ctx->p_implicants_pos = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - estimPI * IMPLICANT_WORDS * sizeof(int), p_implicants_pos, &rc); + ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_pos, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const unsigned int *p_implicants_val, IN: RC */ ctx->p_implicants_val = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - estimPI * IMPLICANT_WORDS * sizeof(int), p_implicants_val, &rc); + ctx->estimPI * ctx->implicant_words * sizeof(int), p_implicants_val, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *last_index, IN: RC */ ctx->last_index = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - POSROWS * sizeof(int), last_index, &rc); + ctx->posrows * sizeof(int), last_index, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *p_covered, IN: RC */ ctx->p_covered = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - estimPI * sizeof(int), p_covered, &rc); + ctx->estimPI * sizeof(int), p_covered, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global const int *p_pichart_pos, IN: RC */ ctx->p_pichart_pos = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - estimPI * PICHART_WORDS * sizeof(int), p_pichart_pos, &rc); + ctx->estimPI * ctx->pichart_words * sizeof(int), p_pichart_pos, &rc); if (rc != CL_SUCCESS) { goto err; } + /* + * OUTPUTS + */ + /* __global bool *coverage, OUT: RW */ + ctx->coverage = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, + ctx->gws * ctx->posrows * sizeof(bool), NULL, &rc); + if (rc != CL_SUCCESS) { + goto err; + } + + /* __global unsigned int *fixed_bits, OUT: RW */ + ctx->fixed_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, + ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); + if (rc != CL_SUCCESS) { + goto err; + } + + /* __global unsigned int *value_bits, OUT: RW */ + ctx->value_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, + ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); + if (rc != CL_SUCCESS) { + goto err; + } + + /* __global unsigned int *pichart_values, OUT: RW */ + ctx->pichart_values = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, + ctx->gws * ctx->pichart_words * sizeof(unsigned int), NULL, &rc); + if (rc != CL_SUCCESS) { + goto err; + } + +err: + return rc; +} + +int +ccubes_run(struct ccubes_context *ctx) +{ + int rc = 0; + int arg = 0; + + /* INPUTS */ + arg = 0; + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(int), (void *)&ctx->k); + + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->ON_set); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->OFF_set); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->p_implicants_pos); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->p_implicants_val); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->last_index); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->p_covered); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->p_pichart_pos); + + /* OUTPUTS */ + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->coverage); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->fixed_bits); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->value_bits); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->pichart_values); + if (rc != CL_SUCCESS) { + log_error("clccubes", "Kernel arguments failed (%d)", rc); + goto err; + } + + rc = clEnqueueNDRangeKernel(ctx->clctx->gpu_queue, ctx->ccubes_task, + 1, NULL, ctx->gws, NULL, + 0, NULL, &ev_ksvd[1]); + if (rc != CL_SUCCESS) { + log_error("clccubes", "NDRange failed (%d)", rc); + goto err; + } + +err: + return rc; +} + +int +ccubes_map(struct ccubes_context *ctx) +{ + int rc = 0; + + ctx->h_coverage = clEnqueueMapBuffer(ctx->clctx->gpu_queue, + ctx->coverage, CL_TRUE, CL_MAP_READ, 0, + ctx->gws * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc); + if (rc != CL_SUCCESS) { + log_error("clccubes", "coverage mapping failed (%d)", rc); + goto err; + } + ctx->h_fixed_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue, + ctx->fixed_bits, CL_TRUE, CL_MAP_READ, 0, + ctx->gws * ctx->implicant_words * sizeof(unsigned int), + 0, NULL, NULL, &rc); + if (rc != CL_SUCCESS) { + log_error("clccubes", "fixed_bits mapping failed (%d)", rc); + goto err; + } + ctx->h_value_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue, + ctx->value_bits, CL_TRUE, CL_MAP_READ, 0, + ctx->gws * ctx->implicant_words * sizeof(unsigned int), + 0, NULL, NULL, &rc); + if (rc != CL_SUCCESS) { + log_error("clccubes", "value_bits mapping failed (%d)", rc); + goto err; + } + ctx->h_pichart_values = clEnqueueMapBuffer(ctx->clctx->gpu_queue, + ctx->pichart_values, CL_TRUE, CL_MAP_READ, 0, + ctx->gws * ctx->pichart_words * sizeof(unsigned int), + 0, NULL, NULL, &rc); + if (rc != CL_SUCCESS) { + log_error("clccubes", "pichart_values mapping failed (%d)", rc); + goto err; + } + +err: + return rc; +} + +void +ccubes_unmap(struct ccubes_context *ctx) +{ + int rc = 0; + + /* + * UNMAP + */ + rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->coverage, + ctx->h_coverage, 0, NULL, NULL); + if (rc != CL_SUCCESS) { + log_error("clccubes", "coverage unmapping failed (%d)", rc); + } + rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->fixed_bits, + ctx->h_fixed_bits, 0, NULL, NULL); + if (rc != CL_SUCCESS) { + log_error("clccubes", "fixed_bits unmapping failed (%d)", rc); + } + rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->value_bits, + ctx->h_value_bits, 0, NULL, NULL); + if (rc != CL_SUCCESS) { + log_error("clccubes", "value_bits unmapping failed (%d)", rc); + } + rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->pichart_values, + ctx->h_pichart_values, 0, NULL, NULL); + if (rc != CL_SUCCESS) { + log_error("clccubes", "pichart_values unmapping failed (%d)", rc); + } + +err: + return rc; +} + +int +ccubes(int k, + int ninputs, + int posrows, + int negrows, + int implicant_words, + int value_bit_width, + int pichart_words, + int estimPI, + const real *nofvalues, /* IN: RC */ + const real *ON_set, /* IN: RC */ + const real *OFF_set, /* IN: RC */ + const unsigned int *p_implicants_pos, /* IN: RC */ + const unsigned int *p_implicants_val, /* IN: RC */ + const int *last_index, /* IN: RC */ + const int *p_covered, /* IN: RC */ + const int *p_pichart_pos, /* IN: RC */ + bool *coverage, /* OUT: RW */ + unsigned int *fixed_bits, /* OUT: RW */ + unsigned int *value_bits, /* OUT: RW */ + unsigned int *pichart_values /* OUT: RW */ + ) +{ + int rc = 0; + struct ccubes_context ccubesctx; + + rc = ccubes_init(&ccubesctx, + k, + ninputs, + posrows, + negrows, + implicant_words, + value_bit_width, + pichart_words, + estimPI); + if (rc != 0) { + log_error("clccubes", "ccubes_init failed (%d)", rc); + goto err; + } + + rc = ccubes_preprocess(&ccubesctx); + if (rc != 0) { + log_error("clccubes", "ccubes_preprocess failed (%d)", rc); + goto err; + } + + rc = ccubes_build(&ccubesctx); + if (rc != CL_SUCCESS) { + log_error("clccubes", "ccubes_build failed (%d)", rc); + goto err; + } + + rc = ccubes_alloc(&ccubesctx, + nofvalues, + ON_set, + OFF_set, + p_implicants_pos, + p_implicants_val, + last_index, + p_covered, + p_pichart_pos, + coverage, + fixed_bits, + value_bits, + pichart_values + ) + if (rc != CL_SUCCESS) { + log_error("clccubes", "ccubes_alloc failed (%d)", rc); + goto err; + } + + rc = ccubes_run(&ccubesctx); + if (rc != CL_SUCCESS) { + log_error("clccubes", "ccubes_run failed (%d)", rc); + goto err; + } + + rc = ccubes_map(&ccubesctx); + if (rc != CL_SUCCESS) { + log_error("clccubes", "ccubes_map failed (%d)", rc); + goto err; + } err: return rc; } @@ -112,36 +450,28 @@ err: void ccubes_clean_up(struct ccubes_context *ctx) { - cl_clean_up(*ctx->clctx); - clReleaseProgram(ctx->ccubes_program); + ccubes_unmap(ctx); + /* INPUTS */ clReleaseMemObject(ctx->nofvalues); - clReleaseMemObject(ON_set); - clReleaseMemObject(OFF_set); - clReleaseMemObject(p_implicants_pos); - clReleaseMemObject(p_implicants_val); - clReleaseMemObject(last_index); - clReleaseMemObject(p_covered); - clReleaseMemObject(p_pichart_pos); + clReleaseMemObject(ctx->ON_set); + clReleaseMemObject(ctx->OFF_set); + clReleaseMemObject(ctx->p_implicants_pos); + clReleaseMemObject(ctx->p_implicants_val); + clReleaseMemObject(ctx->last_index); + clReleaseMemObject(ctx->p_covered); + clReleaseMemObject(ctx->p_pichart_pos); /* OUTPUTS */ - clReleaseMemObject(coverage); - clReleaseMemObject(fixed_bits); - clReleaseMemObject(value_bits); - clReleaseMemObject(pichart_values); + clReleaseMemObject(ctx->coverage); + clReleaseMemObject(ctx->fixed_bits); + clReleaseMemObject(ctx->value_bits); + clReleaseMemObject(ctx->pichart_values); + + cl_clean_up(*ctx->clctx); return; } -int -ccubes() -{ - struct ccubes_context ccubesctx; - - ccubes_build(&ccubesctx); - -err: - ccubes_clean_up(&ccubesctx); -} diff --git a/clccubes.h b/clccubes.h index f300b0f..61e6153 100644 --- a/clccubes.h +++ b/clccubes.h @@ -18,8 +18,17 @@ struct ccubes_context { cl_program ccubes_program; cl_kernel ccubes_task; + /* internal memory sizes */ + int k; + int ninputs; + int posrows; + int negrows; + int implicant_words; + int value_bit_width; + int pichart_words; + /* INPUTS */ - cl_mem ctx->nofvalues); + cl_mem nofvalues; cl_mem ON_set; cl_mem OFF_set; cl_mem p_implicants_pos; @@ -33,6 +42,15 @@ struct ccubes_context { cl_mem fixed_bits; cl_mem value_bits; cl_mem pichart_values; + + /* Host outputs */ + bool *h_coverage; + unsigned int *h_fixed_bits; + unsigned int *h_value_bits; + unsigned int *h_pichart_values; + + /* ND-Range */ + size_t gws; /* global work size */ }; int ccubes(void);