ccubes-cl/inst/ccubes.cl

358 lines
10 KiB
Common Lisp
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* Copyright (c) 2025 Adrian Dușa <adrian.dusa@unibuc.ro>
* Copyright (c) 2025 Paul Irofti <paul@irofti.net>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "ccubes_generated.h"
#ifdef USE_DOUBLE
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
typedef double real;
#define R_ZERO 1e-14
#else
typedef float real;
#define R_ZERO 1e-10
#endif
#define BITS_PER_WORD 32
#define ROW_DIM 0
#define COL_DIM 1
// #pragma OPENCL EXTENSION cl_amd_printf : enable
// #pragma OPENCL EXTENSION cl_khr_select_fprounding_mode : enable
// #pragma OPENCL SELECT_ROUNDING_MODE rtz
#ifdef RANGE_DEBUG
#define RANGE_CHECK(lower, upper, value, str) do { \
if ((value) < (lower) || (value) > (upper)) { \
printf("%s", (str)); \
return; \
} \
} while(0);
#else
#define RANGE_CHECK(lower, upper, value, str)
#endif
unsigned long int
nchoosek(int n, int k)
{
if (k == 0 || k == n) return 1;
if (k == 1) return n;
unsigned long int result = 1;
if (k > n - k) {
k = n - k;
}
for (int i = 0; i < k; i++) {
result = result * (n - i) / (i + 1);
}
return result;
}
/*
*
* PROBLEM: CCubes
*
* NDRANGE: nchoosek(ninputs, k) work-items
*
* INPUT:
* k - current input
* nofvalues (ninputs x 1) - read, copy-host - number of values
* ON_set (posrows x ninputs) - read, copy-host - ON set
* OFF_set (ninputs x negrows) - read, copy-host - OFF set
* p_implicants_pos - read, copy-host
* p_implicants_val - read, copy-host
* last_index - read, copy-host
* p_covered - read, copy-host
* p_pichart_pos - read, copy-host
*
* CONSTANTS:
* NINPUTS - number of inputs
* POSROWS - positive output rows (the ON set)
* NEGROWS - negative output rows (the OFF set)
* IMPLICANT_WORDS - words needed per PI representation
* VALUE_BIT_WIDTH - largest bit used (ffs)
* PICHART_WORDS - words needed per PI chart columns
*
* OUTPUT:
* covsum - sum of coverage (reproduce on host instead?)
* redundant (1) - read, write
* coverage (posrows x 1) - read, write
* fixed_bits (implicant_words x 1) - read, write
* value_bits (implicant_words x 1) - read, write
* pichart_values (pichart_words x 1) - read, write
*
* NOTE: Both input and output must be allocated before calling this funciton.
*/
#if 0
#define NINPUTS 64
#define POSROWS 128
#define NEGROWS 128
#define IMPLICANT_WORDS 64
#define VALUE_BIT_WIDTH 32
#define PICHART_WORDS 8
#endif
__kernel void
ccubes_task(int k,
__global const int *nofvalues, /* IN: RC */
__global const int *ON_set, /* IN: RC */
__global const int *OFF_set, /* IN: RC */
__global const unsigned int *p_implicants_pos, /* IN: RC */
__global const unsigned int *p_implicants_val, /* IN: RC */
__global const int *last_index, /* IN: RC */
__global const int *p_covered, /* IN: RC */
__global const int *p_pichart_pos, /* IN: RC */
__global bool *g_redundant, /* OUT: RW */
__global bool *g_coverage, /* OUT: RW */
__global unsigned int *g_fixed_bits, /* OUT: RW */
__global unsigned int *g_value_bits, /* OUT: RW */
__global unsigned int *g_pichart_values /* OUT: RW */
)
{
/* work-item?: task in nchoosek(ninputs, k) */
/* work-group?: k in 1 to ninputs */
/* total work: tasks in nchoosek for k in 1 to ninputs */
// size_t gws = get_global_size(0);
// size_t goffset = task - gws;
// size_t goffset = get_global_offset(0);
// size_t gid = task - goffset;
// coverage[0] = 1;
// printf("task %d, gws %d, goffset %d, gid %d\n", task, gws, goffset, gid);
// return;
size_t task = get_global_id(0);
size_t gid = get_global_linear_id();
__global bool *redundant = &g_redundant[gid];
__global bool *coverage = &g_coverage[gid * POSROWS];
__global unsigned int *fixed_bits = &g_fixed_bits[gid * IMPLICANT_WORDS];
__global unsigned int *value_bits = &g_value_bits[gid * IMPLICANT_WORDS];
__global unsigned int *pichart_values = &g_pichart_values[gid * PICHART_WORDS];
*redundant = true;
int prevfoundPI = 0;
int tempk[NINPUTS]; /* max is tempk[ninputs] */
int x = 0;
int combination = task;
/* INIT */
for (int i = 0; i < POSROWS; i++)
coverage[i] = 0;
for (int i = 0; i < IMPLICANT_WORDS; i++) {
fixed_bits[i] = 0U;
value_bits[i] = 0U;
}
for (int i = 0; i < PICHART_WORDS; i++)
pichart_values[i] = 0U;
// fill the combination for the current task
for (int i = 0; i < k; i++) {
while (nchoosek(NINPUTS - (x + 1), k - (i + 1)) <= combination) {
combination -= nchoosek(NINPUTS - (x + 1), k - (i + 1));
x++;
}
tempk[i] = x;
x++;
}
// allocate vectors of decimal row numbers for the positive and negative rows
int decpos[POSROWS];
int decneg[NEGROWS];
// create the vector of multiple bases, useful when calculating the decimal representation
// of a particular combination of columns, for each row
int mbase[NINPUTS];
mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input
// calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with
// 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6
for (int i = 1; i < k; i++) {
mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]];
}
// calculate decimal numbers, using mbase, fills in decpos and decneg
for (int r = 0; r < POSROWS; r++) {
decpos[r] = 0;
for (int c = 0; c < k; c++) {
decpos[r] += ON_set[tempk[c] * POSROWS + r] * mbase[c];
}
}
for (int r = 0; r < NEGROWS; r++) {
decneg[r] = 0;
for (int c = 0; c < k; c++) {
decneg[r] += OFF_set[tempk[c] * NEGROWS + r] * mbase[c];
}
}
int possible_rows[POSROWS];
bool possible_cover[POSROWS];
possible_cover[0] = true; // bool flag, to be set with false if found among the OFF set
int found = 0;
// identifies all unique decimal rows, for the selected combination of k inputs
for (int r = 0; r < POSROWS; r++) {
int prev = 0;
bool unique = true; // bool flag, assume the row is unique
while (prev < found && unique) {
unique = decpos[possible_rows[prev]] != decpos[r];
prev++;
}
if (unique) {
possible_rows[found] = r;
possible_cover[found] = true;
found++;
}
}
if (found > 0) {
// some of the ON set numbers are possible PIs (not found in the OFF set)
int frows[POSROWS];
// verify if this is a possible PI
// (if the same decimal number is not found in the OFF set)
for (int i = found - 1; i >= 0; i--) {
int j = 0;
while (j < NEGROWS && possible_cover[i]) {
if (decpos[possible_rows[i]] == decneg[j]) {
possible_cover[i] = false;
found--;
}
j++;
}
if (possible_cover[i]) {
frows[found - i - 1] = possible_rows[i];
}
}
// Rprintf("task: %d; rows: %d\n", task, found);
for (int f = 0; f < found; f++) {
// create a temporary vector of length k, containing the values from the initial ON set
// plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc.
int tempc[NINPUTS];
// using bit shifting, store the fixed bits and value bits
// unsigned int fixed_bits[IMPLICANT_WORDS];
// unsigned int value_bits[IMPLICANT_WORDS];
for (int i = 0; i < IMPLICANT_WORDS; i++) {
fixed_bits[i] = 0U;
value_bits[i] = 0U;
}
for (int c = 0; c < k; c++) {
int value = ON_set[tempk[c] * POSROWS + frows[f]];
tempc[c] = value + 1;
int word_index = tempk[c] / (BITS_PER_WORD / VALUE_BIT_WIDTH);
int bit_index = (tempk[c] % (BITS_PER_WORD / VALUE_BIT_WIDTH)) * VALUE_BIT_WIDTH;
fixed_bits[word_index] |= (VALUE_BIT_MASK << bit_index);
value_bits[word_index] |= ((unsigned int)value << bit_index);
}
// check if the current PI is not redundant
// bool redundant = false;
*redundant = false;
int i = 0;
while (i < prevfoundPI && !*redundant) {
// /*
// - ck contains the complexity level for each of the previously found non-redundant PIs
// - indx is a matrix containing the indexes of the columns where the values were stored
// - a redundant PI is one for which all values from a previous PI are exactly the same:
// 0 0 1 2 0, let's say previously found PI
// which means a corresponding ck = 2 and a corresponding indx = [3, 4]
// 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal
// therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2
// */
bool is_subset = true; // Assume it's a subset unless proven otherwise
for (int w = 0; w < IMPLICANT_WORDS; w++) {
// If the new PI has values on positions outside the existing PIs fixed positions, its not a subset
unsigned int index_mask = p_implicants_pos[i * IMPLICANT_WORDS + w];
if ((fixed_bits[w] & index_mask) != index_mask) {
is_subset = false;
break;
}
// then compare the value bits, if one or more values on those positions are different, its not a subset
if ((value_bits[w] & index_mask) != (p_implicants_val[i * IMPLICANT_WORDS + w] & index_mask)) {
is_subset = false;
break;
}
}
*redundant = is_subset;
i++;
}
if (*redundant) continue;
// bool coverage[POSROWS];
int covsum = 0;
// unsigned int pichart_values[PICHART_WORDS];
for (int w = 0; w < PICHART_WORDS; w++) {
pichart_values[w] = 0U;
}
for (int r = 0; r < POSROWS; r++) {
coverage[r] = decpos[r] == decpos[frows[f]];
if (coverage[r]) {
int word_index = r / BITS_PER_WORD;
int bit_index = r % BITS_PER_WORD;
pichart_values[word_index] |= (1U << bit_index);
}
covsum += coverage[r];
}
// verify row dominance
int rd = 0;
while (rd < last_index[covsum - 1] && !*redundant) {
bool dominated = true;
for (int w = 0; w < PICHART_WORDS; w++) {
if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * PICHART_WORDS + w]) != pichart_values[w]) {
dominated = false;
break;
}
}
*redundant = dominated;
rd++;
}
if (*redundant) continue;
}
}
}