diff --git a/inst/ccubes.cl b/inst/ccubes.cl new file mode 100755 index 0000000..20e2081 --- /dev/null +++ b/inst/ccubes.cl @@ -0,0 +1,346 @@ +/* + * Copyright (c) 2025 Adrian Dușa + * Copyright (c) 2025 Paul Irofti + * + * Permission to use, copy, modify, and/or distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include "ccubes_generated.h" + +#ifdef USE_DOUBLE +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +typedef double real; +#define R_ZERO 1e-14 +#else +typedef float real; +#define R_ZERO 1e-10 +#endif + +#define BITS_PER_WORD 32 + +#define ROW_DIM 0 +#define COL_DIM 1 + +// #pragma OPENCL EXTENSION cl_amd_printf : enable +// #pragma OPENCL EXTENSION cl_khr_select_fprounding_mode : enable +// #pragma OPENCL SELECT_ROUNDING_MODE rtz + +#ifdef RANGE_DEBUG +#define RANGE_CHECK(lower, upper, value, str) do { \ + if ((value) < (lower) || (value) > (upper)) { \ + printf("%s", (str)); \ + return; \ + } \ +} while(0); +#else +#define RANGE_CHECK(lower, upper, value, str) +#endif + +unsigned long int +nchoosek(int n, int k) +{ + if (k == 0 || k == n) return 1; + if (k == 1) return n; + + unsigned long int result = 1; + + if (k > n - k) { + k = n - k; + } + + for (int i = 0; i < k; i++) { + result = result * (n - i) / (i + 1); + } + + return result; +} + +/* + * + * PROBLEM: CCubes + * + * NDRANGE: nchoosek(ninputs, k) work-items + * + * INPUT: + * k - current input + * nofvalues (ninputs x 1) - read, copy-host - number of values + * ON_set (posrows x ninputs) - read, copy-host - ON set + * OFF_set (ninputs x negrows) - read, copy-host - OFF set + * p_implicants_pos - read, copy-host + * p_implicants_val - read, copy-host + * last_index - read, copy-host + * p_covered - read, copy-host + * p_pichart_pos - read, copy-host + * + * CONSTANTS: + * NINPUTS - number of inputs + * POSROWS - positive output rows (the ON set) + * NEGROWS - negative output rows (the OFF set) + * IMPLICANT_WORDS - words needed per PI representation + * VALUE_BIT_WIDTH - largest bit used (ffs) + * PICHART_WORDS - words needed per PI chart columns + * + * OUTPUT: + * covsum - sum of coverage (reproduce on host instead?) + * redundant (1) - read, write + * coverage (posrows x 1) - read, write + * fixed_bits (implicant_words x 1) - read, write + * value_bits (implicant_words x 1) - read, write + * pichart_values (pichart_words x 1) - read, write + * + * NOTE: Both input and output must be allocated before calling this funciton. + */ +#if 0 +#define NINPUTS 64 +#define POSROWS 128 +#define NEGROWS 128 +#define IMPLICANT_WORDS 64 +#define VALUE_BIT_WIDTH 32 +#define PICHART_WORDS 8 +#endif +__kernel void +ccubes_task(int k, + __global const int *nofvalues, /* IN: RC */ + __global const int *ON_set, /* IN: RC */ + __global const int *OFF_set, /* IN: RC */ + __global const unsigned int *p_implicants_pos, /* IN: RC */ + __global const unsigned int *p_implicants_val, /* IN: RC */ + __global const int *last_index, /* IN: RC */ + __global const int *p_covered, /* IN: RC */ + __global const int *p_pichart_pos, /* IN: RC */ + __global bool *g_redundant, /* OUT: RW */ + __global bool *g_coverage, /* OUT: RW */ + __global unsigned int *g_fixed_bits, /* OUT: RW */ + __global unsigned int *g_value_bits, /* OUT: RW */ + __global unsigned int *g_pichart_values /* OUT: RW */ + ) +{ + /* work-item?: task in nchoosek(ninputs, k) */ + /* work-group?: k in 1 to ninputs */ + /* total work: tasks in nchoosek for k in 1 to ninputs */ + + size_t task = get_global_id(0); + size_t gws = get_global_size(0); + // size_t goffset = task - gws; + size_t goffset = get_global_offset(0); + // size_t gid = task - goffset; + size_t gid = get_global_linear_id(); + + __global bool *redundant = &g_redundant[gid]; + __global bool *coverage = &g_coverage[gid * POSROWS]; + __global unsigned int *fixed_bits = &g_fixed_bits[gid * IMPLICANT_WORDS]; + __global unsigned int *value_bits = &g_value_bits[gid * IMPLICANT_WORDS]; + __global unsigned int *pichart_values = &g_pichart_values[gid * PICHART_WORDS]; + + // coverage[0] = 1; + // printf("task %d, gws %d, goffset %d, gid %d\n", task, gws, goffset, gid); + // return; + redundant = true; + + int prevfoundPI = 0; + + int tempk[NINPUTS]; /* max is tempk[ninputs] */ + int x = 0; + int combination = task; + + // fill the combination for the current task + for (int i = 0; i < k; i++) { + while (nchoosek(NINPUTS - (x + 1), k - (i + 1)) <= combination) { + combination -= nchoosek(NINPUTS - (x + 1), k - (i + 1)); + x++; + } + tempk[i] = x; + x++; + } + + // allocate vectors of decimal row numbers for the positive and negative rows + int decpos[POSROWS]; + int decneg[NEGROWS]; + + // create the vector of multiple bases, useful when calculating the decimal representation + // of a particular combination of columns, for each row + int mbase[NINPUTS]; + mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input + + // calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with + // 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6 + for (int i = 1; i < k; i++) { + mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]]; + } + + // calculate decimal numbers, using mbase, fills in decpos and decneg + for (int r = 0; r < POSROWS; r++) { + decpos[r] = 0; + for (int c = 0; c < k; c++) { + decpos[r] += ON_set[tempk[c] * POSROWS + r] * mbase[c]; + } + } + + for (int r = 0; r < NEGROWS; r++) { + decneg[r] = 0; + for (int c = 0; c < k; c++) { + decneg[r] += OFF_set[tempk[c] * NEGROWS + r] * mbase[c]; + } + } + + + int possible_rows[POSROWS]; + + bool possible_cover[POSROWS]; + possible_cover[0] = true; // bool flag, to be set with false if found among the OFF set + + int found = 0; + + // identifies all unique decimal rows, for the selected combination of k inputs + for (int r = 0; r < POSROWS; r++) { + int prev = 0; + bool unique = true; // bool flag, assume the row is unique + while (prev < found && unique) { + unique = decpos[possible_rows[prev]] != decpos[r]; + prev++; + } + + if (unique) { + possible_rows[found] = r; + possible_cover[found] = true; + found++; + } + } + + if (found > 0) { + // some of the ON set numbers are possible PIs (not found in the OFF set) + int frows[POSROWS]; + + // verify if this is a possible PI + // (if the same decimal number is not found in the OFF set) + for (int i = found - 1; i >= 0; i--) { + int j = 0; + while (j < NEGROWS && possible_cover[i]) { + if (decpos[possible_rows[i]] == decneg[j]) { + possible_cover[i] = false; + found--; + } + j++; + } + + if (possible_cover[i]) { + frows[found - i - 1] = possible_rows[i]; + } + } + // Rprintf("task: %d; rows: %d\n", task, found); + + for (int f = 0; f < found; f++) { + + + // create a temporary vector of length k, containing the values from the initial ON set + // plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc. + int tempc[NINPUTS]; + + // using bit shifting, store the fixed bits and value bits + // unsigned int fixed_bits[IMPLICANT_WORDS]; + // unsigned int value_bits[IMPLICANT_WORDS]; + + for (int i = 0; i < IMPLICANT_WORDS; i++) { + fixed_bits[i] = 0U; + value_bits[i] = 0U; + } + + for (int c = 0; c < k; c++) { + int value = ON_set[tempk[c] * POSROWS + frows[f]]; + tempc[c] = value + 1; + + int word_index = tempk[c] / (BITS_PER_WORD / VALUE_BIT_WIDTH); + int bit_index = (tempk[c] % (BITS_PER_WORD / VALUE_BIT_WIDTH)) * VALUE_BIT_WIDTH; + + fixed_bits[word_index] |= (VALUE_BIT_MASK << bit_index); + value_bits[word_index] |= ((unsigned int)value << bit_index); + } + + // check if the current PI is not redundant + // bool redundant = false; + redundant = false; + + int i = 0; + while (i < prevfoundPI && !redundant) { + // /* + // - ck contains the complexity level for each of the previously found non-redundant PIs + // - indx is a matrix containing the indexes of the columns where the values were stored + // - a redundant PI is one for which all values from a previous PI are exactly the same: + // 0 0 1 2 0, let's say previously found PI + // which means a corresponding ck = 2 and a corresponding indx = [3, 4] + // 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal + // therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2 + // */ + + bool is_subset = true; // Assume it's a subset unless proven otherwise + + for (int w = 0; w < IMPLICANT_WORDS; w++) { + // If the new PI has values on positions outside the existing PI’s fixed positions, it’s not a subset + unsigned int index_mask = p_implicants_pos[i * IMPLICANT_WORDS + w]; + + if ((fixed_bits[w] & index_mask) != index_mask) { + is_subset = false; + break; + } + + // then compare the value bits, if one or more values on those positions are different, it’s not a subset + if ((value_bits[w] & index_mask) != (p_implicants_val[i * IMPLICANT_WORDS + w] & index_mask)) { + is_subset = false; + break; + } + } + + redundant = is_subset; + + i++; + } + + if (redundant) continue; + + // bool coverage[POSROWS]; + int covsum = 0; + // unsigned int pichart_values[PICHART_WORDS]; + for (int w = 0; w < PICHART_WORDS; w++) { + pichart_values[w] = 0U; + } + + for (int r = 0; r < POSROWS; r++) { + coverage[r] = decpos[r] == decpos[frows[f]]; + if (coverage[r]) { + int word_index = r / BITS_PER_WORD; + int bit_index = r % BITS_PER_WORD; + pichart_values[word_index] |= (1U << bit_index); + } + covsum += coverage[r]; + } + + // verify row dominance + int rd = 0; + while (rd < last_index[covsum - 1] && !redundant) { + + bool dominated = true; + for (int w = 0; w < PICHART_WORDS; w++) { + if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * PICHART_WORDS + w]) != pichart_values[w]) { + dominated = false; + break; + } + } + + redundant = dominated; + rd++; + } + + if (redundant) continue; + } + } +} diff --git a/src/CCubes.c b/src/CCubes.c index 5e4868d..eb07e46 100755 --- a/src/CCubes.c +++ b/src/CCubes.c @@ -28,6 +28,27 @@ SEXP CCubes(SEXP tt) { + // simulate the R command: + // system.file("ccubes.cl", package = "IEEE") + + SEXP args = PROTECT(allocVector(VECSXP, 2)); + SET_VECTOR_ELT(args, 0, mkString("ccubes.cl")); + SET_VECTOR_ELT(args, 1, mkString("IEEE")); + + SEXP names = PROTECT(allocVector(STRSXP, 2)); + SET_STRING_ELT(names, 1, mkChar("package")); + Rf_setAttrib(args, R_NamesSymbol, names); + + SEXP r_call = PROTECT(); + SEXP r_result = PROTECT(R_tryEval( + Rf_lang3(install("do.call"), mkString("system.file"), args), + R_GlobalEnv, + NULL + )); + + Rprintf("Kernel file: '%s'\n", CHAR(STRING_ELT(r_result, 0))); + UNPROTECT(3); + // 32 bits per word, in bit shifting representation char *bits_per_word = getenv("BITS_PER_WORD"); // Read from the PATH int BITS_PER_WORD = bits_per_word ? atoi(bits_per_word) : 32;