ccubes-cl/src/CCubes.c

725 lines
26 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <R.h>
#include <R_ext/RS.h>
#include <R_ext/Boolean.h>
#include <Rinternals.h>
#include <Rmath.h>
#include <R_ext/Rdynload.h>
#include <sys/time.h>
#include <stdbool.h>
#include <math.h>
#include "CCubes.h"
#ifdef _OPENMP
#undef match
#include <omp.h>
#endif
#include "real.h"
#include "cl_setup.h"
#include "clccubes.h"
#include "config.h"
#include "logging.h"
// $ export BITS_PER_WORD=32 in the Terminal, before running R
SEXP CCubes(SEXP tt) {
// 32 bits per word, in bit shifting representation
char *bits_per_word = getenv("BITS_PER_WORD"); // Read from the PATH
int BITS_PER_WORD = bits_per_word ? atoi(bits_per_word) : 32;
if (BITS_PER_WORD != 8 && BITS_PER_WORD != 16 && BITS_PER_WORD != 32 && BITS_PER_WORD != 64) {
BITS_PER_WORD = 32; // Default to 32
}
// $ export PRINT_INFO=true in the Terminal, before running R
char *print_info = getenv("PRINT_INFO"); // Read from the PATH
Rboolean PRINT_INFO = print_info && (
strcmp(print_info, "1") == 0 ||
strcmp(print_info, "TRUE") == 0 ||
strcmp(print_info, "true") == 0 ||
strcmp(print_info, "T") == 0 ||
strcmp(print_info, "t") == 0
);
int multiplier = 0;
struct timeval start, end;
double elapsed_time;
config_set_int("log", LOG_LEVEL_WARN);
config_set_int("log:clccubes", LOG_LEVEL_DEBUG);
config_set_int("log:ccubes", LOG_LEVEL_DEBUG);
config_set_int("log:cl", LOG_LEVEL_DEBUG);
char log[100] = {0};
sprintf(log, "log-ccubes");
config_set_string("out", log);
struct ccubes_context *ctx = ccubes_create();
if (ctx == NULL) {
log_error("ccubes", "ccubes_do_tasks failed");
}
if (PRINT_INFO) {
Rprintf("--- START ---\n");
gettimeofday(&start, NULL); // Start time
}
int *p_tt = INTEGER(tt);
int ttrows = nrows(tt); // number of rows in the data matrix
int ninputs = ncols(tt) - 1; // number of inputs (columns - 1, the last one is the outcome)
// calculate the number of positive output rows (the ON set)
int posrows = 0;
for (int r = 0; r < ttrows; r++) {
posrows += p_tt[ninputs * ttrows + r];
}
// calculate the number of negative output rows (the OFF set)
int negrows = ttrows - posrows;
if (negrows == 0) {
// if there are no negative output rows, no PIs can be found
// all inputs will be completely minimized
return(R_NilValue);
}
// split the minterms in the ON and OFF set matrices
int ON_set[posrows * ninputs];
int OFF_set[ninputs * negrows];
int rowpos = 0, rowneg = 0;
int max_value = 0;
for (int r = 0; r < ttrows; r++) {
if (p_tt[ninputs * ttrows + r] == 1) { // positive
for (int c = 0; c < ninputs; c++) {
int value = p_tt[c * ttrows + r];
ON_set[c * posrows + rowpos] = value;
if (value > max_value) {
max_value = value;
}
}
rowpos += 1;
}
else { // negative
for (int c = 0; c < ninputs; c++) {
int value = p_tt[c * ttrows + r];
OFF_set[c * negrows + rowneg] = value;
if (value > max_value) {
max_value = value;
}
}
rowneg += 1;
}
}
int bits_needed = ceil(log2(max_value)); // Compute the necessary bits
int value_bit_width = 1;
while (value_bit_width < bits_needed) {
value_bit_width *= 2; // Round up to the next power of 2
}
if (value_bit_width > BITS_PER_WORD) {
BITS_PER_WORD = value_bit_width; // Adjust the bits per word
}
int value_bit_mask = (1U << value_bit_width) - 1U;
// calculate the number of values (biggest number) for each input
int nofvalues[ninputs];
int nofpi[ninputs];
for (int i = 0; i < ninputs; i++) {
nofvalues[i] = 0; // initialize
nofpi[i] = 0; // initialize
for (int r = 0; r < ttrows; r++) {
if (nofvalues[i] < p_tt[i * ttrows + r]) {
nofvalues[i] = p_tt[i * ttrows + r];
}
}
// add 1 because if the biggest number is 2 then it has three levels: 0, 1 and 2
nofvalues[i] += 1;
if (nofvalues[i] == 1) {
// no input ever has less than two values
nofvalues[i] = 2;
}
}
// preallocating for an estimated large number of 10000 found PIs
// this number will be iteratively increased when the found PIs reach the upper limit
int estimPI = 250000;
// the index of the PIs, in descending order of their number of covered ON-set minterms
int *p_covered = (int *) R_Calloc(estimPI, int);
// many PIs will have the same coverage, but they don't necessarily cover the same minterms
// to employ row dominance when solving the coverage matrix, we need to compare the coverage of the
// current PI with the coverage of the previous PIs. If this PI survives the comparison, its
// coverage has to be added in the p_covered vector, and its order in the p_covered
// vector, at the last index of the PI coverage with the same number of minterms
int last_index[posrows]; // descending order
// p_pichart = malloc(estimPI * posrows * sizeof(int));
// memset(p_pichart, false, estimPI * posrows * sizeof(int));
int *p_pichart = (int *) R_Calloc(estimPI * posrows, int);
// prefixing (int *) before R_Calloc() prefills all values with 0s
int pichart_words = (posrows + BITS_PER_WORD - 1) / BITS_PER_WORD; // Words needed per coverage matrix columns
unsigned int *p_pichart_pos = (unsigned int *) R_Calloc(estimPI * pichart_words, unsigned int);
int implicant_words = (ninputs + BITS_PER_WORD - 1) / BITS_PER_WORD; // Words needed per PI representation
unsigned int *p_implicants_pos = (unsigned int *) R_Calloc(estimPI * implicant_words, unsigned int);
unsigned int *p_implicants_val = (unsigned int *) R_Calloc(estimPI * implicant_words, unsigned int);
int prevfoundPI = 0; // the number of previously found PIs
int foundPI = 0;
int prevsolmin = 0; // the previous (level k - 1), minimum number of PIs to solve the coverage matrix
int solmin = 0;
// the positions of the PIs solving the coverage matrix
// this vector can never be lengthier than the number of ON minterms (posrows)
int previndices[posrows];
int indices[posrows];
for (int i = 0; i < posrows; i++) {
previndices[i] = 0;
indices[i] = 0;
last_index[i] = 0;
}
Rboolean ON_set_covered = false;
if (PRINT_INFO) {
Rprintf("ON-set minterms: %d\n", posrows);
#ifdef _OPENMP
Rprintf("OpenMP enabled, %d workers\n", omp_get_max_threads());
#endif
}
int stop_counter = 0; // to stop if two consecutive levels of complexity yield no PIs
int k;
for (k = 1; k <= ninputs; k++) {
if (PRINT_INFO) {
Rprintf("---k: %d\n", k);
}
int n_tasks = nchoosek(ninputs, k);
if (n_tasks == 0) {
// overflow, too many tasks
return (R_NilValue);
}
int n_tasks_batch = 512;
for (int task = 0; task < n_tasks; task+=n_tasks_batch) {
/* adjust if batch size is larger than total job size */
int current_batch = n_tasks < n_tasks_batch ? n_tasks : n_tasks_batch;
log_debug("ccubes", "Tasks %d - %d out of %d",
task, task + current_batch - 1, n_tasks);
bool *coverage;
unsigned int *fixed_bits;
unsigned int *value_bits;
unsigned int *pichart_values;
ccubes_do_tasks(ctx,
current_batch,
task,
k,
ninputs,
posrows,
negrows,
implicant_words,
value_bit_width,
value_bit_mask,
pichart_words,
estimPI,
nofvalues,
ON_set,
OFF_set,
p_implicants_pos,
p_implicants_val,
last_index,
p_covered,
p_pichart_pos,
coverage,
fixed_bits,
value_bits,
pichart_values
);
for (int i = 0; i < current_batch; i++) {
log_debug("ccubes", "Task %d", i);
log_debug_raw("ccubes", "coverage[%d]:", i);
for (int j = 0; j < posrows; j++) {
log_debug_raw("ccubes", " %d",
ctx->h_coverage[i * posrows + j]);
}
log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "fixed_bits[%d]:", i);
for (int j = 0; j < implicant_words; j++) {
log_debug_raw("ccubes", " %d",
ctx->h_fixed_bits[i * implicant_words + j]);
}
log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "value_bits[%d]:", i);
for (int j = 0; j < implicant_words; j++) {
log_debug_raw("ccubes", " %d",
ctx->h_value_bits[i * implicant_words + j]);
}
log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "pichart_values[%d]:", i);
for (int j = 0; j < pichart_words; j++) {
log_debug_raw("ccubes", " %d",
ctx->h_pichart_values[i * pichart_words + j]);
}
log_debug_raw("ccubes", "\n");
}
/* change to something less aggresive for reuse */
ccubes_clean_up(ctx);
}
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
for (int task = 0; task < n_tasks; task++) {
int tempk[k];
int x = 0;
int combination = task;
// fill the combination for the current task / combination number
for (int i = 0; i < k; i++) {
while (nchoosek(ninputs - (x + 1), k - (i + 1)) <= combination) {
combination -= nchoosek(ninputs - (x + 1), k - (i + 1));
x++;
}
tempk[i] = x;
x++;
}
// allocate vectors of decimal numbers for the ON-set and OFF-set rows
int decpos[posrows];
int decneg[negrows];
// create the vector of multiple bases, useful when calculating the decimal representation
// of a particular combination of columns, for each row
int mbase[k];
mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input
// calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with
// 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6
for (int i = 1; i < k; i++) {
mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]];
}
// calculate decimal numbers, using mbase, fills in decpos and decneg
for (int r = 0; r < posrows; r++) {
decpos[r] = 0;
for (int c = 0; c < k; c++) {
decpos[r] += ON_set[tempk[c] * posrows + r] * mbase[c];
}
}
for (int r = 0; r < negrows; r++) {
decneg[r] = 0;
for (int c = 0; c < k; c++) {
decneg[r] += OFF_set[tempk[c] * negrows + r] * mbase[c];
}
}
int possible_rows[posrows];
Rboolean possible_cover[posrows];
possible_cover[0] = true; // boolean flag, to be set with false if found among the OFF set
int found = 0;
// identifies all unique decimal rows, for the selected combination of k inputs
for (int r = 0; r < posrows; r++) {
int prev = 0;
Rboolean unique = true; // Rboolean flag, assume the row is unique
while (prev < found && unique) {
unique = decpos[possible_rows[prev]] != decpos[r];
prev++;
}
if (unique) {
possible_rows[found] = r;
possible_cover[found] = true;
found++;
}
}
if (found > 0) {
// some of the ON set numbers are possible PIs (not found in the OFF set)
int frows[found];
// verify if this is a possible PI
// (if the same decimal number is not found in the OFF set)
for (int i = found - 1; i >= 0; i--) {
int j = 0;
while (j < negrows && possible_cover[i]) {
if (decpos[possible_rows[i]] == decneg[j]) {
possible_cover[i] = false;
found--;
}
j++;
}
if (possible_cover[i]) {
frows[found - i - 1] = possible_rows[i];
}
}
// Rprintf("task: %d; rows: %d\n", task, found);
for (int f = 0; f < found; f++) {
// create a temporary vector of length k, containing the values from the initial ON set
// plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc.
int tempc[k];
// using bit shifting, store the fixed bits and value bits
unsigned int fixed_bits[implicant_words];
unsigned int value_bits[implicant_words];
for (int i = 0; i < implicant_words; i++) {
fixed_bits[i] = 0U;
value_bits[i] = 0U;
}
for (int c = 0; c < k; c++) {
int value = ON_set[tempk[c] * posrows + frows[f]];
tempc[c] = value + 1;
int word_index = tempk[c] / (BITS_PER_WORD / value_bit_width);
int bit_index = (tempk[c] % (BITS_PER_WORD / value_bit_width)) * value_bit_width;
fixed_bits[word_index] |= (value_bit_mask << bit_index);
value_bits[word_index] |= ((unsigned int)value << bit_index);
}
// check if the current PI is not redundant
Rboolean redundant = false;
int i = 0;
while (i < prevfoundPI && !redundant) {
// /*
// - ck contains the complexity level for each of the previously found non-redundant PIs
// - indx is a matrix containing the indexes of the columns where the values were stored
// - a redundant PI is one for which all values from a previous PI are exactly the same:
// 0 0 1 2 0, let's say previously found PI
// which means a corresponding ck = 2 and a corresponding indx = [3, 4]
// 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal
// therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2
// */
Rboolean is_subset = true; // Assume it's a subset unless proven otherwise
for (int w = 0; w < implicant_words; w++) {
// If the new PI has values on positions outside the existing PIs fixed positions, its not a subset
unsigned int index_mask = p_implicants_pos[i * implicant_words + w];
if ((fixed_bits[w] & index_mask) != index_mask) {
is_subset = false;
break;
}
// then compare the value bits, if one or more values on those positions are different, its not a subset
if ((value_bits[w] & index_mask) != (p_implicants_val[i * implicant_words + w] & index_mask)) {
is_subset = false;
break;
}
}
redundant = is_subset;
i++;
}
if (redundant) continue;
Rboolean coverage[posrows];
int covsum = 0;
unsigned int pichart_values[pichart_words];
for (int w = 0; w < pichart_words; w++) {
pichart_values[w] = 0U;
}
for (int r = 0; r < posrows; r++) {
coverage[r] = decpos[r] == decpos[frows[f]];
if (coverage[r]) {
int word_index = r / BITS_PER_WORD;
int bit_index = r % BITS_PER_WORD;
pichart_values[word_index] |= (1U << bit_index);
}
covsum += coverage[r];
}
// verify row dominance
int rd = 0;
while (rd < last_index[covsum - 1] && !redundant) {
bool dominated = true;
for (int w = 0; w < pichart_words; w++) {
if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * pichart_words + w]) != pichart_values[w]) {
dominated = false;
break;
}
}
redundant = dominated;
rd++;
}
if (redundant) continue;
// Rprintf("It is a prime implicant\n");
// This operation first gets a new index to push in the global array in a concurrent way
// Then adds the result there.
// We could synchronize only the index and let the copy operation happen in parallel BUT this
// creates a false sharing problem and the performance is down by several factors.
#ifdef _OPENMP
#pragma omp critical
#endif
{
// push the PI information to the global arrays
for (int i = foundPI; i > last_index[covsum - 1]; i--) {
p_covered[i] = p_covered[i - 1];
}
p_covered[last_index[covsum - 1]] = foundPI;
for (int l = 1; l < covsum; l++) {
last_index[l - 1] += 1;
}
for (int w = 0; w < implicant_words; w++) {
p_implicants_pos[implicant_words * foundPI + w] = fixed_bits[w];
p_implicants_val[implicant_words * foundPI + w] = value_bits[w];
}
// populate the coverage matrix
for (int r = 0; r < posrows; r++) {
for (int w = 0; w < pichart_words; w++) {
p_pichart_pos[foundPI * pichart_words + w] = pichart_values[w];
}
p_pichart[posrows * foundPI + r] = coverage[r];
}
++foundPI;
// when needed, increase allocated memory
if (foundPI / estimPI > 0.9) {
estimPI += 100000;
p_pichart = R_Realloc(p_pichart, posrows * estimPI, int);
p_pichart_pos = R_Realloc(p_pichart_pos, pichart_words * estimPI, unsigned int);
p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int);
p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int);
p_covered = R_Realloc(p_covered, estimPI, int);
for (unsigned int i = foundPI; i < posrows * estimPI; i++) {
p_pichart[i] = 0;
}
for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) {
p_pichart_pos[i] = 0U;
}
for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) {
p_implicants_val[i] = 0U;
p_implicants_pos[i] = 0U;
}
if (PRINT_INFO) {
multiplier++;
Rprintf("%dx ", multiplier);
}
}
}
}
}
}
nofpi[k - 1] = foundPI;
if (foundPI > 0 && !ON_set_covered) {
Rboolean test_coverage = true;
int r = 0;
while (r < posrows && test_coverage) {
Rboolean minterm_covered = false;
int c = 0;
while (c < foundPI && !minterm_covered) {
minterm_covered = p_pichart[c * posrows + r];
c++;
}
test_coverage = minterm_covered;
r++;
}
ON_set_covered = test_coverage;
}
if (ON_set_covered) {
// Rprintf("posrows: %d; foundPI: %d\n", posrows, foundPI);
if (PRINT_INFO) {
gettimeofday(&end, NULL); // End time
elapsed_time = (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1e6;
Rprintf("Time taken finding %d PIs: %f sec.\n", foundPI, elapsed_time);
gettimeofday(&start, NULL);
}
SEXP pic = PROTECT(allocMatrix(INTSXP, posrows, foundPI));
for (long long unsigned int i = 0; i < posrows * foundPI; i++) {
INTEGER(pic)[i] = p_pichart[i];
}
SEXP PIlayers = PROTECT(allocVector(INTSXP, ninputs));
for (int i = 0; i < ninputs; i++) {
INTEGER(PIlayers)[i] = nofpi[i];
}
setAttrib(pic, install("PIlayers"), PIlayers);
// if this file is run directly using SHLIB, the following line is needed
// R_ParseEvalString("library(IEEE)", R_GlobalEnv);
SEXP pkg_env = PROTECT(R_FindNamespace(mkString("IEEE")));
SEXP solvechart = PROTECT(Rf_findVarInFrame(pkg_env, Rf_install("solvechart")));
SEXP evalinR = PROTECT(R_tryEval(Rf_lang2(solvechart, pic), pkg_env, NULL));
solmin = length(evalinR);
for (int i = 0; i < solmin; i++) {
indices[i] = INTEGER(evalinR)[i] - 1; // R is 1-based
}
UNPROTECT(5);
// Rprintf("solution minima: %d\n", solmin);
if (PRINT_INFO) {
gettimeofday(&end, NULL);
elapsed_time = (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1e6;
Rprintf("Time spent solving the coverage matrix: %f sec.\n", elapsed_time);
gettimeofday(&start, NULL);
}
if (solmin == prevsolmin) {
// the minimum number of PIs did not change in the current level of complexity
// we can safely retain the less complex PIs from the previous level
for (int i = 0; i < solmin; i++) {
indices[i] = previndices[i];
}
stop_counter += 1;
}
else {
// this means solmin is in fact smaller than the previously found solmin
// or it is the very first time a solmin was found
// only here it makes sense to overwrite prevsolmin and previndices,
// otherwise they are just as good as the ones from the previous level
prevsolmin = solmin;
for (int i = 0; i < solmin; i++) {
previndices[i] = indices[i];
}
stop_counter = 0;
}
}
prevfoundPI = foundPI;
// printf("stop_counter: %d\n", stop_counter);
// One level of complexity up, and the solution minima does not change
if (stop_counter > 0) {
// the search can stop
break;
}
}
// Rprintf("solmin: %d\n", solmin);
if (PRINT_INFO) {
Rprintf("--- END ---\n");
}
SEXP sol = PROTECT(allocMatrix(INTSXP, solmin, ninputs));
int *p_sol = INTEGER(sol);
for (int c = 0; c < solmin; c++) {
for (int r = 0; r < ninputs; r++) {
unsigned int value = 0;
int word_index = r / (BITS_PER_WORD / value_bit_width); // Word index within the implicant
int bit_index = (r % (BITS_PER_WORD / value_bit_width)) * value_bit_width; // Bit position within the word
// (1U << bit_index) is just as good
if (p_implicants_pos[indices[c] * implicant_words + word_index] & (value_bit_mask << bit_index)) {
value = (p_implicants_val[indices[c] * implicant_words + word_index] >> bit_index) & value_bit_mask;
value++; // 0 indicates a minimization, so we increment the value
}
p_sol[r * solmin + c] = value; // transposed
}
}
ccubes_destroy(ctx);
R_Free(p_pichart);
R_Free(p_implicants_val);
R_Free(p_implicants_pos);
R_Free(p_pichart_pos);
R_Free(p_covered);
UNPROTECT(1);
return (sol);
}
long long unsigned int nchoosek(
int n,
int k
) {
if (k > n) return 0;
if (k == 0 || k == n) return 1;
long long unsigned int result = 1;
if (k > n - k) {
k = n - k;
}
for (int i = 0; i < k; i++) {
// result = result * (n - i) / (i + 1);
// Check for potential overflow before multiplication
if (result > ULLONG_MAX / (n - i)) {
return 0; // Indicate overflow
}
result *= (n - i);
// Check for potential overflow before division
if (result % (i + 1) != 0) {
return 0; // Indicate overflow
}
result /= (i + 1);
}
return result;
}