ccubes-cl/src/CCubes.c

686 lines
24 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <R_ext/RS.h>
#include <R_ext/Boolean.h>
#include <R.h>
#include <Rinternals.h>
#include <Rmath.h>
#include <R_ext/Rdynload.h>
#include <sys/time.h>
#include <stdbool.h>
#include <math.h>
#include "CCubes.h"
#ifdef _OPENMP
#undef match
#include <omp.h>
#endif
#include "real.h"
#include "cl_setup.h"
#include "clccubes.h"
#include "config.h"
#include "logging.h"
SEXP CCubes(SEXP tt) {
// $ export BITS_PER_WORD=32 in the Terminal, before running R
// 32 bits per word, in bit shifting representation
char *bits_per_word = getenv("BITS_PER_WORD"); // Read from the PATH
int BITS_PER_WORD = bits_per_word ? atoi(bits_per_word) : 32;
if (BITS_PER_WORD != 8 && BITS_PER_WORD != 16 && BITS_PER_WORD != 32 && BITS_PER_WORD != 64) {
BITS_PER_WORD = 32; // Default to 32
}
// $ export PRINT_INFO=1 in the Terminal, before running R
char *print_info = getenv("PRINT_INFO"); // Read from the PATH
Rboolean PRINT_INFO = print_info && print_info[0] == '1';
int multiplier = 0;
struct timeval start, end;
double elapsed_time;
config_set_int("log", LOG_LEVEL_WARN);
config_set_int("log:clccubes", LOG_LEVEL_WARN);
config_set_int("log:ccubes", LOG_LEVEL_DEBUG);
config_set_int("log:cl", LOG_LEVEL_DEBUG);
char log[100] = {0};
sprintf(log, "log-ccubes");
config_set_string("out", log);
if (PRINT_INFO) {
Rprintf("--- START ---\n");
gettimeofday(&start, NULL); // Start time
}
int *p_tt = INTEGER(tt);
int ttrows = nrows(tt); // number of rows in the data matrix
int ninputs = ncols(tt) - 1; // number of inputs (columns - 1, the last one is the outcome)
// calculate the number of positive output rows (the ON set)
int posrows = 0;
for (int r = 0; r < ttrows; r++) {
posrows += p_tt[ninputs * ttrows + r];
}
// calculate the number of negative output rows (the OFF set)
int negrows = ttrows - posrows;
if (negrows == 0) {
// if there are no negative output rows, no PIs can be found
// all inputs will be completely minimized
return(R_NilValue);
}
// split the minterms in the ON and OFF set matrices
int ON_set[posrows * ninputs];
int OFF_set[ninputs * negrows];
int rowpos = 0, rowneg = 0;
int max_value = 0;
for (int r = 0; r < ttrows; r++) {
if (p_tt[ninputs * ttrows + r] == 1) { // positive
for (int c = 0; c < ninputs; c++) {
int value = p_tt[c * ttrows + r];
ON_set[c * posrows + rowpos] = value;
if (value > max_value) {
max_value = value;
}
}
rowpos += 1;
}
else { // negative
for (int c = 0; c < ninputs; c++) {
int value = p_tt[c * ttrows + r];
OFF_set[c * negrows + rowneg] = value;
if (value > max_value) {
max_value = value;
}
}
rowneg += 1;
}
}
int value_bit_width = 0;
while (max_value > 0) {
max_value >>= 1; // Shift right until no bits remain
value_bit_width++;
}
// calculate the number of values (biggest number) for each input
int nofvalues[ninputs];
int nofpi[ninputs];
for (int i = 0; i < ninputs; i++) {
nofvalues[i] = 0; // initialize
nofpi[i] = 0; // initialize
for (int r = 0; r < ttrows; r++) {
if (nofvalues[i] < p_tt[i * ttrows + r]) {
nofvalues[i] = p_tt[i * ttrows + r];
}
}
// add 1 because if the biggest number is 2 then it has three levels: 0, 1 and 2
nofvalues[i] += 1;
if (nofvalues[i] == 1) {
// no input ever has less than two values
nofvalues[i] = 2;
}
}
// preallocating for an estimated large number of 10000 found PIs
// this number will be iteratively increased when the found PIs reach the upper limit
int estimPI = 250000;
// the index of the PIs, in descending order of their number of covered ON-set minterms
int *p_covered = R_Calloc(estimPI, int);
// many PIs will have the same coverage, but they don't necessarily cover the same minterms
// to employ row dominance when solving the PI chart, we need to compare the coverage of the
// current PI with the coverage of the previous PIs. If this PI survives the comparison, its
// coverage has to be added in the p_covered vector, and its order in the p_covered
// vector, at the last index of the PI coverage with the same number of minterms
int last_index[posrows]; // descending order
// p_pichart = malloc(estimPI * posrows * sizeof(int));
// memset(p_pichart, false, estimPI * posrows * sizeof(int));
int *p_pichart = (int *) R_Calloc(estimPI * posrows, int);
// prefixing (int *) prefills in all values with 0s
int pichart_words = (posrows + BITS_PER_WORD - 1) / BITS_PER_WORD; // Words needed per PI chart columns
unsigned int *p_pichart_pos = (unsigned int *) R_Calloc(estimPI * pichart_words, unsigned int);
int implicant_words = (ninputs + BITS_PER_WORD - 1) / BITS_PER_WORD; // Words needed per PI representation
unsigned int *p_implicants_pos = (unsigned int *) R_Calloc(estimPI * implicant_words, unsigned int);
unsigned int *p_implicants_val = (unsigned int *) R_Calloc(estimPI * implicant_words, unsigned int);
int prevfoundPI = 0; // the number of previously found PIs
int foundPI = 0;
int prevsolmin = 0; // the minimum number of PIs that solve the PI chart
int solmin = 0;
// the positions of the PIs solving the PI chart
// a vector which can never be lengthier than the number of ON minterms (posrows)
int previndices[posrows];
int indices[posrows];
for (int i = 0; i < posrows; i++) {
previndices[i] = 0;
indices[i] = 0;
last_index[i] = 0;
}
Rboolean ON_set_covered = false;
if (PRINT_INFO) {
Rprintf("ON-set minterms: %d\n", posrows);
#ifdef _OPENMP
Rprintf("OpenMP enabled, %d workers\n", omp_get_max_threads());
#endif
}
int stop_counter = 0; // to stop if two consecutive levels of complexity yield no PIs
int k;
for (k = 1; k <= ninputs; k++) {
if (PRINT_INFO) {
Rprintf("---k: %d\n", k);
}
int n_tasks = 512;
struct ccubes_context *ctx = NULL;
for (int task = 0; task < nchoosek(ninputs, k); task+=n_tasks) {
bool *coverage;
unsigned int *fixed_bits;
unsigned int *value_bits;
unsigned int *pichart_values;
ctx = ccubes_do_tasks(n_tasks,
task,
k,
ninputs,
posrows,
negrows,
implicant_words,
value_bit_width,
pichart_words,
estimPI,
nofvalues,
ON_set,
OFF_set,
p_implicants_pos,
p_implicants_val,
last_index,
p_covered,
p_pichart_pos,
coverage,
fixed_bits,
value_bits,
pichart_values
);
if (ctx == NULL) {
log_error("ccubes", "ccubes_do_tasks failed");
}
for (int i = 0; i < n_tasks; i++) {
log_debug("ccubes", "Task %d", i);
log_debug_raw("ccubes", "coverage[%d]:", i);
for (int j = 0; j < posrows; j++) {
log_debug_raw("ccubes", " %d",
ctx->h_coverage[i * posrows + j]);
}
log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "fixed_bits[%d]:", i);
for (int j = 0; j < implicant_words; j++) {
log_debug_raw("ccubes", " %d",
ctx->h_fixed_bits[i * implicant_words + j]);
}
log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "value_bits[%d]:", i);
for (int j = 0; j < implicant_words; j++) {
log_debug_raw("ccubes", " %d",
ctx->h_value_bits[i * implicant_words + j]);
}
log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "pichart_values[%d]:", i);
for (int j = 0; j < pichart_words; j++) {
log_debug_raw("ccubes", " %d",
ctx->h_pichart_values[i * pichart_words + j]);
}
log_debug_raw("ccubes", "\n");
}
}
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
for (int task = 0; task < nchoosek(ninputs, k); task++) {
int tempk[k];
int x = 0;
int start_point = task;
// fill the combination for the current task
for (int i = 0; i < k; i++) {
while (nchoosek(ninputs - (x + 1), k - (i + 1)) <= start_point) {
start_point -= nchoosek(ninputs - (x + 1), k - (i + 1));
x++;
}
tempk[i] = x;
x++;
}
// allocate vectors of decimal row numbers for the positive and negative rows
int decpos[posrows];
int decneg[negrows];
// create the vector of multiple bases, useful when calculating the decimal representation
// of a particular combination of columns, for each row
int mbase[k];
mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input
// calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with
// 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6
for (int i = 1; i < k; i++) {
mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]];
}
// calculate decimal numbers, using mbase, fills in decpos and decneg
for (int r = 0; r < posrows; r++) {
decpos[r] = 0;
for (int c = 0; c < k; c++) {
decpos[r] += ON_set[tempk[c] * posrows + r] * mbase[c];
}
}
for (int r = 0; r < negrows; r++) {
decneg[r] = 0;
for (int c = 0; c < k; c++) {
decneg[r] += OFF_set[tempk[c] * negrows + r] * mbase[c];
}
}
int possible_rows[posrows];
Rboolean possible_cover[posrows];
possible_cover[0] = true; // Rboolean flag, to be set with false if found among the OFF set
int found = 0;
// identifies all unique decimal rows, for the selected combination of k inputs
for (int r = 0; r < posrows; r++) {
int prev = 0;
Rboolean unique = true; // Rboolean flag, assume the row is unique
while (prev < found && unique) {
unique = decpos[possible_rows[prev]] != decpos[r];
prev++;
}
if (unique) {
possible_rows[found] = r;
possible_cover[found] = true;
found++;
}
}
if (found > 0) {
// some of the ON set numbers are possible PIs (not found in the OFF set)
int frows[found];
// verify if this is a possible PI
// (if the same decimal number is not found in the OFF set)
for (int i = found - 1; i >= 0; i--) {
int j = 0;
while (j < negrows && possible_cover[i]) {
if (decpos[possible_rows[i]] == decneg[j]) {
possible_cover[i] = false;
found--;
}
j++;
}
if (possible_cover[i]) {
frows[found - i - 1] = possible_rows[i];
}
}
// Rprintf("task: %d; rows: %d\n", task, found);
for (int f = 0; f < found; f++) {
// create a temporary vector of length k, containing the values from the initial ON set
// plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc.
int tempc[k];
// using bit shifting, store the fixed bits and value bits
unsigned int fixed_bits[implicant_words];
unsigned int value_bits[implicant_words];
for (int i = 0; i < implicant_words; i++) {
fixed_bits[i] = 0U;
value_bits[i] = 0U;
}
for (int c = 0; c < k; c++) {
int value = ON_set[tempk[c] * posrows + frows[f]];
tempc[c] = value + 1;
int word_index = tempk[c] / BITS_PER_WORD;
int bit_index = tempk[c] % BITS_PER_WORD;
fixed_bits[word_index] |= 1U << bit_index;
value_bits[word_index] |= (unsigned int)value << (bit_index * value_bit_width);
}
// check if the current PI is not redundant
Rboolean redundant = false;
int i = 0;
while (i < prevfoundPI && !redundant) {
// /*
// - ck contains the complexity level for each of the previously found non-redundant PIs
// - indx is a matrix containing the indexes of the columns where the values were stored
// - a redundant PI is one for which all values from a previous PI are exactly the same:
// 0 0 1 2 0, let's say previously found PI
// which means a corresponding ck = 2 and a corresponding indx = [3, 4]
// 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal
// therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2
// */
Rboolean is_subset = true; // Assume it's a subset unless proven otherwise
for (int w = 0; w < implicant_words; w++) {
// If the new PI has values on positions outside the existing PIs fixed positions, its not a subset
if ((fixed_bits[w] & p_implicants_pos[i * implicant_words + w]) != p_implicants_pos[i * implicant_words + w]) {
is_subset = false;
break;
}
// then compare the value bits, if one or more values on those positions are different, its not a subset
if ((value_bits[w] & p_implicants_val[i * implicant_words + w]) != p_implicants_val[i * implicant_words + w]) {
is_subset = false;
break;
}
}
redundant = is_subset;
i++;
}
if (redundant) continue;
Rboolean coverage[posrows];
int covsum = 0;
unsigned int pichart_values[pichart_words];
for (int w = 0; w < pichart_words; w++) {
pichart_values[w] = 0U;
}
for (int r = 0; r < posrows; r++) {
coverage[r] = decpos[r] == decpos[frows[f]];
if (coverage[r]) {
int word_index = r / BITS_PER_WORD;
int bit_index = r % BITS_PER_WORD;
pichart_values[word_index] |= (1U << bit_index);
}
covsum += coverage[r];
}
// verify row dominance
int rd = 0;
while (rd < last_index[covsum - 1] && !redundant) {
bool dominated = true;
for (int w = 0; w < pichart_words; w++) {
if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * pichart_words + w]) != pichart_values[w]) {
dominated = false;
break;
}
}
redundant = dominated;
rd++;
}
if (redundant) continue;
// Rprintf("It is a prime implicant\n");
// This operation first gets a new index to push in the global array in a concurrent way
// Then adds the result there.
// We could synchronize only the index and let the copy operation happen in parallel BUT this
// creates a false sharing problem and the performance is down by several factors.
#ifdef _OPENMP
#pragma omp critical
#endif
{
// push the PI information to the global arrays
for (int i = foundPI; i > last_index[covsum - 1]; i--) {
p_covered[i] = p_covered[i - 1];
}
p_covered[last_index[covsum - 1]] = foundPI;
for (int l = 1; l < covsum; l++) {
last_index[l - 1] += 1;
}
for (int w = 0; w < implicant_words; w++) {
p_implicants_pos[implicant_words * foundPI + w] = fixed_bits[w];
p_implicants_val[implicant_words * foundPI + w] = value_bits[w];
}
// populate the PI chart
for (int r = 0; r < posrows; r++) {
for (int w = 0; w < pichart_words; w++) {
p_pichart_pos[foundPI * pichart_words + w] = pichart_values[w];
}
p_pichart[posrows * foundPI + r] = coverage[r];
}
++foundPI;
// when needed, increase allocated memory
if (foundPI / estimPI > 0.9) {
int old_size = estimPI;
estimPI += 100000;
p_pichart = R_Realloc(p_pichart, posrows * estimPI, int);
p_pichart_pos = R_Realloc(p_pichart_pos, estimPI, unsigned int);
p_implicants_val = R_Realloc(p_implicants_val, ninputs * estimPI, unsigned int);
p_implicants_pos = R_Realloc(p_implicants_pos, ninputs * estimPI, unsigned int);
p_covered = R_Realloc(p_covered, estimPI, int);
for (unsigned int i = old_size; i < posrows * estimPI; i++) {
p_pichart[i] = 0;
}
for (unsigned int i = old_size; i < estimPI; i++) {
p_pichart_pos[i] = 0U;
}
for (unsigned int i = old_size; i < ninputs * estimPI; i++) {
p_implicants_val[i] = 0U;
p_implicants_pos[i] = 0U;
}
if (PRINT_INFO) {
multiplier++;
Rprintf("%dx ", multiplier);
}
}
}
}
}
}
nofpi[k - 1] = foundPI;
if (foundPI > 0 && !ON_set_covered) {
Rboolean test_coverage = true;
int r = 0;
while (r < posrows && test_coverage) {
Rboolean minterm_covered = false;
int c = 0;
while (c < foundPI && !minterm_covered) {
minterm_covered = p_pichart[c * posrows + r];
c++;
}
test_coverage = minterm_covered;
r++;
}
ON_set_covered = test_coverage;
}
if (ON_set_covered) {
// Rprintf("posrows: %d; foundPI: %d\n", posrows, foundPI);
if (PRINT_INFO) {
gettimeofday(&end, NULL); // End time
elapsed_time = (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1e6;
Rprintf("Time taken finding %d PIs: %f sec.\n", foundPI, elapsed_time);
gettimeofday(&start, NULL);
}
SEXP pic = PROTECT(allocMatrix(INTSXP, posrows, foundPI));
for (long long unsigned int i = 0; i < posrows * foundPI; i++) {
INTEGER(pic)[i] = p_pichart[i];
}
SEXP PIlayers = PROTECT(allocVector(INTSXP, ninputs));
for (int i = 0; i < ninputs; i++) {
INTEGER(PIlayers)[i] = nofpi[i];
}
setAttrib(pic, install("PIlayers"), PIlayers);
// if this file is run directly using SHLIB, the following line is needed
// R_ParseEvalString("library(IEEE)", R_GlobalEnv);
SEXP pkg_env = PROTECT(R_FindNamespace(mkString("IEEE")));
SEXP solvechart = PROTECT(Rf_findVarInFrame(pkg_env, Rf_install("solvechart")));
SEXP evalinR = PROTECT(R_tryEval(Rf_lang2(solvechart, pic), pkg_env, NULL));
solmin = length(evalinR);
for (int i = 0; i < solmin; i++) {
indices[i] = INTEGER(evalinR)[i] - 1; // R is 1-based
}
UNPROTECT(5);
// Rprintf("solution minima: %d\n", solmin);
if (PRINT_INFO) {
gettimeofday(&end, NULL);
elapsed_time = (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1e6;
Rprintf("Time spent solving the PI chart: %f sec.\n", elapsed_time);
gettimeofday(&start, NULL);
}
if (solmin == prevsolmin) {
// the minimum number of PIs did not change in the current level of complexity
// we can safely retain the less complex PIs from the previous level
for (int i = 0; i < solmin; i++) {
indices[i] = previndices[i];
}
stop_counter += 1;
}
else {
// this means solmin is in fact smaller than the previously found solmin
// or it is the very first time a solmin was found
// only here it makes sense to overwrite prevsolmin and previndices,
// otherwise they are just as good as the ones from the previous level
prevsolmin = solmin;
for (int i = 0; i < solmin; i++) {
previndices[i] = indices[i];
}
stop_counter = 0;
}
}
prevfoundPI = foundPI;
// printf("stop_counter: %d\n", stop_counter);
// One level of complexity up, and the solution minima does not change
if (stop_counter > 0) {
// the search can stop
break;
}
}
// printf("solmin: %d\n", solmin);
if (PRINT_INFO) {
Rprintf("--- END ---\n");
}
SEXP sol = PROTECT(allocMatrix(INTSXP, solmin, ninputs));
int *p_sol = INTEGER(sol);
for (int c = 0; c < solmin; c++) {
for (int r = 0; r < ninputs; r++) {
unsigned int value = 0;
int word_index = r / BITS_PER_WORD; // Word index within the implicant
int bit_index = r % BITS_PER_WORD; // Bit position within the word
if (p_implicants_pos[indices[c] * implicant_words + word_index] & (1U << bit_index)) {
value = 1U + ((p_implicants_val[indices[c] * implicant_words + word_index] >> (bit_index * value_bit_width)) & ((1U << value_bit_width) - 1U));
}
p_sol[r * solmin + c] = value; // transposed
}
}
R_Free(p_pichart);
R_Free(p_implicants_val);
R_Free(p_implicants_pos);
R_Free(p_pichart_pos);
R_Free(p_covered);
UNPROTECT(1);
return (sol);
}
long long unsigned int nchoosek(
int n,
int k
) {
if (k == 0 || k == n) return 1;
if (k == 1) return n;
long long unsigned int result = 1;
if (k > n - k) {
k = n - k;
}
for (int i = 0; i < k; i++) {
result = result * (n - i) / (i + 1);
}
return result;
}