#include #include #include #include #include #include #include #include #include #include "CCubes.h" #ifdef _OPENMP #undef match #include #endif #include "real.h" #include "cl_setup.h" #include "clccubes.h" #include "config.h" #include "logging.h" SEXP CCubes(SEXP tt) { // $ export BITS_PER_WORD=32 in the Terminal, before running R // 32 bits per word, in bit shifting representation char *bits_per_word = getenv("BITS_PER_WORD"); // Read from the PATH int BITS_PER_WORD = bits_per_word ? atoi(bits_per_word) : 32; if (BITS_PER_WORD != 8 && BITS_PER_WORD != 16 && BITS_PER_WORD != 32 && BITS_PER_WORD != 64) { BITS_PER_WORD = 32; // Default to 32 } // $ export PRINT_INFO=1 in the Terminal, before running R char *print_info = getenv("PRINT_INFO"); // Read from the PATH Rboolean PRINT_INFO = print_info && print_info[0] == '1'; int multiplier = 0; struct timeval start, end; double elapsed_time; config_set_int("log", LOG_LEVEL_WARN); config_set_int("log:clccubes", LOG_LEVEL_WARN); config_set_int("log:ccubes", LOG_LEVEL_DEBUG); config_set_int("log:cl", LOG_LEVEL_DEBUG); char log[100] = {0}; sprintf(log, "log-ccubes"); config_set_string("out", log); if (PRINT_INFO) { Rprintf("--- START ---\n"); gettimeofday(&start, NULL); // Start time } int *p_tt = INTEGER(tt); int ttrows = nrows(tt); // number of rows in the data matrix int ninputs = ncols(tt) - 1; // number of inputs (columns - 1, the last one is the outcome) // calculate the number of positive output rows (the ON set) int posrows = 0; for (int r = 0; r < ttrows; r++) { posrows += p_tt[ninputs * ttrows + r]; } // calculate the number of negative output rows (the OFF set) int negrows = ttrows - posrows; if (negrows == 0) { // if there are no negative output rows, no PIs can be found // all inputs will be completely minimized return(R_NilValue); } // split the minterms in the ON and OFF set matrices int ON_set[posrows * ninputs]; int OFF_set[ninputs * negrows]; int rowpos = 0, rowneg = 0; int max_value = 0; for (int r = 0; r < ttrows; r++) { if (p_tt[ninputs * ttrows + r] == 1) { // positive for (int c = 0; c < ninputs; c++) { int value = p_tt[c * ttrows + r]; ON_set[c * posrows + rowpos] = value; if (value > max_value) { max_value = value; } } rowpos += 1; } else { // negative for (int c = 0; c < ninputs; c++) { int value = p_tt[c * ttrows + r]; OFF_set[c * negrows + rowneg] = value; if (value > max_value) { max_value = value; } } rowneg += 1; } } int value_bit_width = 0; while (max_value > 0) { max_value >>= 1; // Shift right until no bits remain value_bit_width++; } // calculate the number of values (biggest number) for each input int nofvalues[ninputs]; int nofpi[ninputs]; for (int i = 0; i < ninputs; i++) { nofvalues[i] = 0; // initialize nofpi[i] = 0; // initialize for (int r = 0; r < ttrows; r++) { if (nofvalues[i] < p_tt[i * ttrows + r]) { nofvalues[i] = p_tt[i * ttrows + r]; } } // add 1 because if the biggest number is 2 then it has three levels: 0, 1 and 2 nofvalues[i] += 1; if (nofvalues[i] == 1) { // no input ever has less than two values nofvalues[i] = 2; } } // preallocating for an estimated large number of 10000 found PIs // this number will be iteratively increased when the found PIs reach the upper limit int estimPI = 250000; // the index of the PIs, in descending order of their number of covered ON-set minterms int *p_covered = R_Calloc(estimPI, int); // many PIs will have the same coverage, but they don't necessarily cover the same minterms // to employ row dominance when solving the PI chart, we need to compare the coverage of the // current PI with the coverage of the previous PIs. If this PI survives the comparison, its // coverage has to be added in the p_covered vector, and its order in the p_covered // vector, at the last index of the PI coverage with the same number of minterms int last_index[posrows]; // descending order // p_pichart = malloc(estimPI * posrows * sizeof(int)); // memset(p_pichart, false, estimPI * posrows * sizeof(int)); int *p_pichart = (int *) R_Calloc(estimPI * posrows, int); // prefixing (int *) prefills in all values with 0s int pichart_words = (posrows + BITS_PER_WORD - 1) / BITS_PER_WORD; // Words needed per PI chart columns unsigned int *p_pichart_pos = (unsigned int *) R_Calloc(estimPI * pichart_words, unsigned int); int implicant_words = (ninputs + BITS_PER_WORD - 1) / BITS_PER_WORD; // Words needed per PI representation unsigned int *p_implicants_pos = (unsigned int *) R_Calloc(estimPI * implicant_words, unsigned int); unsigned int *p_implicants_val = (unsigned int *) R_Calloc(estimPI * implicant_words, unsigned int); int prevfoundPI = 0; // the number of previously found PIs int foundPI = 0; int prevsolmin = 0; // the minimum number of PIs that solve the PI chart int solmin = 0; // the positions of the PIs solving the PI chart // a vector which can never be lengthier than the number of ON minterms (posrows) int previndices[posrows]; int indices[posrows]; for (int i = 0; i < posrows; i++) { previndices[i] = 0; indices[i] = 0; last_index[i] = 0; } Rboolean ON_set_covered = false; if (PRINT_INFO) { Rprintf("ON-set minterms: %d\n", posrows); #ifdef _OPENMP Rprintf("OpenMP enabled, %d workers\n", omp_get_max_threads()); #endif } int stop_counter = 0; // to stop if two consecutive levels of complexity yield no PIs int k; for (k = 1; k <= ninputs; k++) { if (PRINT_INFO) { Rprintf("---k: %d\n", k); } int n_tasks = 512; struct ccubes_context *ctx = NULL; for (int task = 0; task < nchoosek(ninputs, k); task+=n_tasks) { bool *coverage; unsigned int *fixed_bits; unsigned int *value_bits; unsigned int *pichart_values; ctx = ccubes_do_tasks(n_tasks, task, k, ninputs, posrows, negrows, implicant_words, value_bit_width, pichart_words, estimPI, nofvalues, ON_set, OFF_set, p_implicants_pos, p_implicants_val, last_index, p_covered, p_pichart_pos, coverage, fixed_bits, value_bits, pichart_values ); if (ctx == NULL) { log_error("ccubes", "ccubes_do_tasks failed"); } for (int i = 0; i < n_tasks; i++) { log_debug("ccubes", "Task %d", i); log_debug_raw("ccubes", "coverage[%d]:", i); for (int j = 0; j < posrows; j++) { log_debug_raw("ccubes", " %d", ctx->h_coverage[i * posrows + j]); } log_debug_raw("ccubes", "\n"); log_debug_raw("ccubes", "fixed_bits[%d]:", i); for (int j = 0; j < implicant_words; j++) { log_debug_raw("ccubes", " %d", ctx->h_fixed_bits[i * implicant_words + j]); } log_debug_raw("ccubes", "\n"); log_debug_raw("ccubes", "value_bits[%d]:", i); for (int j = 0; j < implicant_words; j++) { log_debug_raw("ccubes", " %d", ctx->h_value_bits[i * implicant_words + j]); } log_debug_raw("ccubes", "\n"); log_debug_raw("ccubes", "pichart_values[%d]:", i); for (int j = 0; j < pichart_words; j++) { log_debug_raw("ccubes", " %d", ctx->h_pichart_values[i * pichart_words + j]); } log_debug_raw("ccubes", "\n"); } } #ifdef _OPENMP #pragma omp parallel for schedule(dynamic) #endif for (int task = 0; task < nchoosek(ninputs, k); task++) { int tempk[k]; int x = 0; int start_point = task; // fill the combination for the current task for (int i = 0; i < k; i++) { while (nchoosek(ninputs - (x + 1), k - (i + 1)) <= start_point) { start_point -= nchoosek(ninputs - (x + 1), k - (i + 1)); x++; } tempk[i] = x; x++; } // allocate vectors of decimal row numbers for the positive and negative rows int decpos[posrows]; int decneg[negrows]; // create the vector of multiple bases, useful when calculating the decimal representation // of a particular combination of columns, for each row int mbase[k]; mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input // calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with // 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6 for (int i = 1; i < k; i++) { mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]]; } // calculate decimal numbers, using mbase, fills in decpos and decneg for (int r = 0; r < posrows; r++) { decpos[r] = 0; for (int c = 0; c < k; c++) { decpos[r] += ON_set[tempk[c] * posrows + r] * mbase[c]; } } for (int r = 0; r < negrows; r++) { decneg[r] = 0; for (int c = 0; c < k; c++) { decneg[r] += OFF_set[tempk[c] * negrows + r] * mbase[c]; } } int possible_rows[posrows]; Rboolean possible_cover[posrows]; possible_cover[0] = true; // Rboolean flag, to be set with false if found among the OFF set int found = 0; // identifies all unique decimal rows, for the selected combination of k inputs for (int r = 0; r < posrows; r++) { int prev = 0; Rboolean unique = true; // Rboolean flag, assume the row is unique while (prev < found && unique) { unique = decpos[possible_rows[prev]] != decpos[r]; prev++; } if (unique) { possible_rows[found] = r; possible_cover[found] = true; found++; } } if (found > 0) { // some of the ON set numbers are possible PIs (not found in the OFF set) int frows[found]; // verify if this is a possible PI // (if the same decimal number is not found in the OFF set) for (int i = found - 1; i >= 0; i--) { int j = 0; while (j < negrows && possible_cover[i]) { if (decpos[possible_rows[i]] == decneg[j]) { possible_cover[i] = false; found--; } j++; } if (possible_cover[i]) { frows[found - i - 1] = possible_rows[i]; } } // Rprintf("task: %d; rows: %d\n", task, found); for (int f = 0; f < found; f++) { // create a temporary vector of length k, containing the values from the initial ON set // plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc. int tempc[k]; // using bit shifting, store the fixed bits and value bits unsigned int fixed_bits[implicant_words]; unsigned int value_bits[implicant_words]; for (int i = 0; i < implicant_words; i++) { fixed_bits[i] = 0U; value_bits[i] = 0U; } for (int c = 0; c < k; c++) { int value = ON_set[tempk[c] * posrows + frows[f]]; tempc[c] = value + 1; int word_index = tempk[c] / BITS_PER_WORD; int bit_index = tempk[c] % BITS_PER_WORD; fixed_bits[word_index] |= 1U << bit_index; value_bits[word_index] |= (unsigned int)value << (bit_index * value_bit_width); } // check if the current PI is not redundant Rboolean redundant = false; int i = 0; while (i < prevfoundPI && !redundant) { // /* // - ck contains the complexity level for each of the previously found non-redundant PIs // - indx is a matrix containing the indexes of the columns where the values were stored // - a redundant PI is one for which all values from a previous PI are exactly the same: // 0 0 1 2 0, let's say previously found PI // which means a corresponding ck = 2 and a corresponding indx = [3, 4] // 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal // therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2 // */ Rboolean is_subset = true; // Assume it's a subset unless proven otherwise for (int w = 0; w < implicant_words; w++) { // If the new PI has values on positions outside the existing PI’s fixed positions, it’s not a subset if ((fixed_bits[w] & p_implicants_pos[i * implicant_words + w]) != p_implicants_pos[i * implicant_words + w]) { is_subset = false; break; } // then compare the value bits, if one or more values on those positions are different, it’s not a subset if ((value_bits[w] & p_implicants_val[i * implicant_words + w]) != p_implicants_val[i * implicant_words + w]) { is_subset = false; break; } } redundant = is_subset; i++; } if (redundant) continue; Rboolean coverage[posrows]; int covsum = 0; unsigned int pichart_values[pichart_words]; for (int w = 0; w < pichart_words; w++) { pichart_values[w] = 0U; } for (int r = 0; r < posrows; r++) { coverage[r] = decpos[r] == decpos[frows[f]]; if (coverage[r]) { int word_index = r / BITS_PER_WORD; int bit_index = r % BITS_PER_WORD; pichart_values[word_index] |= (1U << bit_index); } covsum += coverage[r]; } // verify row dominance int rd = 0; while (rd < last_index[covsum - 1] && !redundant) { bool dominated = true; for (int w = 0; w < pichart_words; w++) { if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * pichart_words + w]) != pichart_values[w]) { dominated = false; break; } } redundant = dominated; rd++; } if (redundant) continue; // Rprintf("It is a prime implicant\n"); // This operation first gets a new index to push in the global array in a concurrent way // Then adds the result there. // We could synchronize only the index and let the copy operation happen in parallel BUT this // creates a false sharing problem and the performance is down by several factors. #ifdef _OPENMP #pragma omp critical #endif { // push the PI information to the global arrays for (int i = foundPI; i > last_index[covsum - 1]; i--) { p_covered[i] = p_covered[i - 1]; } p_covered[last_index[covsum - 1]] = foundPI; for (int l = 1; l < covsum; l++) { last_index[l - 1] += 1; } for (int w = 0; w < implicant_words; w++) { p_implicants_pos[implicant_words * foundPI + w] = fixed_bits[w]; p_implicants_val[implicant_words * foundPI + w] = value_bits[w]; } // populate the PI chart for (int r = 0; r < posrows; r++) { for (int w = 0; w < pichart_words; w++) { p_pichart_pos[foundPI * pichart_words + w] = pichart_values[w]; } p_pichart[posrows * foundPI + r] = coverage[r]; } ++foundPI; // when needed, increase allocated memory if (foundPI / estimPI > 0.9) { int old_size = estimPI; estimPI += 100000; p_pichart = R_Realloc(p_pichart, posrows * estimPI, int); p_pichart_pos = R_Realloc(p_pichart_pos, estimPI, unsigned int); p_implicants_val = R_Realloc(p_implicants_val, ninputs * estimPI, unsigned int); p_implicants_pos = R_Realloc(p_implicants_pos, ninputs * estimPI, unsigned int); p_covered = R_Realloc(p_covered, estimPI, int); for (unsigned int i = old_size; i < posrows * estimPI; i++) { p_pichart[i] = 0; } for (unsigned int i = old_size; i < estimPI; i++) { p_pichart_pos[i] = 0U; } for (unsigned int i = old_size; i < ninputs * estimPI; i++) { p_implicants_val[i] = 0U; p_implicants_pos[i] = 0U; } if (PRINT_INFO) { multiplier++; Rprintf("%dx ", multiplier); } } } } } } nofpi[k - 1] = foundPI; if (foundPI > 0 && !ON_set_covered) { Rboolean test_coverage = true; int r = 0; while (r < posrows && test_coverage) { Rboolean minterm_covered = false; int c = 0; while (c < foundPI && !minterm_covered) { minterm_covered = p_pichart[c * posrows + r]; c++; } test_coverage = minterm_covered; r++; } ON_set_covered = test_coverage; } if (ON_set_covered) { // Rprintf("posrows: %d; foundPI: %d\n", posrows, foundPI); if (PRINT_INFO) { gettimeofday(&end, NULL); // End time elapsed_time = (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1e6; Rprintf("Time taken finding %d PIs: %f sec.\n", foundPI, elapsed_time); gettimeofday(&start, NULL); } SEXP pic = PROTECT(allocMatrix(INTSXP, posrows, foundPI)); for (long long unsigned int i = 0; i < posrows * foundPI; i++) { INTEGER(pic)[i] = p_pichart[i]; } SEXP PIlayers = PROTECT(allocVector(INTSXP, ninputs)); for (int i = 0; i < ninputs; i++) { INTEGER(PIlayers)[i] = nofpi[i]; } setAttrib(pic, install("PIlayers"), PIlayers); // if this file is run directly using SHLIB, the following line is needed // R_ParseEvalString("library(IEEE)", R_GlobalEnv); SEXP pkg_env = PROTECT(R_FindNamespace(mkString("IEEE"))); SEXP solvechart = PROTECT(Rf_findVarInFrame(pkg_env, Rf_install("solvechart"))); SEXP evalinR = PROTECT(R_tryEval(Rf_lang2(solvechart, pic), pkg_env, NULL)); solmin = length(evalinR); for (int i = 0; i < solmin; i++) { indices[i] = INTEGER(evalinR)[i] - 1; // R is 1-based } UNPROTECT(5); // Rprintf("solution minima: %d\n", solmin); if (PRINT_INFO) { gettimeofday(&end, NULL); elapsed_time = (end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec) / 1e6; Rprintf("Time spent solving the PI chart: %f sec.\n", elapsed_time); gettimeofday(&start, NULL); } if (solmin == prevsolmin) { // the minimum number of PIs did not change in the current level of complexity // we can safely retain the less complex PIs from the previous level for (int i = 0; i < solmin; i++) { indices[i] = previndices[i]; } stop_counter += 1; } else { // this means solmin is in fact smaller than the previously found solmin // or it is the very first time a solmin was found // only here it makes sense to overwrite prevsolmin and previndices, // otherwise they are just as good as the ones from the previous level prevsolmin = solmin; for (int i = 0; i < solmin; i++) { previndices[i] = indices[i]; } stop_counter = 0; } } prevfoundPI = foundPI; // printf("stop_counter: %d\n", stop_counter); // One level of complexity up, and the solution minima does not change if (stop_counter > 0) { // the search can stop break; } } // printf("solmin: %d\n", solmin); if (PRINT_INFO) { Rprintf("--- END ---\n"); } SEXP sol = PROTECT(allocMatrix(INTSXP, solmin, ninputs)); int *p_sol = INTEGER(sol); for (int c = 0; c < solmin; c++) { for (int r = 0; r < ninputs; r++) { unsigned int value = 0; int word_index = r / BITS_PER_WORD; // Word index within the implicant int bit_index = r % BITS_PER_WORD; // Bit position within the word if (p_implicants_pos[indices[c] * implicant_words + word_index] & (1U << bit_index)) { value = 1U + ((p_implicants_val[indices[c] * implicant_words + word_index] >> (bit_index * value_bit_width)) & ((1U << value_bit_width) - 1U)); } p_sol[r * solmin + c] = value; // transposed } } R_Free(p_pichart); R_Free(p_implicants_val); R_Free(p_implicants_pos); R_Free(p_pichart_pos); R_Free(p_covered); UNPROTECT(1); return (sol); } long long unsigned int nchoosek( int n, int k ) { if (k == 0 || k == n) return 1; if (k == 1) return n; long long unsigned int result = 1; if (k > n - k) { k = n - k; } for (int i = 0; i < k; i++) { result = result * (n - i) / (i + 1); } return result; }