From feb82c35df1d6eb69529f7fda78ede61d18e119f Mon Sep 17 00:00:00 2001 From: Paul Irofti Date: Sat, 29 Mar 2025 15:16:39 +0200 Subject: [PATCH] Adapt OMP critical section for the OpenCL variant. --- src/CCubes.c | 355 +++++++++++---------------------------------------- 1 file changed, 71 insertions(+), 284 deletions(-) diff --git a/src/CCubes.c b/src/CCubes.c index fa5838b..21b15c6 100755 --- a/src/CCubes.c +++ b/src/CCubes.c @@ -11,11 +11,6 @@ #include #include "CCubes.h" -#ifdef _OPENMP - #undef match - #include -#endif - #include "real.h" #include "cl_setup.h" @@ -216,9 +211,6 @@ SEXP CCubes(SEXP tt) { Rboolean ON_set_covered = false; if (PRINT_INFO) { Rprintf("ON-set minterms: %d\n", posrows); - #ifdef _OPENMP - Rprintf("OpenMP enabled, %d workers\n", omp_get_max_threads()); - #endif } @@ -275,309 +267,104 @@ SEXP CCubes(SEXP tt) { pichart_values ); - for (int i = 0; i < current_batch; i++) { - log_debug("ccubes", "Task %d", i); + for (int current_task = 0; current_task < current_batch; current_task++) { + log_debug("ccubes", "Task %d", current_task); - log_debug_raw("ccubes", "redundant[%d]: %d\n", i, ctx->h_redundant[i]); + log_debug_raw("ccubes", "redundant[%d]: %d\n", current_task, ctx->h_redundant[current_task]); - log_debug_raw("ccubes", "coverage[%d]:", i); + log_debug_raw("ccubes", "coverage[%d]:", current_task); for (int j = 0; j < posrows; j++) { log_debug_raw("ccubes", " %d", - ctx->h_coverage[i * posrows + j]); + ctx->h_coverage[current_task * posrows + j]); } log_debug_raw("ccubes", "\n"); - log_debug_raw("ccubes", "fixed_bits[%d]:", i); + log_debug_raw("ccubes", "fixed_bits[%d]:", current_task); for (int j = 0; j < implicant_words; j++) { log_debug_raw("ccubes", " %d", - ctx->h_fixed_bits[i * implicant_words + j]); + ctx->h_fixed_bits[current_task * implicant_words + j]); } log_debug_raw("ccubes", "\n"); - log_debug_raw("ccubes", "value_bits[%d]:", i); + log_debug_raw("ccubes", "value_bits[%d]:", current_task); for (int j = 0; j < implicant_words; j++) { log_debug_raw("ccubes", " %d", - ctx->h_value_bits[i * implicant_words + j]); + ctx->h_value_bits[current_task * implicant_words + j]); } log_debug_raw("ccubes", "\n"); - log_debug_raw("ccubes", "pichart_values[%d]:", i); + log_debug_raw("ccubes", "pichart_values[%d]:", current_task); for (int j = 0; j < pichart_words; j++) { log_debug_raw("ccubes", " %d", - ctx->h_pichart_values[i * pichart_words + j]); + ctx->h_pichart_values[current_task * pichart_words + j]); } log_debug_raw("ccubes", "\n"); + + if (!ctx->h_redundant[current_task]) { + int covsum = 0; + for (int i = 0; i < posrows; i++) { + covsum += ctx->h_coverage[current_task * posrows + i]; + } + // push the PI information to the global arrays + + for (int i = foundPI; i > last_index[covsum - 1]; i--) { + p_covered[i] = p_covered[i - 1]; + } + + p_covered[last_index[covsum - 1]] = foundPI; + + for (int l = 1; l < covsum; l++) { + last_index[l - 1] += 1; + } + + for (int w = 0; w < implicant_words; w++) { + p_implicants_pos[implicant_words * foundPI + w] = ctx->h_fixed_bits[current_task * implicant_words + w]; + p_implicants_val[implicant_words * foundPI + w] = ctx->h_value_bits[current_task * implicant_words + w]; + } + + // populate the coverage matrix + for (int r = 0; r < posrows; r++) { + for (int w = 0; w < pichart_words; w++) { + p_pichart_pos[foundPI * pichart_words + w] = ctx->h_pichart_values[current_task * pichart_words + w]; + } + + p_pichart[posrows * foundPI + r] = ctx->h_coverage[current_task * posrows + r]; + } + + ++foundPI; + + // when needed, increase allocated memory + if (foundPI / estimPI > 0.9) { + estimPI += 100000; + p_pichart = R_Realloc(p_pichart, posrows * estimPI, int); + p_pichart_pos = R_Realloc(p_pichart_pos, pichart_words * estimPI, unsigned int); + p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int); + p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int); + p_covered = R_Realloc(p_covered, estimPI, int); + + for (unsigned int i = foundPI; i < posrows * estimPI; i++) { + p_pichart[i] = 0; + } + for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) { + p_pichart_pos[i] = 0U; + } + for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) { + p_implicants_val[i] = 0U; + p_implicants_pos[i] = 0U; + } + + if (PRINT_INFO) { + multiplier++; + Rprintf("%dx ", multiplier); + } + } + } } /* change to something less aggresive for reuse */ ccubes_clean_up(ctx); } - #ifdef _OPENMP - #pragma omp parallel for schedule(dynamic) - #endif - - for (int task = 0; task < n_tasks; task++) { - int tempk[k]; - int x = 0; - int combination = task; - - // fill the combination for the current task / combination number - for (int i = 0; i < k; i++) { - while (nchoosek(ninputs - (x + 1), k - (i + 1)) <= combination) { - combination -= nchoosek(ninputs - (x + 1), k - (i + 1)); - x++; - } - tempk[i] = x; - x++; - } - - // allocate vectors of decimal numbers for the ON-set and OFF-set rows - int decpos[posrows]; - int decneg[negrows]; - - // create the vector of multiple bases, useful when calculating the decimal representation - // of a particular combination of columns, for each row - int mbase[k]; - mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input - - // calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with - // 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6 - for (int i = 1; i < k; i++) { - mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]]; - } - - // calculate decimal numbers, using mbase, fills in decpos and decneg - for (int r = 0; r < posrows; r++) { - decpos[r] = 0; - for (int c = 0; c < k; c++) { - decpos[r] += ON_set[tempk[c] * posrows + r] * mbase[c]; - } - } - - for (int r = 0; r < negrows; r++) { - decneg[r] = 0; - for (int c = 0; c < k; c++) { - decneg[r] += OFF_set[tempk[c] * negrows + r] * mbase[c]; - } - } - - - int possible_rows[posrows]; - - Rboolean possible_cover[posrows]; - possible_cover[0] = true; // boolean flag, to be set with false if found among the OFF set - - int found = 0; - - // identifies all unique decimal rows, for the selected combination of k inputs - for (int r = 0; r < posrows; r++) { - int prev = 0; - Rboolean unique = true; // Rboolean flag, assume the row is unique - while (prev < found && unique) { - unique = decpos[possible_rows[prev]] != decpos[r]; - prev++; - } - - if (unique) { - possible_rows[found] = r; - possible_cover[found] = true; - found++; - } - } - - if (found > 0) { - // some of the ON set numbers are possible PIs (not found in the OFF set) - int frows[found]; - - // verify if this is a possible PI - // (if the same decimal number is not found in the OFF set) - for (int i = found - 1; i >= 0; i--) { - int j = 0; - while (j < negrows && possible_cover[i]) { - if (decpos[possible_rows[i]] == decneg[j]) { - possible_cover[i] = false; - found--; - } - j++; - } - - if (possible_cover[i]) { - frows[found - i - 1] = possible_rows[i]; - } - } - // Rprintf("task: %d; rows: %d\n", task, found); - - for (int f = 0; f < found; f++) { - - // create a temporary vector of length k, containing the values from the initial ON set - // plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc. - int tempc[k]; - - // using bit shifting, store the fixed bits and value bits - unsigned int fixed_bits[implicant_words]; - unsigned int value_bits[implicant_words]; - - for (int i = 0; i < implicant_words; i++) { - fixed_bits[i] = 0U; - value_bits[i] = 0U; - } - - for (int c = 0; c < k; c++) { - int value = ON_set[tempk[c] * posrows + frows[f]]; - tempc[c] = value + 1; - - int word_index = tempk[c] / (BITS_PER_WORD / value_bit_width); - int bit_index = (tempk[c] % (BITS_PER_WORD / value_bit_width)) * value_bit_width; - - fixed_bits[word_index] |= (value_bit_mask << bit_index); - value_bits[word_index] |= ((unsigned int)value << bit_index); - } - - // check if the current PI is not redundant - Rboolean redundantOMP = false; - - int i = 0; - while (i < prevfoundPI && !redundantOMP) { - // /* - // - ck contains the complexity level for each of the previously found non-redundant PIs - // - indx is a matrix containing the indexes of the columns where the values were stored - // - a redundant PI is one for which all values from a previous PI are exactly the same: - // 0 0 1 2 0, let's say previously found PI - // which means a corresponding ck = 2 and a corresponding indx = [3, 4] - // 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal - // therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2 - // */ - - Rboolean is_subset = true; // Assume it's a subset unless proven otherwise - - for (int w = 0; w < implicant_words; w++) { - // If the new PI has values on positions outside the existing PI’s fixed positions, it’s not a subset - unsigned int index_mask = p_implicants_pos[i * implicant_words + w]; - - if ((fixed_bits[w] & index_mask) != index_mask) { - is_subset = false; - break; - } - - // then compare the value bits, if one or more values on those positions are different, it’s not a subset - if ((value_bits[w] & index_mask) != (p_implicants_val[i * implicant_words + w] & index_mask)) { - is_subset = false; - break; - } - } - - redundantOMP = is_subset; - - i++; - } - - if (redundantOMP) continue; - - Rboolean coverage[posrows]; - int covsum = 0; - unsigned int pichart_values[pichart_words]; - for (int w = 0; w < pichart_words; w++) { - pichart_values[w] = 0U; - } - - for (int r = 0; r < posrows; r++) { - coverage[r] = decpos[r] == decpos[frows[f]]; - if (coverage[r]) { - int word_index = r / BITS_PER_WORD; - int bit_index = r % BITS_PER_WORD; - pichart_values[word_index] |= (1U << bit_index); - } - covsum += coverage[r]; - } - - // verify row dominance - int rd = 0; - while (rd < last_index[covsum - 1] && !redundantOMP) { - - bool dominated = true; - for (int w = 0; w < pichart_words; w++) { - if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * pichart_words + w]) != pichart_values[w]) { - dominated = false; - break; - } - } - - redundantOMP = dominated; - rd++; - } - - if (redundantOMP) continue; - - - // Rprintf("It is a prime implicant\n"); - // This operation first gets a new index to push in the global array in a concurrent way - // Then adds the result there. - // We could synchronize only the index and let the copy operation happen in parallel BUT this - // creates a false sharing problem and the performance is down by several factors. - - #ifdef _OPENMP - #pragma omp critical - #endif - { - // push the PI information to the global arrays - - for (int i = foundPI; i > last_index[covsum - 1]; i--) { - p_covered[i] = p_covered[i - 1]; - } - - p_covered[last_index[covsum - 1]] = foundPI; - - for (int l = 1; l < covsum; l++) { - last_index[l - 1] += 1; - } - - for (int w = 0; w < implicant_words; w++) { - p_implicants_pos[implicant_words * foundPI + w] = fixed_bits[w]; - p_implicants_val[implicant_words * foundPI + w] = value_bits[w]; - } - - // populate the coverage matrix - for (int r = 0; r < posrows; r++) { - for (int w = 0; w < pichart_words; w++) { - p_pichart_pos[foundPI * pichart_words + w] = pichart_values[w]; - } - - p_pichart[posrows * foundPI + r] = coverage[r]; - } - - ++foundPI; - - // when needed, increase allocated memory - if (foundPI / estimPI > 0.9) { - estimPI += 100000; - p_pichart = R_Realloc(p_pichart, posrows * estimPI, int); - p_pichart_pos = R_Realloc(p_pichart_pos, pichart_words * estimPI, unsigned int); - p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int); - p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int); - p_covered = R_Realloc(p_covered, estimPI, int); - - for (unsigned int i = foundPI; i < posrows * estimPI; i++) { - p_pichart[i] = 0; - } - for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) { - p_pichart_pos[i] = 0U; - } - for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) { - p_implicants_val[i] = 0U; - p_implicants_pos[i] = 0U; - } - - if (PRINT_INFO) { - multiplier++; - Rprintf("%dx ", multiplier); - } - } - } - } - } - } - nofpi[k - 1] = foundPI; if (foundPI > 0 && !ON_set_covered) {