Adapt OMP critical section for the OpenCL variant.

This commit is contained in:
Paul Irofti 2025-03-29 15:16:39 +02:00
parent 2bb176f813
commit feb82c35df

View file

@ -11,11 +11,6 @@
#include <math.h> #include <math.h>
#include "CCubes.h" #include "CCubes.h"
#ifdef _OPENMP
#undef match
#include <omp.h>
#endif
#include "real.h" #include "real.h"
#include "cl_setup.h" #include "cl_setup.h"
@ -216,9 +211,6 @@ SEXP CCubes(SEXP tt) {
Rboolean ON_set_covered = false; Rboolean ON_set_covered = false;
if (PRINT_INFO) { if (PRINT_INFO) {
Rprintf("ON-set minterms: %d\n", posrows); Rprintf("ON-set minterms: %d\n", posrows);
#ifdef _OPENMP
Rprintf("OpenMP enabled, %d workers\n", omp_get_max_threads());
#endif
} }
@ -275,251 +267,44 @@ SEXP CCubes(SEXP tt) {
pichart_values pichart_values
); );
for (int i = 0; i < current_batch; i++) { for (int current_task = 0; current_task < current_batch; current_task++) {
log_debug("ccubes", "Task %d", i); log_debug("ccubes", "Task %d", current_task);
log_debug_raw("ccubes", "redundant[%d]: %d\n", i, ctx->h_redundant[i]); log_debug_raw("ccubes", "redundant[%d]: %d\n", current_task, ctx->h_redundant[current_task]);
log_debug_raw("ccubes", "coverage[%d]:", i); log_debug_raw("ccubes", "coverage[%d]:", current_task);
for (int j = 0; j < posrows; j++) { for (int j = 0; j < posrows; j++) {
log_debug_raw("ccubes", " %d", log_debug_raw("ccubes", " %d",
ctx->h_coverage[i * posrows + j]); ctx->h_coverage[current_task * posrows + j]);
} }
log_debug_raw("ccubes", "\n"); log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "fixed_bits[%d]:", i); log_debug_raw("ccubes", "fixed_bits[%d]:", current_task);
for (int j = 0; j < implicant_words; j++) { for (int j = 0; j < implicant_words; j++) {
log_debug_raw("ccubes", " %d", log_debug_raw("ccubes", " %d",
ctx->h_fixed_bits[i * implicant_words + j]); ctx->h_fixed_bits[current_task * implicant_words + j]);
} }
log_debug_raw("ccubes", "\n"); log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "value_bits[%d]:", i); log_debug_raw("ccubes", "value_bits[%d]:", current_task);
for (int j = 0; j < implicant_words; j++) { for (int j = 0; j < implicant_words; j++) {
log_debug_raw("ccubes", " %d", log_debug_raw("ccubes", " %d",
ctx->h_value_bits[i * implicant_words + j]); ctx->h_value_bits[current_task * implicant_words + j]);
} }
log_debug_raw("ccubes", "\n"); log_debug_raw("ccubes", "\n");
log_debug_raw("ccubes", "pichart_values[%d]:", i); log_debug_raw("ccubes", "pichart_values[%d]:", current_task);
for (int j = 0; j < pichart_words; j++) { for (int j = 0; j < pichart_words; j++) {
log_debug_raw("ccubes", " %d", log_debug_raw("ccubes", " %d",
ctx->h_pichart_values[i * pichart_words + j]); ctx->h_pichart_values[current_task * pichart_words + j]);
} }
log_debug_raw("ccubes", "\n"); log_debug_raw("ccubes", "\n");
}
/* change to something less aggressive for reuse */ if (!ctx->h_redundant[current_task]) {
ccubes_clean_up(ctx);
}
#ifdef _OPENMP
#pragma omp parallel for schedule(dynamic)
#endif
for (int task = 0; task < n_tasks; task++) {
int tempk[k];
int x = 0;
int combination = task;
// fill the combination for the current task / combination number
for (int i = 0; i < k; i++) {
while (nchoosek(ninputs - (x + 1), k - (i + 1)) <= combination) {
combination -= nchoosek(ninputs - (x + 1), k - (i + 1));
x++;
}
tempk[i] = x;
x++;
}
// allocate vectors of decimal numbers for the ON-set and OFF-set rows
int decpos[posrows];
int decneg[negrows];
// create the vector of multiple bases, useful when calculating the decimal representation
// of a particular combination of columns, for each row
int mbase[k];
mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input
// calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with
// 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6
for (int i = 1; i < k; i++) {
mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]];
}
// calculate decimal numbers, using mbase, fills in decpos and decneg
for (int r = 0; r < posrows; r++) {
decpos[r] = 0;
for (int c = 0; c < k; c++) {
decpos[r] += ON_set[tempk[c] * posrows + r] * mbase[c];
}
}
for (int r = 0; r < negrows; r++) {
decneg[r] = 0;
for (int c = 0; c < k; c++) {
decneg[r] += OFF_set[tempk[c] * negrows + r] * mbase[c];
}
}
int possible_rows[posrows];
Rboolean possible_cover[posrows];
possible_cover[0] = true; // boolean flag, to be set with false if found among the OFF set
int found = 0;
// identifies all unique decimal rows, for the selected combination of k inputs
for (int r = 0; r < posrows; r++) {
int prev = 0;
Rboolean unique = true; // Rboolean flag, assume the row is unique
while (prev < found && unique) {
unique = decpos[possible_rows[prev]] != decpos[r];
prev++;
}
if (unique) {
possible_rows[found] = r;
possible_cover[found] = true;
found++;
}
}
if (found > 0) {
// some of the ON set numbers are possible PIs (not found in the OFF set)
int frows[found];
// verify if this is a possible PI
// (if the same decimal number is not found in the OFF set)
for (int i = found - 1; i >= 0; i--) {
int j = 0;
while (j < negrows && possible_cover[i]) {
if (decpos[possible_rows[i]] == decneg[j]) {
possible_cover[i] = false;
found--;
}
j++;
}
if (possible_cover[i]) {
frows[found - i - 1] = possible_rows[i];
}
}
// Rprintf("task: %d; rows: %d\n", task, found);
for (int f = 0; f < found; f++) {
// create a temporary vector of length k, containing the values from the initial ON set
// plus 1 (because 0 now signals a minimization, so 0 becomes 1, 1 becomes 2, etc.)
int tempc[k];
// using bit shifting, store the fixed bits and value bits
unsigned int fixed_bits[implicant_words];
unsigned int value_bits[implicant_words];
for (int i = 0; i < implicant_words; i++) {
fixed_bits[i] = 0U;
value_bits[i] = 0U;
}
for (int c = 0; c < k; c++) {
int value = ON_set[tempk[c] * posrows + frows[f]];
tempc[c] = value + 1;
int word_index = tempk[c] / (BITS_PER_WORD / value_bit_width);
int bit_index = (tempk[c] % (BITS_PER_WORD / value_bit_width)) * value_bit_width;
fixed_bits[word_index] |= (value_bit_mask << bit_index);
value_bits[word_index] |= ((unsigned int)value << bit_index);
}
// check if the current PI is not redundant
Rboolean redundantOMP = false;
int i = 0;
while (i < prevfoundPI && !redundantOMP) {
// /*
// - ck contains the complexity level for each of the previously found non-redundant PIs
// - indx is a matrix containing the indexes of the columns where the values were stored
// - a redundant PI is one for which all values from a previous PI are exactly the same:
// 0 0 1 2 0, let's say previously found PI
// which means a corresponding ck = 2 and a corresponding indx = [3, 4]
// 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal
// therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2
// */
Rboolean is_subset = true; // Assume it's a subset unless proven otherwise
for (int w = 0; w < implicant_words; w++) {
// If the new PI does not fix every position that the existing PI fixes, it's not a subset
unsigned int index_mask = p_implicants_pos[i * implicant_words + w];
if ((fixed_bits[w] & index_mask) != index_mask) {
is_subset = false;
break;
}
// Then compare the value bits: if any value on those positions differs, it's not a subset
if ((value_bits[w] & index_mask) != (p_implicants_val[i * implicant_words + w] & index_mask)) {
is_subset = false;
break;
}
}
redundantOMP = is_subset;
i++;
}
if (redundantOMP) continue;
Rboolean coverage[posrows];
int covsum = 0; int covsum = 0;
unsigned int pichart_values[pichart_words]; for (int i = 0; i < posrows; i++) {
for (int w = 0; w < pichart_words; w++) { covsum += ctx->h_coverage[current_task * posrows + i];
pichart_values[w] = 0U;
} }
for (int r = 0; r < posrows; r++) {
coverage[r] = decpos[r] == decpos[frows[f]];
if (coverage[r]) {
int word_index = r / BITS_PER_WORD;
int bit_index = r % BITS_PER_WORD;
pichart_values[word_index] |= (1U << bit_index);
}
covsum += coverage[r];
}
// verify row dominance
int rd = 0;
while (rd < last_index[covsum - 1] && !redundantOMP) {
bool dominated = true;
for (int w = 0; w < pichart_words; w++) {
if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * pichart_words + w]) != pichart_values[w]) {
dominated = false;
break;
}
}
redundantOMP = dominated;
rd++;
}
if (redundantOMP) continue;
// Rprintf("It is a prime implicant\n");
// This operation first gets a new index to push in the global array in a concurrent way
// Then adds the result there.
// We could synchronize only the index and let the copy operation happen in parallel BUT this
// creates a false sharing problem and the performance is down by several factors.
#ifdef _OPENMP
#pragma omp critical
#endif
{
// push the PI information to the global arrays // push the PI information to the global arrays
for (int i = foundPI; i > last_index[covsum - 1]; i--) { for (int i = foundPI; i > last_index[covsum - 1]; i--) {
@ -533,17 +318,17 @@ SEXP CCubes(SEXP tt) {
} }
for (int w = 0; w < implicant_words; w++) { for (int w = 0; w < implicant_words; w++) {
p_implicants_pos[implicant_words * foundPI + w] = fixed_bits[w]; p_implicants_pos[implicant_words * foundPI + w] = ctx->h_fixed_bits[current_task * implicant_words + w];
p_implicants_val[implicant_words * foundPI + w] = value_bits[w]; p_implicants_val[implicant_words * foundPI + w] = ctx->h_value_bits[current_task * implicant_words + w];
} }
// populate the coverage matrix // populate the coverage matrix
for (int r = 0; r < posrows; r++) { for (int r = 0; r < posrows; r++) {
for (int w = 0; w < pichart_words; w++) { for (int w = 0; w < pichart_words; w++) {
p_pichart_pos[foundPI * pichart_words + w] = pichart_values[w]; p_pichart_pos[foundPI * pichart_words + w] = ctx->h_pichart_values[current_task * pichart_words + w];
} }
p_pichart[posrows * foundPI + r] = coverage[r]; p_pichart[posrows * foundPI + r] = ctx->h_coverage[current_task * posrows + r];
} }
++foundPI; ++foundPI;
@ -575,7 +360,9 @@ SEXP CCubes(SEXP tt) {
} }
} }
} }
}
/* change to something less aggressive for reuse */
ccubes_clean_up(ctx);
} }
nofpi[k - 1] = foundPI; nofpi[k - 1] = foundPI;