Adapt OMP critical section for the OpenCL variant.
This commit is contained in:
parent
2bb176f813
commit
feb82c35df
1 changed files with 71 additions and 284 deletions
355
src/CCubes.c
355
src/CCubes.c
|
@ -11,11 +11,6 @@
|
|||
#include <math.h>
|
||||
#include "CCubes.h"
|
||||
|
||||
#ifdef _OPENMP
|
||||
#undef match
|
||||
#include <omp.h>
|
||||
#endif
|
||||
|
||||
#include "real.h"
|
||||
#include "cl_setup.h"
|
||||
|
||||
|
@ -216,9 +211,6 @@ SEXP CCubes(SEXP tt) {
|
|||
Rboolean ON_set_covered = false;
|
||||
if (PRINT_INFO) {
|
||||
Rprintf("ON-set minterms: %d\n", posrows);
|
||||
#ifdef _OPENMP
|
||||
Rprintf("OpenMP enabled, %d workers\n", omp_get_max_threads());
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
@ -275,309 +267,104 @@ SEXP CCubes(SEXP tt) {
|
|||
pichart_values
|
||||
);
|
||||
|
||||
for (int i = 0; i < current_batch; i++) {
|
||||
log_debug("ccubes", "Task %d", i);
|
||||
for (int current_task = 0; current_task < current_batch; current_task++) {
|
||||
log_debug("ccubes", "Task %d", current_task);
|
||||
|
||||
log_debug_raw("ccubes", "redundant[%d]: %d\n", i, ctx->h_redundant[i]);
|
||||
log_debug_raw("ccubes", "redundant[%d]: %d\n", current_task, ctx->h_redundant[current_task]);
|
||||
|
||||
log_debug_raw("ccubes", "coverage[%d]:", i);
|
||||
log_debug_raw("ccubes", "coverage[%d]:", current_task);
|
||||
for (int j = 0; j < posrows; j++) {
|
||||
log_debug_raw("ccubes", " %d",
|
||||
ctx->h_coverage[i * posrows + j]);
|
||||
ctx->h_coverage[current_task * posrows + j]);
|
||||
}
|
||||
log_debug_raw("ccubes", "\n");
|
||||
|
||||
log_debug_raw("ccubes", "fixed_bits[%d]:", i);
|
||||
log_debug_raw("ccubes", "fixed_bits[%d]:", current_task);
|
||||
for (int j = 0; j < implicant_words; j++) {
|
||||
log_debug_raw("ccubes", " %d",
|
||||
ctx->h_fixed_bits[i * implicant_words + j]);
|
||||
ctx->h_fixed_bits[current_task * implicant_words + j]);
|
||||
}
|
||||
log_debug_raw("ccubes", "\n");
|
||||
|
||||
log_debug_raw("ccubes", "value_bits[%d]:", i);
|
||||
log_debug_raw("ccubes", "value_bits[%d]:", current_task);
|
||||
for (int j = 0; j < implicant_words; j++) {
|
||||
log_debug_raw("ccubes", " %d",
|
||||
ctx->h_value_bits[i * implicant_words + j]);
|
||||
ctx->h_value_bits[current_task * implicant_words + j]);
|
||||
}
|
||||
log_debug_raw("ccubes", "\n");
|
||||
|
||||
log_debug_raw("ccubes", "pichart_values[%d]:", i);
|
||||
log_debug_raw("ccubes", "pichart_values[%d]:", current_task);
|
||||
for (int j = 0; j < pichart_words; j++) {
|
||||
log_debug_raw("ccubes", " %d",
|
||||
ctx->h_pichart_values[i * pichart_words + j]);
|
||||
ctx->h_pichart_values[current_task * pichart_words + j]);
|
||||
}
|
||||
log_debug_raw("ccubes", "\n");
|
||||
|
||||
if (!ctx->h_redundant[current_task]) {
|
||||
int covsum = 0;
|
||||
for (int i = 0; i < posrows; i++) {
|
||||
covsum += ctx->h_coverage[current_task * posrows + i];
|
||||
}
|
||||
// push the PI information to the global arrays
|
||||
|
||||
for (int i = foundPI; i > last_index[covsum - 1]; i--) {
|
||||
p_covered[i] = p_covered[i - 1];
|
||||
}
|
||||
|
||||
p_covered[last_index[covsum - 1]] = foundPI;
|
||||
|
||||
for (int l = 1; l < covsum; l++) {
|
||||
last_index[l - 1] += 1;
|
||||
}
|
||||
|
||||
for (int w = 0; w < implicant_words; w++) {
|
||||
p_implicants_pos[implicant_words * foundPI + w] = ctx->h_fixed_bits[current_task * implicant_words + w];
|
||||
p_implicants_val[implicant_words * foundPI + w] = ctx->h_value_bits[current_task * implicant_words + w];
|
||||
}
|
||||
|
||||
// populate the coverage matrix
|
||||
for (int r = 0; r < posrows; r++) {
|
||||
for (int w = 0; w < pichart_words; w++) {
|
||||
p_pichart_pos[foundPI * pichart_words + w] = ctx->h_pichart_values[current_task * pichart_words + w];
|
||||
}
|
||||
|
||||
p_pichart[posrows * foundPI + r] = ctx->h_coverage[current_task * posrows + r];
|
||||
}
|
||||
|
||||
++foundPI;
|
||||
|
||||
// when needed, increase allocated memory
|
||||
if (foundPI / estimPI > 0.9) {
|
||||
estimPI += 100000;
|
||||
p_pichart = R_Realloc(p_pichart, posrows * estimPI, int);
|
||||
p_pichart_pos = R_Realloc(p_pichart_pos, pichart_words * estimPI, unsigned int);
|
||||
p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int);
|
||||
p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int);
|
||||
p_covered = R_Realloc(p_covered, estimPI, int);
|
||||
|
||||
for (unsigned int i = foundPI; i < posrows * estimPI; i++) {
|
||||
p_pichart[i] = 0;
|
||||
}
|
||||
for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) {
|
||||
p_pichart_pos[i] = 0U;
|
||||
}
|
||||
for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) {
|
||||
p_implicants_val[i] = 0U;
|
||||
p_implicants_pos[i] = 0U;
|
||||
}
|
||||
|
||||
if (PRINT_INFO) {
|
||||
multiplier++;
|
||||
Rprintf("%dx ", multiplier);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* change to something less aggressive for reuse */
|
||||
ccubes_clean_up(ctx);
|
||||
}
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for schedule(dynamic)
|
||||
#endif
|
||||
|
||||
for (int task = 0; task < n_tasks; task++) {
|
||||
int tempk[k];
|
||||
int x = 0;
|
||||
int combination = task;
|
||||
|
||||
// fill the combination for the current task / combination number
|
||||
for (int i = 0; i < k; i++) {
|
||||
while (nchoosek(ninputs - (x + 1), k - (i + 1)) <= combination) {
|
||||
combination -= nchoosek(ninputs - (x + 1), k - (i + 1));
|
||||
x++;
|
||||
}
|
||||
tempk[i] = x;
|
||||
x++;
|
||||
}
|
||||
|
||||
// allocate vectors of decimal numbers for the ON-set and OFF-set rows
|
||||
int decpos[posrows];
|
||||
int decneg[negrows];
|
||||
|
||||
// create the vector of multiple bases, useful when calculating the decimal representation
|
||||
// of a particular combination of columns, for each row
|
||||
int mbase[k];
|
||||
mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input
|
||||
|
||||
// calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with
|
||||
// 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6
|
||||
for (int i = 1; i < k; i++) {
|
||||
mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]];
|
||||
}
|
||||
|
||||
// calculate decimal numbers, using mbase, fills in decpos and decneg
|
||||
for (int r = 0; r < posrows; r++) {
|
||||
decpos[r] = 0;
|
||||
for (int c = 0; c < k; c++) {
|
||||
decpos[r] += ON_set[tempk[c] * posrows + r] * mbase[c];
|
||||
}
|
||||
}
|
||||
|
||||
for (int r = 0; r < negrows; r++) {
|
||||
decneg[r] = 0;
|
||||
for (int c = 0; c < k; c++) {
|
||||
decneg[r] += OFF_set[tempk[c] * negrows + r] * mbase[c];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
int possible_rows[posrows];
|
||||
|
||||
Rboolean possible_cover[posrows];
|
||||
possible_cover[0] = true; // boolean flag, to be set with false if found among the OFF set
|
||||
|
||||
int found = 0;
|
||||
|
||||
// identifies all unique decimal rows, for the selected combination of k inputs
|
||||
for (int r = 0; r < posrows; r++) {
|
||||
int prev = 0;
|
||||
Rboolean unique = true; // Rboolean flag, assume the row is unique
|
||||
while (prev < found && unique) {
|
||||
unique = decpos[possible_rows[prev]] != decpos[r];
|
||||
prev++;
|
||||
}
|
||||
|
||||
if (unique) {
|
||||
possible_rows[found] = r;
|
||||
possible_cover[found] = true;
|
||||
found++;
|
||||
}
|
||||
}
|
||||
|
||||
if (found > 0) {
|
||||
// some of the ON set numbers are possible PIs (not found in the OFF set)
|
||||
int frows[found];
|
||||
|
||||
// verify if this is a possible PI
|
||||
// (if the same decimal number is not found in the OFF set)
|
||||
for (int i = found - 1; i >= 0; i--) {
|
||||
int j = 0;
|
||||
while (j < negrows && possible_cover[i]) {
|
||||
if (decpos[possible_rows[i]] == decneg[j]) {
|
||||
possible_cover[i] = false;
|
||||
found--;
|
||||
}
|
||||
j++;
|
||||
}
|
||||
|
||||
if (possible_cover[i]) {
|
||||
frows[found - i - 1] = possible_rows[i];
|
||||
}
|
||||
}
|
||||
// Rprintf("task: %d; rows: %d\n", task, found);
|
||||
|
||||
for (int f = 0; f < found; f++) {
|
||||
|
||||
// create a temporary vector of length k, containing the values from the initial ON set
|
||||
// plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc.)
|
||||
int tempc[k];
|
||||
|
||||
// using bit shifting, store the fixed bits and value bits
|
||||
unsigned int fixed_bits[implicant_words];
|
||||
unsigned int value_bits[implicant_words];
|
||||
|
||||
for (int i = 0; i < implicant_words; i++) {
|
||||
fixed_bits[i] = 0U;
|
||||
value_bits[i] = 0U;
|
||||
}
|
||||
|
||||
for (int c = 0; c < k; c++) {
|
||||
int value = ON_set[tempk[c] * posrows + frows[f]];
|
||||
tempc[c] = value + 1;
|
||||
|
||||
int word_index = tempk[c] / (BITS_PER_WORD / value_bit_width);
|
||||
int bit_index = (tempk[c] % (BITS_PER_WORD / value_bit_width)) * value_bit_width;
|
||||
|
||||
fixed_bits[word_index] |= (value_bit_mask << bit_index);
|
||||
value_bits[word_index] |= ((unsigned int)value << bit_index);
|
||||
}
|
||||
|
||||
// check if the current PI is not redundant
|
||||
Rboolean redundantOMP = false;
|
||||
|
||||
int i = 0;
|
||||
while (i < prevfoundPI && !redundantOMP) {
|
||||
// /*
|
||||
// - ck contains the complexity level for each of the previously found non-redundant PIs
|
||||
// - indx is a matrix containing the indexes of the columns where the values were stored
|
||||
// - a redundant PI is one for which all values from a previous PI are exactly the same:
|
||||
// 0 0 1 2 0, let's say previously found PI
|
||||
// which means a corresponding ck = 2 and a corresponding indx = [3, 4]
|
||||
// 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal
|
||||
// therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2
|
||||
// */
|
||||
|
||||
Rboolean is_subset = true; // Assume it's a subset unless proven otherwise
|
||||
|
||||
for (int w = 0; w < implicant_words; w++) {
|
||||
// If the new PI has values on positions outside the existing PI’s fixed positions, it’s not a subset
|
||||
unsigned int index_mask = p_implicants_pos[i * implicant_words + w];
|
||||
|
||||
if ((fixed_bits[w] & index_mask) != index_mask) {
|
||||
is_subset = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// then compare the value bits, if one or more values on those positions are different, it’s not a subset
|
||||
if ((value_bits[w] & index_mask) != (p_implicants_val[i * implicant_words + w] & index_mask)) {
|
||||
is_subset = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
redundantOMP = is_subset;
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if (redundantOMP) continue;
|
||||
|
||||
Rboolean coverage[posrows];
|
||||
int covsum = 0;
|
||||
unsigned int pichart_values[pichart_words];
|
||||
for (int w = 0; w < pichart_words; w++) {
|
||||
pichart_values[w] = 0U;
|
||||
}
|
||||
|
||||
for (int r = 0; r < posrows; r++) {
|
||||
coverage[r] = decpos[r] == decpos[frows[f]];
|
||||
if (coverage[r]) {
|
||||
int word_index = r / BITS_PER_WORD;
|
||||
int bit_index = r % BITS_PER_WORD;
|
||||
pichart_values[word_index] |= (1U << bit_index);
|
||||
}
|
||||
covsum += coverage[r];
|
||||
}
|
||||
|
||||
// verify row dominance
|
||||
int rd = 0;
|
||||
while (rd < last_index[covsum - 1] && !redundantOMP) {
|
||||
|
||||
bool dominated = true;
|
||||
for (int w = 0; w < pichart_words; w++) {
|
||||
if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * pichart_words + w]) != pichart_values[w]) {
|
||||
dominated = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
redundantOMP = dominated;
|
||||
rd++;
|
||||
}
|
||||
|
||||
if (redundantOMP) continue;
|
||||
|
||||
|
||||
// Rprintf("It is a prime implicant\n");
|
||||
// This operation first gets a new index to push in the global array in a concurrent way
|
||||
// Then adds the result there.
|
||||
// We could synchronize only the index and let the copy operation happen in parallel BUT this
|
||||
// creates a false sharing problem and the performance is down by several factors.
|
||||
|
||||
#ifdef _OPENMP
|
||||
#pragma omp critical
|
||||
#endif
|
||||
{
|
||||
// push the PI information to the global arrays
|
||||
|
||||
for (int i = foundPI; i > last_index[covsum - 1]; i--) {
|
||||
p_covered[i] = p_covered[i - 1];
|
||||
}
|
||||
|
||||
p_covered[last_index[covsum - 1]] = foundPI;
|
||||
|
||||
for (int l = 1; l < covsum; l++) {
|
||||
last_index[l - 1] += 1;
|
||||
}
|
||||
|
||||
for (int w = 0; w < implicant_words; w++) {
|
||||
p_implicants_pos[implicant_words * foundPI + w] = fixed_bits[w];
|
||||
p_implicants_val[implicant_words * foundPI + w] = value_bits[w];
|
||||
}
|
||||
|
||||
// populate the coverage matrix
|
||||
for (int r = 0; r < posrows; r++) {
|
||||
for (int w = 0; w < pichart_words; w++) {
|
||||
p_pichart_pos[foundPI * pichart_words + w] = pichart_values[w];
|
||||
}
|
||||
|
||||
p_pichart[posrows * foundPI + r] = coverage[r];
|
||||
}
|
||||
|
||||
++foundPI;
|
||||
|
||||
// when needed, increase allocated memory
|
||||
if (foundPI / estimPI > 0.9) {
|
||||
estimPI += 100000;
|
||||
p_pichart = R_Realloc(p_pichart, posrows * estimPI, int);
|
||||
p_pichart_pos = R_Realloc(p_pichart_pos, pichart_words * estimPI, unsigned int);
|
||||
p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int);
|
||||
p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int);
|
||||
p_covered = R_Realloc(p_covered, estimPI, int);
|
||||
|
||||
for (unsigned int i = foundPI; i < posrows * estimPI; i++) {
|
||||
p_pichart[i] = 0;
|
||||
}
|
||||
for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) {
|
||||
p_pichart_pos[i] = 0U;
|
||||
}
|
||||
for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) {
|
||||
p_implicants_val[i] = 0U;
|
||||
p_implicants_pos[i] = 0U;
|
||||
}
|
||||
|
||||
if (PRINT_INFO) {
|
||||
multiplier++;
|
||||
Rprintf("%dx ", multiplier);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nofpi[k - 1] = foundPI;
|
||||
|
||||
if (foundPI > 0 && !ON_set_covered) {
|
||||
|
|
Loading…
Add table
Reference in a new issue