Adapt the OpenMP critical section (PI insertion into the global arrays) for the OpenCL variant.

2025-03-29 15:16:39 +02:00 · 2025-03-29 15:16:39 +02:00 · feb82c35df
commit feb82c35df
parent 2bb176f813
1 changed files with 71 additions and 284 deletions
--- a/src/CCubes.c
+++ b/src/CCubes.c
@ -11,11 +11,6 @@
 #include <math.h>
 #include "CCubes.h"

-#ifdef _OPENMP
-    #undef match
-    #include <omp.h>
-#endif
-
 #include "real.h"
 #include "cl_setup.h"

@ -216,9 +211,6 @@ SEXP CCubes(SEXP tt) {
    Rboolean ON_set_covered = false;
    if (PRINT_INFO) {
        Rprintf("ON-set minterms: %d\n", posrows);
-        #ifdef _OPENMP
-            Rprintf("OpenMP enabled, %d workers\n", omp_get_max_threads());
-        #endif
    }


@ -275,309 +267,104 @@ SEXP CCubes(SEXP tt) {
 		    pichart_values
 			);

-		for (int i = 0; i < current_batch; i++) {
-			log_debug("ccubes", "Task %d", i);
+		for (int current_task = 0; current_task < current_batch; current_task++) {
+			log_debug("ccubes", "Task %d", current_task);

-			log_debug_raw("ccubes", "redundant[%d]: %d\n", i, ctx->h_redundant[i]);
+			log_debug_raw("ccubes", "redundant[%d]: %d\n", current_task, ctx->h_redundant[current_task]);

-			log_debug_raw("ccubes", "coverage[%d]:", i);
+			log_debug_raw("ccubes", "coverage[%d]:", current_task);
 			for (int j = 0; j < posrows; j++) {
 				log_debug_raw("ccubes", " %d",
-				    ctx->h_coverage[i * posrows + j]);
+				    ctx->h_coverage[current_task * posrows + j]);
 			}
 			log_debug_raw("ccubes", "\n");

-			log_debug_raw("ccubes", "fixed_bits[%d]:", i);
+			log_debug_raw("ccubes", "fixed_bits[%d]:", current_task);
 			for (int j = 0; j < implicant_words; j++) {
 				log_debug_raw("ccubes", " %d",
-				    ctx->h_fixed_bits[i * implicant_words + j]);
+				    ctx->h_fixed_bits[current_task * implicant_words + j]);
 			}
 			log_debug_raw("ccubes", "\n");

-			log_debug_raw("ccubes", "value_bits[%d]:", i);
+			log_debug_raw("ccubes", "value_bits[%d]:", current_task);
 			for (int j = 0; j < implicant_words; j++) {
 				log_debug_raw("ccubes", " %d",
-				    ctx->h_value_bits[i * implicant_words + j]);
+				    ctx->h_value_bits[current_task * implicant_words + j]);
 			}
 			log_debug_raw("ccubes", "\n");

-			log_debug_raw("ccubes", "pichart_values[%d]:", i);
+			log_debug_raw("ccubes", "pichart_values[%d]:", current_task);
 			for (int j = 0; j < pichart_words; j++) {
 				log_debug_raw("ccubes", " %d",
-				    ctx->h_pichart_values[i * pichart_words + j]);
+				    ctx->h_pichart_values[current_task * pichart_words + j]);
 			}
 			log_debug_raw("ccubes", "\n");
+
+			if (!ctx->h_redundant[current_task]) {
+				int covsum = 0;
+				for (int i = 0; i < posrows; i++) {
+					covsum += ctx->h_coverage[current_task * posrows + i];
+				}
+				// push the PI information to the global arrays
+
+				for (int i = foundPI; i > last_index[covsum - 1]; i--) {
+				    p_covered[i] = p_covered[i - 1];
+				}
+
+				p_covered[last_index[covsum - 1]] = foundPI;
+
+				for (int l = 1; l < covsum; l++) {
+				    last_index[l - 1] += 1;
+				}
+
+				for (int w = 0; w < implicant_words; w++) {
+				    p_implicants_pos[implicant_words * foundPI + w] = ctx->h_fixed_bits[current_task * implicant_words + w];
+				    p_implicants_val[implicant_words * foundPI + w] = ctx->h_value_bits[current_task * implicant_words + w];
+				}
+
+				// populate the coverage matrix
+				for (int r = 0; r < posrows; r++) {
+				    for (int w = 0; w < pichart_words; w++) {
+					p_pichart_pos[foundPI * pichart_words + w] = ctx->h_pichart_values[current_task * pichart_words + w];
+				    }
+
+				    p_pichart[posrows * foundPI + r] = ctx->h_coverage[current_task * posrows + r];
+				}
+
+				++foundPI;
+
+				// when needed, increase allocated memory
+				if (foundPI / estimPI > 0.9) {
+				    estimPI += 100000;
+				    p_pichart =        R_Realloc(p_pichart,        posrows * estimPI,         int);
+				    p_pichart_pos =    R_Realloc(p_pichart_pos,    pichart_words * estimPI,   unsigned int);
+				    p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int);
+				    p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int);
+				    p_covered =        R_Realloc(p_covered,        estimPI,                   int);
+
+				    for (unsigned int i = foundPI; i < posrows * estimPI; i++) {
+					p_pichart[i] = 0;
+				    }
+				    for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) {
+					p_pichart_pos[i] = 0U;
+				    }
+				    for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) {
+					p_implicants_val[i] = 0U;
+					p_implicants_pos[i] = 0U;
+				    }
+
+				    if (PRINT_INFO) {
+					multiplier++;
+					Rprintf("%dx ", multiplier);
+				    }
+				}
+			}
 		}

 		/* change to something less aggressive for reuse */
 		ccubes_clean_up(ctx);
 	}

-        #ifdef _OPENMP
-            #pragma omp parallel for schedule(dynamic)
-        #endif
-
-        for (int task = 0; task < n_tasks; task++) {
-            int tempk[k];
-            int x = 0;
-            int combination = task;
-
-            // fill the combination for the current task / combination number
-            for (int i = 0; i < k; i++) {
-                while (nchoosek(ninputs - (x + 1), k - (i + 1)) <= combination) {
-                    combination -= nchoosek(ninputs - (x + 1), k - (i + 1));
-                    x++;
-                }
-                tempk[i] = x;
-                x++;
-            }
-
-            // allocate vectors of decimal numbers for the ON-set and OFF-set rows
-            int decpos[posrows];
-            int decneg[negrows];
-
-            // create the vector of multiple bases, useful when calculating the decimal representation
-            // of a particular combination of columns, for each row
-            int mbase[k];
-            mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input
-
-            // calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with
-            // 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6
-            for (int i = 1; i < k; i++) {
-                mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]];
-            }
-
-            // calculate decimal numbers, using mbase, fills in decpos and decneg
-            for (int r = 0; r < posrows; r++) {
-                decpos[r] = 0;
-                for (int c = 0; c < k; c++) {
-                    decpos[r] += ON_set[tempk[c] * posrows + r] * mbase[c];
-                }
-            }
-
-            for (int r = 0; r < negrows; r++) {
-                decneg[r] = 0;
-                for (int c = 0; c < k; c++) {
-                    decneg[r] += OFF_set[tempk[c] * negrows + r] * mbase[c];
-                }
-            }
-
-
-            int possible_rows[posrows];
-
-            Rboolean possible_cover[posrows];
-            possible_cover[0] = true; // boolean flag, to be set with false if found among the OFF set
-
-            int found = 0;
-
-            // identifies all unique decimal rows, for the selected combination of k inputs
-            for (int r = 0; r < posrows; r++) {
-                int prev = 0;
-                Rboolean unique = true; // Rboolean flag, assume the row is unique
-                while (prev < found && unique) {
-                    unique = decpos[possible_rows[prev]] != decpos[r];
-                    prev++;
-                }
-
-                if (unique) {
-                    possible_rows[found] = r;
-                    possible_cover[found] = true;
-                    found++;
-                }
-            }
-
-            if (found > 0) {
-                // some of the ON set numbers are possible PIs (not found in the OFF set)
-                int frows[found];
-
-                // verify if this is a possible PI
-                // (if the same decimal number is not found in the OFF set)
-                for (int i = found - 1; i >= 0; i--) {
-                    int j = 0;
-                    while (j < negrows && possible_cover[i]) {
-                        if (decpos[possible_rows[i]] == decneg[j]) {
-                            possible_cover[i] = false;
-                            found--;
-                        }
-                        j++;
-                    }
-
-                    if (possible_cover[i]) {
-                        frows[found - i - 1] = possible_rows[i];
-                    }
-                }
-                // Rprintf("task: %d; rows: %d\n", task, found);
-
-                for (int f = 0; f < found; f++) {
-
-                    // create a temporary vector of length k, containing the values from the initial ON set
-                    // plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc.
-                    int tempc[k];
-
-                    // using bit shifting, store the fixed bits and value bits
-                    unsigned int fixed_bits[implicant_words];
-                    unsigned int value_bits[implicant_words];
-
-                    for (int i = 0; i < implicant_words; i++) {
-                        fixed_bits[i] = 0U;
-                        value_bits[i] = 0U;
-                    }
-
-                    for (int c = 0; c < k; c++) {
-                        int value = ON_set[tempk[c] * posrows + frows[f]];
-                        tempc[c] = value + 1;
-
-                        int word_index = tempk[c] / (BITS_PER_WORD / value_bit_width);
-                        int bit_index = (tempk[c] % (BITS_PER_WORD / value_bit_width)) * value_bit_width;
-
-                        fixed_bits[word_index] |= (value_bit_mask << bit_index);
-                        value_bits[word_index] |= ((unsigned int)value << bit_index);
-                    }
-
-                    // check if the current PI is not redundant
-                    Rboolean redundantOMP = false;
-
-                    int i = 0;
-                    while (i < prevfoundPI && !redundantOMP) {
-                        // /*
-                        // - ck contains the complexity level for each of the previously found non-redundant PIs
-                        // - indx is a matrix containing the indexes of the columns where the values were stored
-                        // - a redundant PI is one for which all values from a previous PI are exactly the same:
-                        // 0 0 1 2 0, let's say previously found PI
-                        // which means a corresponding ck = 2 and a corresponding indx = [3, 4]
-                        // 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal
-                        // therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2
-                        // */
-
-                        Rboolean is_subset = true; // Assume it's a subset unless proven otherwise
-
-                        for (int w = 0; w < implicant_words; w++) {
-                            // If the new PI has values on positions outside the existing PI’s fixed positions, it’s not a subset
-                            unsigned int index_mask = p_implicants_pos[i * implicant_words + w];
-
-                            if ((fixed_bits[w] & index_mask) != index_mask) {
-                                is_subset = false;
-                                break;
-                            }
-
-                            // then compare the value bits, if one or more values on those positions are different, it’s not a subset
-                            if ((value_bits[w] & index_mask) != (p_implicants_val[i * implicant_words + w] & index_mask)) {
-                                is_subset = false;
-                                break;
-                            }
-                        }
-
-                        redundantOMP = is_subset;
-
-                        i++;
-                    }
-
-                    if (redundantOMP) continue;
-
-                    Rboolean coverage[posrows];
-                    int covsum = 0;
-                    unsigned int pichart_values[pichart_words];
-                    for (int w = 0; w < pichart_words; w++) {
-                        pichart_values[w] = 0U;
-                    }
-
-                    for (int r = 0; r < posrows; r++) {
-                        coverage[r] = decpos[r] == decpos[frows[f]];
-                        if (coverage[r]) {
-                            int word_index = r / BITS_PER_WORD;
-                            int bit_index = r % BITS_PER_WORD;
-                            pichart_values[word_index] |= (1U << bit_index);
-                        }
-                        covsum += coverage[r];
-                    }
-
-                    // verify row dominance
-                    int rd = 0;
-                    while (rd < last_index[covsum - 1] && !redundantOMP) {
-
-                        bool dominated = true;
-                        for (int w = 0; w < pichart_words; w++) {
-                            if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * pichart_words + w]) != pichart_values[w]) {
-                                dominated = false;
-                                break;
-                            }
-                        }
-
-                        redundantOMP = dominated;
-                        rd++;
-                    }
-
-                    if (redundantOMP) continue;
-
-
-                    // Rprintf("It is a prime implicant\n");
-                    // This operation first gets a new index to push in the global array in a concurrent way
-                    // Then adds the result there.
-                    // We could synchronize only the index and let the copy operation happen in parallel BUT this
-                    // creates a false sharing problem and the performance is down by several factors.
-
-                    #ifdef _OPENMP
-                        #pragma omp critical
-                    #endif
-                    {
-                        // push the PI information to the global arrays
-
-                        for (int i = foundPI; i > last_index[covsum - 1]; i--) {
-                            p_covered[i] = p_covered[i - 1];
-                        }
-
-                        p_covered[last_index[covsum - 1]] = foundPI;
-
-                        for (int l = 1; l < covsum; l++) {
-                            last_index[l - 1] += 1;
-                        }
-
-                        for (int w = 0; w < implicant_words; w++) {
-                            p_implicants_pos[implicant_words * foundPI + w] = fixed_bits[w];
-                            p_implicants_val[implicant_words * foundPI + w] = value_bits[w];
-                        }
-
-                        // populate the coverage matrix
-                        for (int r = 0; r < posrows; r++) {
-                            for (int w = 0; w < pichart_words; w++) {
-                                p_pichart_pos[foundPI * pichart_words + w] = pichart_values[w];
-                            }
-
-                            p_pichart[posrows * foundPI + r] = coverage[r];
-                        }
-
-                        ++foundPI;
-
-                        // when needed, increase allocated memory
-                        if (foundPI / estimPI > 0.9) {
-                            estimPI += 100000;
-                            p_pichart =        R_Realloc(p_pichart,        posrows * estimPI,         int);
-                            p_pichart_pos =    R_Realloc(p_pichart_pos,    pichart_words * estimPI,   unsigned int);
-                            p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int);
-                            p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int);
-                            p_covered =        R_Realloc(p_covered,        estimPI,                   int);
-
-                            for (unsigned int i = foundPI; i < posrows * estimPI; i++) {
-                                p_pichart[i] = 0;
-                            }
-                            for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) {
-                                p_pichart_pos[i] = 0U;
-                            }
-                            for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) {
-                                p_implicants_val[i] = 0U;
-                                p_implicants_pos[i] = 0U;
-                            }
-
-                            if (PRINT_INFO) {
-                                multiplier++;
-                                Rprintf("%dx ", multiplier);
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
        nofpi[k - 1] = foundPI;

        if (foundPI > 0 && !ON_set_covered) {