diff --git a/inst/ccubes.cl b/inst/ccubes.cl index fcd0b15..1a70e35 100755 --- a/inst/ccubes.cl +++ b/inst/ccubes.cl @@ -73,6 +73,7 @@ nchoosek(int n, int k) * * INPUT: * k - current input + * prevfoundPI - number of previously found PIs (at the previous levels of complexity) * nofvalues (ninputs x 1) - read, copy-host - number of values * ON_set (posrows x ninputs) - read, copy-host - ON set * OFF_set (ninputs x negrows) - read, copy-host - OFF set @@ -92,11 +93,11 @@ nchoosek(int n, int k) * * OUTPUT: * covsum - sum of coverage (reproduce on host instead?) - * redundant (1) - read, write - * coverage (posrows x 1) - read, write - * fixed_bits (implicant_words x 1) - read, write - * value_bits (implicant_words x 1) - read, write - * pichart_values (pichart_words x 1) - read, write + * redundant (posrows x 1) - read, write + * coverage (posrows x posrows x 1) - read, write + * fixed_bits (posrows x implicant_words x 1) - read, write + * value_bits (posrows x implicant_words x 1) - read, write + * pichart_values (posrows x pichart_words x 1) - read, write * * NOTE: Both input and output must be allocated before calling this funciton. */ @@ -110,6 +111,7 @@ nchoosek(int n, int k) #endif __kernel void ccubes_task(int k, + int prevfoundPI, __global const int *nofvalues, /* IN: RC */ __global const int *ON_set, /* IN: RC */ __global const int *OFF_set, /* IN: RC */ @@ -120,6 +122,7 @@ ccubes_task(int k, __global const int *p_pichart_pos, /* IN: RC */ __global bool *g_redundant, /* OUT: RW */ __global bool *g_coverage, /* OUT: RW */ + __global int *g_taskpis, /* OUT: RW */ __global unsigned int *g_fixed_bits, /* OUT: RW */ __global unsigned int *g_value_bits, /* OUT: RW */ __global unsigned int *g_pichart_values /* OUT: RW */ @@ -140,28 +143,32 @@ ccubes_task(int k, size_t task = get_global_id(0); size_t gid = get_global_linear_id(); - __global bool *redundant = &g_redundant[gid]; - __global bool *coverage = &g_coverage[gid * POSROWS]; - __global unsigned int *fixed_bits = &g_fixed_bits[gid * IMPLICANT_WORDS]; - __global unsigned int *value_bits = &g_value_bits[gid * IMPLICANT_WORDS]; - __global unsigned int *pichart_values = &g_pichart_values[gid * PICHART_WORDS]; + __global bool *redundant = &g_redundant[gid * POSROWS]; + __global bool *coverage = &g_coverage[gid * POSROWS * POSROWS]; - *redundant = true; + // taskpis is "found" in the kernel, as in: "how many PIs does this task finds" + __global int *found = &g_taskpis[gid]; + + __global unsigned int *fixed_bits = &g_fixed_bits[gid * POSROWS * IMPLICANT_WORDS]; + __global unsigned int *value_bits = &g_value_bits[gid * POSROWS * IMPLICANT_WORDS]; + __global unsigned int *pichart_values = &g_pichart_values[gid * POSROWS * PICHART_WORDS]; - int prevfoundPI = 0; int tempk[NINPUTS]; /* max is tempk[ninputs] */ int x = 0; int combination = task; /* INIT */ + *found = 0; for (int i = 0; i < POSROWS; i++) + redundant[i] = true; + for (int i = 0; i < POSROWS * POSROWS; i++) coverage[i] = 0; - for (int i = 0; i < IMPLICANT_WORDS; i++) { + for (int i = 0; i < POSROWS * IMPLICANT_WORDS; i++) { fixed_bits[i] = 0U; value_bits[i] = 0U; } - for (int i = 0; i < PICHART_WORDS; i++) + for (int i = 0; i < POSROWS * PICHART_WORDS; i++) pichart_values[i] = 0U; @@ -211,62 +218,52 @@ ccubes_task(int k, bool possible_cover[POSROWS]; possible_cover[0] = true; // bool flag, to be set with false if found among the OFF set - int found = 0; + // int found = 0; // identifies all unique decimal rows, for the selected combination of k inputs for (int r = 0; r < POSROWS; r++) { int prev = 0; bool unique = true; // bool flag, assume the row is unique - while (prev < found && unique) { + while (prev < *found && unique) { unique = decpos[possible_rows[prev]] != decpos[r]; prev++; } if (unique) { - possible_rows[found] = r; - possible_cover[found] = true; - found++; + possible_rows[*found] = r; + possible_cover[*found] = true; + (*found)++; } } - if (found > 0) { + if (*found > 0) { // some of the ON set numbers are possible PIs (not found in the OFF set) int frows[POSROWS]; // verify if this is a possible PI // (if the same decimal number is not found in the OFF set) - for (int i = found - 1; i >= 0; i--) { + for (int i = *found - 1; i >= 0; i--) { int j = 0; while (j < NEGROWS && possible_cover[i]) { if (decpos[possible_rows[i]] == decneg[j]) { possible_cover[i] = false; - found--; + (*found)--; } j++; } if (possible_cover[i]) { - frows[found - i - 1] = possible_rows[i]; + frows[*found - i - 1] = possible_rows[i]; } } - // Rprintf("task: %d; rows: %d\n", task, found); - - for (int f = 0; f < found; f++) { + // Rprintf("task: %d; rows: %d\n", task, *found); + for (int f = 0; f < *found; f++) { // create a temporary vector of length k, containing the values from the initial ON set - // plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc. + // plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc). int tempc[NINPUTS]; - // using bit shifting, store the fixed bits and value bits - // unsigned int fixed_bits[IMPLICANT_WORDS]; - // unsigned int value_bits[IMPLICANT_WORDS]; - - for (int i = 0; i < IMPLICANT_WORDS; i++) { - fixed_bits[i] = 0U; - value_bits[i] = 0U; - } - for (int c = 0; c < k; c++) { int value = ON_set[tempk[c] * POSROWS + frows[f]]; tempc[c] = value + 1; @@ -274,16 +271,16 @@ ccubes_task(int k, int word_index = tempk[c] / (BITS_PER_WORD / VALUE_BIT_WIDTH); int bit_index = (tempk[c] % (BITS_PER_WORD / VALUE_BIT_WIDTH)) * VALUE_BIT_WIDTH; - fixed_bits[word_index] |= (VALUE_BIT_MASK << bit_index); - value_bits[word_index] |= ((unsigned int)value << bit_index); + fixed_bits[f * POSROWS + word_index] |= (VALUE_BIT_MASK << bit_index); + value_bits[f * POSROWS + word_index] |= ((unsigned int)value << bit_index); } // check if the current PI is not redundant // bool redundant = false; - *redundant = false; + redundant[f] = false; int i = 0; - while (i < prevfoundPI && !*redundant) { + while (i < prevfoundPI && !redundant[f]) { // /* // - ck contains the complexity level for each of the previously found non-redundant PIs // - indx is a matrix containing the indexes of the columns where the values were stored @@ -300,59 +297,55 @@ ccubes_task(int k, // If the new PI has values on positions outside the existing PI’s fixed positions, it’s not a subset unsigned int index_mask = p_implicants_pos[i * IMPLICANT_WORDS + w]; - if ((fixed_bits[w] & index_mask) != index_mask) { + if ((fixed_bits[f * POSROWS + w] & index_mask) != index_mask) { is_subset = false; break; } // then compare the value bits, if one or more values on those positions are different, it’s not a subset - if ((value_bits[w] & index_mask) != (p_implicants_val[i * IMPLICANT_WORDS + w] & index_mask)) { + if ((value_bits[f * POSROWS + w] & index_mask) != (p_implicants_val[i * IMPLICANT_WORDS + w] & index_mask)) { is_subset = false; break; } } - *redundant = is_subset; + redundant[f] = is_subset; i++; } - if (*redundant) continue; + if (redundant[f]) continue; + - // bool coverage[POSROWS]; int covsum = 0; - // unsigned int pichart_values[PICHART_WORDS]; - for (int w = 0; w < PICHART_WORDS; w++) { - pichart_values[w] = 0U; - } for (int r = 0; r < POSROWS; r++) { - coverage[r] = decpos[r] == decpos[frows[f]]; - if (coverage[r]) { + coverage[f * POSROWS + r] = decpos[r] == decpos[frows[f]]; + if (coverage[f * POSROWS + r]) { int word_index = r / BITS_PER_WORD; int bit_index = r % BITS_PER_WORD; - pichart_values[word_index] |= (1U << bit_index); + pichart_values[f * POSROWS + word_index] |= (1U << bit_index); + covsum++; } - covsum += coverage[r]; } // verify row dominance int rd = 0; - while (rd < last_index[covsum - 1] && !*redundant) { + while (rd < last_index[covsum - 1] && !redundant[f]) { bool dominated = true; for (int w = 0; w < PICHART_WORDS; w++) { - if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * PICHART_WORDS + w]) != pichart_values[w]) { + if ((pichart_values[f * POSROWS + w] & p_pichart_pos[p_covered[rd] * PICHART_WORDS + w]) != pichart_values[w]) { dominated = false; break; } } - *redundant = dominated; + redundant[f] = dominated; rd++; } - if (*redundant) continue; + if (redundant[f]) continue; } } } diff --git a/src/CCubes.c b/src/CCubes.c index a55cded..248fe1a 100755 --- a/src/CCubes.c +++ b/src/CCubes.c @@ -240,6 +240,7 @@ SEXP CCubes(SEXP tt) { bool *redundant; bool *coverage; + int *taskpis; unsigned int *fixed_bits; unsigned int *value_bits; unsigned int *pichart_values; @@ -247,6 +248,7 @@ SEXP CCubes(SEXP tt) { current_batch, task, k, + prevfoundPI, ninputs, posrows, negrows, @@ -265,111 +267,122 @@ SEXP CCubes(SEXP tt) { p_pichart_pos, redundant, coverage, + taskpis, fixed_bits, value_bits, pichart_values ); for (int current_task = 0; current_task < current_batch; current_task++) { - log_debug("ccubes", "Task %d", current_task); + int foundpis = ctx->h_taskpis[current_task]; + log_debug("ccubes", "Task %d, found: %d", current_task, foundpis); - if (!ctx->h_redundant[current_task]) { - /* LOG TASK */ - log_debug_raw("ccubes", "redundant[%d]: %d\n", current_task, ctx->h_redundant[current_task]); + if (foundpis > 5) { + log_debug("ccubes", "Something is wrong\n"); + foundpis = 0; + } - log_debug_raw("ccubes", "coverage[%d]:", current_task); - for (int j = 0; j < posrows; j++) { - log_debug_raw("ccubes", " %d", - ctx->h_coverage[current_task * posrows + j]); - } - log_debug_raw("ccubes", "\n"); + for (int f = 0; f < foundpis; f++) { + // if (!ctx->h_redundant[current_task + f]) { + /* LOG TASK */ - log_debug_raw("ccubes", "fixed_bits[%d]:", current_task); - for (int j = 0; j < implicant_words; j++) { - log_debug_raw("ccubes", " %d", - ctx->h_fixed_bits[current_task * implicant_words + j]); - } - log_debug_raw("ccubes", "\n"); + log_debug_raw("ccubes", "coverage[%d][%d]:", current_task, f); + for (int j = 0; j < posrows; j++) { + log_debug_raw("ccubes", " %d", + ctx->h_coverage[current_task * posrows * posrows + f * posrows + j]); + } + log_debug_raw("ccubes", "\n"); - log_debug_raw("ccubes", "value_bits[%d]:", current_task); - for (int j = 0; j < implicant_words; j++) { - log_debug_raw("ccubes", " %d", - ctx->h_value_bits[current_task * implicant_words + j]); - } - log_debug_raw("ccubes", "\n"); + log_debug_raw("ccubes", "fixed_bits[%d][%d]:", current_task, f); + for (int j = 0; j < implicant_words; j++) { + log_debug_raw("ccubes", " %d", + ctx->h_fixed_bits[current_task * posrows * implicant_words + f * posrows + j]); + } + log_debug_raw("ccubes", "\n"); - log_debug_raw("ccubes", "pichart_values[%d]:", current_task); - for (int j = 0; j < pichart_words; j++) { - log_debug_raw("ccubes", " %d", - ctx->h_pichart_values[current_task * pichart_words + j]); - } - log_debug_raw("ccubes", "\n"); + log_debug_raw("ccubes", "value_bits[%d][%d]:", current_task, f); + for (int j = 0; j < implicant_words; j++) { + log_debug_raw("ccubes", " %d", + ctx->h_value_bits[current_task * posrows * implicant_words + f * posrows + j]); + } + log_debug_raw("ccubes", "\n"); + + log_debug_raw("ccubes", "pichart_values[%d][%d]:", current_task, f); + for (int j = 0; j < pichart_words; j++) { + log_debug_raw("ccubes", " %d", + ctx->h_pichart_values[current_task * posrows * pichart_words + f * posrows + j]); + } + log_debug_raw("ccubes", "\n"); - int covsum = 0; - for (int i = 0; i < posrows; i++) { - covsum += ctx->h_coverage[current_task * posrows + i]; - } - // push the PI information to the global arrays + int covsum = 0; + for (int i = 0; i < posrows; i++) { + covsum += ctx->h_coverage[current_task * posrows * posrows + f * posrows + i]; + } + // push the PI information to the global arrays - for (int i = foundPI; i > last_index[covsum - 1]; i--) { - p_covered[i] = p_covered[i - 1]; - } + for (int i = foundPI; i > last_index[covsum - 1]; i--) { + p_covered[i] = p_covered[i - 1]; + } - p_covered[last_index[covsum - 1]] = foundPI; + p_covered[last_index[covsum - 1]] = foundPI; - for (int l = 1; l < covsum; l++) { - last_index[l - 1] += 1; - } + for (int l = 1; l < covsum; l++) { + last_index[l - 1] += 1; + } - for (int w = 0; w < implicant_words; w++) { - p_implicants_pos[implicant_words * foundPI + w] = ctx->h_fixed_bits[current_task * implicant_words + w]; - p_implicants_val[implicant_words * foundPI + w] = ctx->h_value_bits[current_task * implicant_words + w]; - } + for (int w = 0; w < implicant_words; w++) { + p_implicants_pos[implicant_words * foundPI + w] = ctx->h_fixed_bits[current_task * posrows * implicant_words + f * implicant_words + w]; + p_implicants_val[implicant_words * foundPI + w] = ctx->h_value_bits[current_task * posrows * implicant_words + f * implicant_words + w]; + } - // populate the coverage matrix - for (int r = 0; r < posrows; r++) { - for (int w = 0; w < pichart_words; w++) { - p_pichart_pos[foundPI * pichart_words + w] = ctx->h_pichart_values[current_task * pichart_words + w]; - } + // populate the coverage matrix + for (int r = 0; r < posrows; r++) { + for (int w = 0; w < pichart_words; w++) { + p_pichart_pos[foundPI * pichart_words + w] = ctx->h_pichart_values[current_task * posrows * pichart_words + f * pichart_words + w]; + } - p_pichart[posrows * foundPI + r] = ctx->h_coverage[current_task * posrows + r]; - } + p_pichart[posrows * foundPI + r] = ctx->h_coverage[current_task * posrows * posrows + f * posrows + r]; + } - ++foundPI; + ++foundPI; - // when needed, increase allocated memory - if (foundPI / estimPI > 0.9) { - estimPI += 100000; - p_pichart = R_Realloc(p_pichart, posrows * estimPI, int); - p_pichart_pos = R_Realloc(p_pichart_pos, pichart_words * estimPI, unsigned int); - p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int); - p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int); - p_covered = R_Realloc(p_covered, estimPI, int); + // when needed, increase allocated memory + if (foundPI / estimPI > 0.9) { + estimPI += 100000; + p_pichart = R_Realloc(p_pichart, posrows * estimPI, int); + p_pichart_pos = R_Realloc(p_pichart_pos, pichart_words * estimPI, unsigned int); + p_implicants_val = R_Realloc(p_implicants_val, implicant_words * estimPI, unsigned int); + p_implicants_pos = R_Realloc(p_implicants_pos, implicant_words * estimPI, unsigned int); + p_covered = R_Realloc(p_covered, estimPI, int); - for (unsigned int i = foundPI; i < posrows * estimPI; i++) { - p_pichart[i] = 0; - } - for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) { - p_pichart_pos[i] = 0U; - } - for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) { - p_implicants_val[i] = 0U; - p_implicants_pos[i] = 0U; - } + for (unsigned int i = foundPI; i < posrows * estimPI; i++) { + p_pichart[i] = 0; + } + for (unsigned int i = foundPI; i < pichart_words * estimPI; i++) { + p_pichart_pos[i] = 0U; + } + for (unsigned int i = foundPI; i < implicant_words * estimPI; i++) { + p_implicants_val[i] = 0U; + p_implicants_pos[i] = 0U; + } - if (PRINT_INFO) { - multiplier++; - Rprintf("%dx ", multiplier); - } - } - } + if (PRINT_INFO) { + multiplier++; + Rprintf("%dx ", multiplier); + } + } + // } + } } /* change to something less aggresive for reuse */ ccubes_clean_up(ctx); } + // TODO: remove this after fixing the problems above + // return(R_NilValue); + nofpi[k - 1] = foundPI; if (foundPI > 0 && !ON_set_covered) { diff --git a/src/clccubes.c b/src/clccubes.c index b771f5a..e12b6d7 100755 --- a/src/clccubes.c +++ b/src/clccubes.c @@ -78,6 +78,7 @@ ccubes_init(struct ccubes_context *ctx, int n_tasks, int n_tasks_off, int k, + int prevfoundPI, int ninputs, int posrows, int negrows, @@ -91,6 +92,7 @@ ccubes_init(struct ccubes_context *ctx, int rc = 0; ctx->k = k; + ctx->prevfoundPI = prevfoundPI; ctx->ninputs = ninputs; ctx->posrows = posrows; ctx->negrows = negrows; @@ -176,6 +178,7 @@ ccubes_alloc(struct ccubes_context *ctx, int *p_pichart_pos, /* IN: RC */ bool *redundant, /* OUT: RW */ bool *coverage, /* OUT: RW */ + int *taskpis, /* OUT: RW */ unsigned int *fixed_bits, /* OUT: RW */ unsigned int *value_bits, /* OUT: RW */ unsigned int *pichart_values /* OUT: RW */ @@ -188,6 +191,7 @@ ccubes_alloc(struct ccubes_context *ctx, */ ctx->h_redundant = redundant; ctx->h_coverage = coverage; + ctx->h_taskpis = taskpis; ctx->h_fixed_bits = fixed_bits; ctx->h_value_bits = value_bits; ctx->h_pichart_values = pichart_values; @@ -259,32 +263,36 @@ ccubes_alloc(struct ccubes_context *ctx, /* __global bool *redundant, OUT: RW */ ctx->redundant = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, - ctx->gws * sizeof(bool), NULL, &rc); + ctx->gws * ctx->posrows * sizeof(bool), NULL, &rc); /* __global bool *coverage, OUT: RW */ ctx->coverage = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, - ctx->gws * ctx->posrows * sizeof(bool), NULL, &rc); + ctx->gws * ctx->posrows * ctx->posrows * sizeof(bool), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } + /* __global int *taskpis, OUT: RW */ + ctx->taskpis = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, + ctx->gws * sizeof(int), NULL, &rc); + /* __global unsigned int *fixed_bits, OUT: RW */ ctx->fixed_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, - ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); + ctx->gws * ctx->posrows * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global unsigned int *value_bits, OUT: RW */ ctx->value_bits = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, - ctx->gws * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); + ctx->gws * ctx->posrows * ctx->implicant_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } /* __global unsigned int *pichart_values, OUT: RW */ ctx->pichart_values = clCreateBuffer(ctx->clctx->ctx, CL_MEM_READ_WRITE, - ctx->gws * ctx->pichart_words * sizeof(unsigned int), NULL, &rc); + ctx->gws * ctx->posrows * ctx->pichart_words * sizeof(unsigned int), NULL, &rc); if (rc != CL_SUCCESS) { goto err; } @@ -303,6 +311,8 @@ ccubes_run(struct ccubes_context *ctx) arg = 0; rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(int), (void *)&ctx->k); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(int), (void *)&ctx->prevfoundPI); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->nofvalues); @@ -326,6 +336,8 @@ ccubes_run(struct ccubes_context *ctx) sizeof(cl_mem), (void *)&ctx->redundant); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->coverage); + rc |= clSetKernelArg(ctx->ccubes_task, arg++, + sizeof(cl_mem), (void *)&ctx->taskpis); rc |= clSetKernelArg(ctx->ccubes_task, arg++, sizeof(cl_mem), (void *)&ctx->fixed_bits); rc |= clSetKernelArg(ctx->ccubes_task, arg++, @@ -359,21 +371,28 @@ ccubes_map(struct ccubes_context *ctx) ctx->h_redundant = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->redundant, CL_TRUE, CL_MAP_READ, 0, - ctx->gws * sizeof(bool), 0, NULL, NULL, &rc); + ctx->gws * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "redundant mapping failed (%d)", rc); goto err; } ctx->h_coverage = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->coverage, CL_TRUE, CL_MAP_READ, 0, - ctx->gws * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc); + ctx->gws * ctx->posrows * ctx->posrows * sizeof(bool), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "coverage mapping failed (%d)", rc); goto err; } + ctx->h_taskpis = clEnqueueMapBuffer(ctx->clctx->gpu_queue, + ctx->taskpis, CL_TRUE, CL_MAP_READ, 0, + ctx->gws * sizeof(int), 0, NULL, NULL, &rc); + if (rc != CL_SUCCESS) { + log_error("clccubes", "taskpis mapping failed (%d)", rc); + goto err; + } ctx->h_fixed_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->fixed_bits, CL_TRUE, CL_MAP_READ, 0, - ctx->gws * ctx->implicant_words * sizeof(unsigned int), + ctx->gws * ctx->posrows * ctx->implicant_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "fixed_bits mapping failed (%d)", rc); @@ -381,7 +400,7 @@ ccubes_map(struct ccubes_context *ctx) } ctx->h_value_bits = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->value_bits, CL_TRUE, CL_MAP_READ, 0, - ctx->gws * ctx->implicant_words * sizeof(unsigned int), + ctx->gws * ctx->posrows * ctx->implicant_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "value_bits mapping failed (%d)", rc); @@ -389,7 +408,7 @@ ccubes_map(struct ccubes_context *ctx) } ctx->h_pichart_values = clEnqueueMapBuffer(ctx->clctx->gpu_queue, ctx->pichart_values, CL_TRUE, CL_MAP_READ, 0, - ctx->gws * ctx->pichart_words * sizeof(unsigned int), + ctx->gws * ctx->posrows * ctx->pichart_words * sizeof(unsigned int), 0, NULL, NULL, &rc); if (rc != CL_SUCCESS) { log_error("clccubes", "pichart_values mapping failed (%d)", rc); @@ -418,6 +437,11 @@ ccubes_unmap(struct ccubes_context *ctx) if (rc != CL_SUCCESS) { log_error("clccubes", "coverage unmapping failed (%d)", rc); } + rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->taskpis, + ctx->h_taskpis, 0, NULL, NULL); + if (rc != CL_SUCCESS) { + log_error("clccubes", "taskpis unmapping failed (%d)", rc); + } rc = clEnqueueUnmapMemObject(ctx->clctx->gpu_queue, ctx->fixed_bits, ctx->h_fixed_bits, 0, NULL, NULL); if (rc != CL_SUCCESS) { @@ -443,6 +467,7 @@ ccubes_do_tasks(struct ccubes_context *ccubesctx, int n_tasks, int n_tasks_off, int k, + int prevfoundPI, int ninputs, int posrows, int negrows, @@ -461,6 +486,7 @@ ccubes_do_tasks(struct ccubes_context *ccubesctx, int *p_pichart_pos, /* IN: RC */ bool *redundant, /* OUT: RW */ bool *coverage, /* OUT: RW */ + int *taskpis, /* OUT: RW */ unsigned int *fixed_bits, /* OUT: RW */ unsigned int *value_bits, /* OUT: RW */ unsigned int *pichart_values /* OUT: RW */ @@ -472,6 +498,7 @@ ccubes_do_tasks(struct ccubes_context *ccubesctx, n_tasks, n_tasks_off, k, + prevfoundPI, ninputs, posrows, negrows, @@ -508,6 +535,7 @@ ccubes_do_tasks(struct ccubes_context *ccubesctx, p_pichart_pos, redundant, coverage, + taskpis, fixed_bits, value_bits, pichart_values); @@ -554,6 +582,7 @@ ccubes_clean_up(struct ccubes_context *ctx) /* OUTPUTS */ clReleaseMemObject(ctx->redundant); clReleaseMemObject(ctx->coverage); + clReleaseMemObject(ctx->taskpis); clReleaseMemObject(ctx->fixed_bits); clReleaseMemObject(ctx->value_bits); clReleaseMemObject(ctx->pichart_values); diff --git a/src/clccubes.h b/src/clccubes.h index 6dc1425..17eadee 100755 --- a/src/clccubes.h +++ b/src/clccubes.h @@ -39,6 +39,7 @@ struct ccubes_context { /* internal memory sizes */ int k; + int prevfoundPI; int ninputs; int posrows; int negrows; @@ -61,6 +62,7 @@ struct ccubes_context { /* OUTPUTS */ cl_mem redundant; cl_mem coverage; + cl_mem taskpis; cl_mem fixed_bits; cl_mem value_bits; cl_mem pichart_values; @@ -68,6 +70,7 @@ struct ccubes_context { /* Host outputs */ bool *h_redundant; bool *h_coverage; + int *h_taskpis; unsigned int *h_fixed_bits; unsigned int *h_value_bits; unsigned int *h_pichart_values; @@ -85,6 +88,7 @@ ccubes_do_tasks(struct ccubes_context *ccubesctx, int n_tasks, int n_tasks_off, int k, + int prevfoundPI, int ninputs, int posrows, int negrows, @@ -103,6 +107,7 @@ ccubes_do_tasks(struct ccubes_context *ccubesctx, int *p_pichart_pos, /* IN: RC */ bool *redundant, /* OUT: RW */ bool *coverage, /* OUT: RW */ + int *taskpis, /* OUT: RW */ unsigned int *fixed_bits, /* OUT: RW */ unsigned int *value_bits, /* OUT: RW */ unsigned int *pichart_values /* OUT: RW */