ccubes-cl/inst/ccubes.cl

/*
 * Copyright (c) 2025 Adrian Dușa <adrian.dusa@unibuc.ro>
 * Copyright (c) 2025 Paul Irofti <paul@irofti.net>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include "ccubes_generated.h"

#ifdef USE_DOUBLE
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
typedef double real;
#define R_ZERO	1e-14
#else
typedef float real;
#define R_ZERO	1e-10
#endif

#define BITS_PER_WORD 32

#define ROW_DIM 0
#define COL_DIM 1

// #pragma OPENCL EXTENSION cl_amd_printf : enable
// #pragma OPENCL EXTENSION cl_khr_select_fprounding_mode : enable
// #pragma OPENCL SELECT_ROUNDING_MODE rtz

#ifdef RANGE_DEBUG
#define RANGE_CHECK(lower, upper, value, str) do {	\
	if ((value) < (lower) || (value) > (upper)) {	\
		printf("%s", (str));			\
		return;					\
	}						\
} while(0);
#else
#define RANGE_CHECK(lower, upper, value, str)
#endif

unsigned long int
nchoosek(int n, int k)
{
    if (k == 0 || k == n) return 1;
    if (k == 1) return n;

    unsigned long int result = 1;

    if (k > n - k) {
        k = n - k;
    }

    for (int i = 0; i < k; i++) {
        result = result * (n - i) / (i + 1);
    }

    return result;
}

/*
 *
 * PROBLEM: CCubes
 *
 * NDRANGE: nchoosek(ninputs, k) work-items
 *
 * INPUT:
 *	k - current input
 *	nofvalues (ninputs x 1) - read, copy-host - number of values
 *	ON_set (posrows x ninputs) - read, copy-host - ON set
 *	OFF_set (ninputs x negrows) - read, copy-host - OFF set
 *	p_implicants_pos - read, copy-host
 *	p_implicants_val - read, copy-host
 *	last_index - read, copy-host
 *	p_covered - read, copy-host
 *	p_pichart_pos - read, copy-host
 *
 * CONSTANTS:
 *	NINPUTS - number of inputs
 *	POSROWS - positive output rows (the ON set)
 *	NEGROWS - negative output rows (the OFF set)
 *	IMPLICANT_WORDS - words needed per PI representation
 *	VALUE_BIT_WIDTH - largest bit used (ffs)
 *	PICHART_WORDS - words needed per PI chart columns
 *
 * OUTPUT:
 *	covsum - sum of coverage (reproduce on host instead?)
 *  	redundant (1) - read, write
 *	coverage (posrows x 1) - read, write
 *	fixed_bits (implicant_words x 1) - read, write
 *	value_bits (implicant_words x 1) - read, write
 *	pichart_values (pichart_words x 1) - read, write
 *
 * NOTE: Both input and output must be allocated before calling this funciton.
 */
#if 0
#define NINPUTS 64
#define POSROWS 128
#define NEGROWS 128
#define IMPLICANT_WORDS 64
#define VALUE_BIT_WIDTH 32
#define PICHART_WORDS 8
#endif
__kernel void
ccubes_task(int k,
		__global const int *nofvalues,			/* IN: RC */
		__global const int *ON_set,			/* IN: RC */
		__global const int *OFF_set,			/* IN: RC */
		__global const unsigned int *p_implicants_pos,	/* IN: RC */
		__global const unsigned int *p_implicants_val,	/* IN: RC */
		__global const int *last_index,			/* IN: RC */
		__global const int *p_covered,			/* IN: RC */
		__global const int *p_pichart_pos,		/* IN: RC */
		__global bool *g_redundant,			/* OUT: RW */
		__global bool *g_coverage,			/* OUT: RW */
		__global unsigned int *g_fixed_bits,		/* OUT: RW */
		__global unsigned int *g_value_bits,		/* OUT: RW */
		__global unsigned int *g_pichart_values		/* OUT: RW */
		)
{
	/* work-item?: task in nchoosek(ninputs, k) */
	/* work-group?: k in 1 to ninputs */
	/* total work: tasks in nchoosek for k in 1 to ninputs */

	// size_t gws = get_global_size(0);
	// size_t goffset = task - gws;
	// size_t goffset = get_global_offset(0);
	// size_t gid = task - goffset;
	// coverage[0] = 1;
	// printf("task %d, gws %d, goffset %d, gid %d\n", task, gws, goffset, gid);
	// return;

	size_t task = get_global_id(0);
	size_t gid = get_global_linear_id();

	__global bool *redundant = &g_redundant[gid];
	__global bool *coverage = &g_coverage[gid * POSROWS];
	__global unsigned int *fixed_bits = &g_fixed_bits[gid * IMPLICANT_WORDS];
	__global unsigned int *value_bits = &g_value_bits[gid * IMPLICANT_WORDS];
	__global unsigned int *pichart_values = &g_pichart_values[gid * PICHART_WORDS];

	*redundant = true;

	int prevfoundPI = 0;

	int tempk[NINPUTS]; /* max is tempk[ninputs] */
	int x = 0;
	int combination = task;

	/* INIT */
	for (int i = 0; i < POSROWS; i++)
		coverage[i] = 0;
	for (int i = 0; i < IMPLICANT_WORDS; i++) {
		fixed_bits[i] = 0U;
		value_bits[i] = 0U;
	}
	for (int i = 0; i < PICHART_WORDS; i++)
		pichart_values[i] = 0U;


	// fill the combination for the current task
	for (int i = 0; i < k; i++) {
		while (nchoosek(NINPUTS - (x + 1), k - (i + 1)) <= combination) {
			combination -= nchoosek(NINPUTS - (x + 1), k - (i + 1));
			x++;
		}
		tempk[i] = x;
		x++;
	}

	// allocate vectors of decimal row numbers for the positive and negative rows
	int decpos[POSROWS];
	int decneg[NEGROWS];

	// create the vector of multiple bases, useful when calculating the decimal representation
	// of a particular combination of columns, for each row
	int mbase[NINPUTS];
	mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input

	// calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with
	// 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6
	for (int i = 1; i < k; i++) {
		mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]];
	}

	// calculate decimal numbers, using mbase, fills in decpos and decneg
	for (int r = 0; r < POSROWS; r++) {
		decpos[r] = 0;
		for (int c = 0; c < k; c++) {
			decpos[r] += ON_set[tempk[c] * POSROWS + r] * mbase[c];
		}
	}

	for (int r = 0; r < NEGROWS; r++) {
		decneg[r] = 0;
		for (int c = 0; c < k; c++) {
			decneg[r] += OFF_set[tempk[c] * NEGROWS + r] * mbase[c];
		}
	}


	int possible_rows[POSROWS];

	bool possible_cover[POSROWS];
	possible_cover[0] = true; // bool flag, to be set with false if found among the OFF set

	int found = 0;

	// identifies all unique decimal rows, for the selected combination of k inputs
	for (int r = 0; r < POSROWS; r++) {
		int prev = 0;
		bool unique = true; // bool flag, assume the row is unique
		while (prev < found && unique) {
			unique = decpos[possible_rows[prev]] != decpos[r];
			prev++;
		}

		if (unique) {
			possible_rows[found] = r;
			possible_cover[found] = true;
			found++;
		}
	}

	if (found > 0) {
		// some of the ON set numbers are possible PIs (not found in the OFF set)
		int frows[POSROWS];

		// verify if this is a possible PI
		// (if the same decimal number is not found in the OFF set)
		for (int i = found - 1; i >= 0; i--) {
			int j = 0;
			while (j < NEGROWS && possible_cover[i]) {
				if (decpos[possible_rows[i]] == decneg[j]) {
					possible_cover[i] = false;
					found--;
				}
				j++;
			}

			if (possible_cover[i]) {
				frows[found - i - 1] = possible_rows[i];
			}
		}
		// Rprintf("task: %d; rows: %d\n", task, found);

		for (int f = 0; f < found; f++) {


			// create a temporary vector of length k, containing the values from the initial ON set
			// plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc.
			int tempc[NINPUTS];

			// using bit shifting, store the fixed bits and value bits
			// unsigned int fixed_bits[IMPLICANT_WORDS];
			// unsigned int value_bits[IMPLICANT_WORDS];

			for (int i = 0; i < IMPLICANT_WORDS; i++) {
				fixed_bits[i] = 0U;
				value_bits[i] = 0U;
			}

			for (int c = 0; c < k; c++) {
				int value = ON_set[tempk[c] * POSROWS + frows[f]];
				tempc[c] = value + 1;

				int word_index = tempk[c] / (BITS_PER_WORD / VALUE_BIT_WIDTH);
				int bit_index = (tempk[c] % (BITS_PER_WORD / VALUE_BIT_WIDTH)) * VALUE_BIT_WIDTH;

				fixed_bits[word_index] |= (VALUE_BIT_MASK << bit_index);
				value_bits[word_index] |= ((unsigned int)value << bit_index);
			}

			// check if the current PI is not redundant
			// bool redundant = false;
			*redundant = false;

			int i = 0;
			while (i < prevfoundPI && !*redundant) {
				// /*
				// - ck contains the complexity level for each of the previously found non-redundant PIs
				// - indx is a matrix containing the indexes of the columns where the values were stored
				// - a redundant PI is one for which all values from a previous PI are exactly the same:
				// 0 0 1 2 0, let's say previously found PI
				// which means a corresponding ck = 2 and a corresponding indx = [3, 4]
				// 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal
				// therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2
				// */

				bool is_subset = true; // Assume it's a subset unless proven otherwise

				for (int w = 0; w < IMPLICANT_WORDS; w++) {
					// If the new PI has values on positions outside the existing PI’s fixed positions, it’s not a subset
					unsigned int index_mask = p_implicants_pos[i * IMPLICANT_WORDS + w];

					if ((fixed_bits[w] & index_mask) != index_mask) {
						is_subset = false;
						break;
					}

					// then compare the value bits, if one or more values on those positions are different, it’s not a subset
					if ((value_bits[w] & index_mask) != (p_implicants_val[i * IMPLICANT_WORDS + w] & index_mask)) {
						is_subset = false;
						break;
					}
				}

				*redundant = is_subset;

				i++;
			}

			if (*redundant) continue;

			// bool coverage[POSROWS];
			int covsum = 0;
			// unsigned int pichart_values[PICHART_WORDS];
			for (int w = 0; w < PICHART_WORDS; w++) {
				pichart_values[w] = 0U;
			}

			for (int r = 0; r < POSROWS; r++) {
				coverage[r] = decpos[r] == decpos[frows[f]];
				if (coverage[r]) {
					int word_index = r / BITS_PER_WORD;
					int bit_index = r % BITS_PER_WORD;
					pichart_values[word_index] |= (1U << bit_index);
				}
				covsum += coverage[r];
			}

			// verify row dominance
			int rd = 0;
			while (rd < last_index[covsum - 1] && !*redundant) {

				bool dominated = true;
				for (int w = 0; w < PICHART_WORDS; w++) {
					if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * PICHART_WORDS + w]) != pichart_values[w]) {
						dominated = false;
						break;
					}
				}

				*redundant = dominated;
				rd++;
			}

			if (*redundant) continue;
		}
	}
}