diff --git a/inst/ccubes.cl b/inst/ccubes.cl
new file mode 100755
index 0000000..20e2081
--- /dev/null
+++ b/inst/ccubes.cl
@@ -0,0 +1,346 @@
+/*
+ * Copyright (c) 2025 Adrian Dușa <adrian.dusa@unibuc.ro>
+ * Copyright (c) 2025 Paul Irofti <paul@irofti.net>
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "ccubes_generated.h"
+
+#ifdef USE_DOUBLE
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+typedef double real;
+#define R_ZERO	1e-14
+#else
+typedef float real;
+#define R_ZERO	1e-10
+#endif
+
+#define BITS_PER_WORD 32
+
+#define ROW_DIM 0
+#define COL_DIM 1
+
+// #pragma OPENCL EXTENSION cl_amd_printf : enable
+// #pragma OPENCL EXTENSION cl_khr_select_fprounding_mode : enable
+// #pragma OPENCL SELECT_ROUNDING_MODE rtz
+
+#ifdef RANGE_DEBUG
+#define RANGE_CHECK(lower, upper, value, str) do {	\
+	if ((value) < (lower) || (value) > (upper)) {	\
+		printf("%s", (str));			\
+		return;					\
+	}						\
+} while(0);
+#else
+#define RANGE_CHECK(lower, upper, value, str)
+#endif
+
+unsigned long int
+nchoosek(int n, int k)
+{
+    if (k == 0 || k == n) return 1;
+    if (k == 1) return n;
+
+    unsigned long int result = 1;
+
+    if (k > n - k) {
+        k = n - k;
+    }
+
+    for (int i = 0; i < k; i++) {
+        result = result * (n - i) / (i + 1);
+    }
+
+    return result;
+}
+
+/*
+ *
+ * PROBLEM: CCubes
+ *
+ * NDRANGE: nchoosek(ninputs, k) work-items
+ *
+ * INPUT:
+ *	k - current input
+ *	nofvalues (ninputs x 1) - read, copy-host - number of values
+ *	ON_set (posrows x ninputs) - read, copy-host - ON set
+ *	OFF_set (ninputs x negrows) - read, copy-host - OFF set
+ *	p_implicants_pos - read, copy-host
+ *	p_implicants_val - read, copy-host
+ *	last_index - read, copy-host
+ *	p_covered - read, copy-host
+ *	p_pichart_pos - read, copy-host
+ *
+ * CONSTANTS:
+ *	NINPUTS - number of inputs
+ *	POSROWS - positive output rows (the ON set)
+ *	NEGROWS - negative output rows (the OFF set)
+ *	IMPLICANT_WORDS - words needed per PI representation
+ *	VALUE_BIT_WIDTH - largest bit used (ffs)
+ *	PICHART_WORDS - words needed per PI chart columns
+ *
+ * OUTPUT:
+ *	covsum - sum of coverage (reproduce on host instead?)
+ *  redundant (1) - read, write
+ *	coverage (posrows x 1) - read, write
+ *	fixed_bits (implicant_words x 1) - read, write
+ *	value_bits (implicant_words x 1) - read, write
+ *	pichart_values (pichart_words x 1) - read, write
+ *
+ * NOTE: Both input and output must be allocated before calling this funciton.
+ */
+#if 0
+#define NINPUTS 64
+#define POSROWS 128
+#define NEGROWS 128
+#define IMPLICANT_WORDS 64
+#define VALUE_BIT_WIDTH 32
+#define PICHART_WORDS 8
+#endif
+__kernel void
+ccubes_task(int k,
+		__global const int *nofvalues,			/* IN: RC */
+		__global const int *ON_set,			/* IN: RC */
+		__global const int *OFF_set,			/* IN: RC */
+		__global const unsigned int *p_implicants_pos,	/* IN: RC */
+		__global const unsigned int *p_implicants_val,	/* IN: RC */
+		__global const int *last_index,			/* IN: RC */
+		__global const int *p_covered,			/* IN: RC */
+		__global const int *p_pichart_pos,		/* IN: RC */
+		__global bool *g_redundant,			/* OUT: RW */
+		__global bool *g_coverage,			/* OUT: RW */
+		__global unsigned int *g_fixed_bits,		/* OUT: RW */
+		__global unsigned int *g_value_bits,		/* OUT: RW */
+		__global unsigned int *g_pichart_values		/* OUT: RW */
+		)
+{
+	/* work-item?: task in nchoosek(ninputs, k) */
+	/* work-group?: k in 1 to ninputs */
+	/* total work: tasks in nchoosek for k in 1 to ninputs */
+
+	size_t task = get_global_id(0);
+	size_t gws = get_global_size(0);
+	// size_t goffset = task - gws;
+	size_t goffset = get_global_offset(0);
+	// size_t gid = task - goffset;
+	size_t gid = get_global_linear_id();
+
+	__global bool *redundant = &g_redundant[gid];
+	__global bool *coverage = &g_coverage[gid * POSROWS];
+	__global unsigned int *fixed_bits = &g_fixed_bits[gid * IMPLICANT_WORDS];
+	__global unsigned int *value_bits = &g_value_bits[gid * IMPLICANT_WORDS];
+	__global unsigned int *pichart_values = &g_pichart_values[gid * PICHART_WORDS];
+
+	// coverage[0] = 1;
+	// printf("task %d, gws %d, goffset %d, gid %d\n", task, gws, goffset, gid);
+	// return;
+	redundant = true;
+
+	int prevfoundPI = 0;
+
+	int tempk[NINPUTS]; /* max is tempk[ninputs] */
+	int x = 0;
+	int combination = task;
+
+	// fill the combination for the current task
+	for (int i = 0; i < k; i++) {
+		while (nchoosek(NINPUTS - (x + 1), k - (i + 1)) <= combination) {
+			combination -= nchoosek(NINPUTS - (x + 1), k - (i + 1));
+			x++;
+		}
+		tempk[i] = x;
+		x++;
+	}
+
+	// allocate vectors of decimal row numbers for the positive and negative rows
+	int decpos[POSROWS];
+	int decneg[NEGROWS];
+
+	// create the vector of multiple bases, useful when calculating the decimal representation
+	// of a particular combination of columns, for each row
+	int mbase[NINPUTS];
+	mbase[0] = 1; // the first number is _always_ equal to 1, irrespective of the number of values in a certain input
+
+	// calculate the vector of multiple bases, for example if we have k = 3 (three inputs) with
+	// 2, 3 and 2 values then mbase will be [1, 2, 6] from: 1, 1 * 2 = 2, 2 * 3 = 6
+	for (int i = 1; i < k; i++) {
+		mbase[i] = mbase[i - 1] * nofvalues[tempk[i - 1]];
+	}
+
+	// calculate decimal numbers, using mbase, fills in decpos and decneg
+	for (int r = 0; r < POSROWS; r++) {
+		decpos[r] = 0;
+		for (int c = 0; c < k; c++) {
+			decpos[r] += ON_set[tempk[c] * POSROWS + r] * mbase[c];
+		}
+	}
+
+	for (int r = 0; r < NEGROWS; r++) {
+		decneg[r] = 0;
+		for (int c = 0; c < k; c++) {
+			decneg[r] += OFF_set[tempk[c] * NEGROWS + r] * mbase[c];
+		}
+	}
+
+
+	int possible_rows[POSROWS];
+
+	bool possible_cover[POSROWS];
+	possible_cover[0] = true; // bool flag, to be set with false if found among the OFF set
+
+	int found = 0;
+
+	// identifies all unique decimal rows, for the selected combination of k inputs
+	for (int r = 0; r < POSROWS; r++) {
+		int prev = 0;
+		bool unique = true; // bool flag, assume the row is unique
+		while (prev < found && unique) {
+			unique = decpos[possible_rows[prev]] != decpos[r];
+			prev++;
+		}
+
+		if (unique) {
+			possible_rows[found] = r;
+			possible_cover[found] = true;
+			found++;
+		}
+	}
+
+	if (found > 0) {
+		// some of the ON set numbers are possible PIs (not found in the OFF set)
+		int frows[POSROWS];
+
+		// verify if this is a possible PI
+		// (if the same decimal number is not found in the OFF set)
+		for (int i = found - 1; i >= 0; i--) {
+			int j = 0;
+			while (j < NEGROWS && possible_cover[i]) {
+				if (decpos[possible_rows[i]] == decneg[j]) {
+					possible_cover[i] = false;
+					found--;
+				}
+				j++;
+			}
+
+			if (possible_cover[i]) {
+				frows[found - i - 1] = possible_rows[i];
+			}
+		}
+		// Rprintf("task: %d; rows: %d\n", task, found);
+
+		for (int f = 0; f < found; f++) {
+
+
+			// create a temporary vector of length k, containing the values from the initial ON set
+			// plus 1 (because 0 now signals a minimization, it becomes 1, and 1 becomes 2 etc.
+			int tempc[NINPUTS];
+
+			// using bit shifting, store the fixed bits and value bits
+			// unsigned int fixed_bits[IMPLICANT_WORDS];
+			// unsigned int value_bits[IMPLICANT_WORDS];
+
+			for (int i = 0; i < IMPLICANT_WORDS; i++) {
+				fixed_bits[i] = 0U;
+				value_bits[i] = 0U;
+			}
+
+			for (int c = 0; c < k; c++) {
+				int value = ON_set[tempk[c] * POSROWS + frows[f]];
+				tempc[c] = value + 1;
+
+				int word_index = tempk[c] / (BITS_PER_WORD / VALUE_BIT_WIDTH);
+				int bit_index = (tempk[c] % (BITS_PER_WORD / VALUE_BIT_WIDTH)) * VALUE_BIT_WIDTH;
+
+				fixed_bits[word_index] |= (VALUE_BIT_MASK << bit_index);
+				value_bits[word_index] |= ((unsigned int)value << bit_index);
+			}
+
+			// check if the current PI is not redundant
+			// bool redundant = false;
+			redundant = false;
+
+			int i = 0;
+			while (i < prevfoundPI && !redundant) {
+				// /*
+				// - ck contains the complexity level for each of the previously found non-redundant PIs
+				// - indx is a matrix containing the indexes of the columns where the values were stored
+				// - a redundant PI is one for which all values from a previous PI are exactly the same:
+				// 0 0 1 2 0, let's say previously found PI
+				// which means a corresponding ck = 2 and a corresponding indx = [3, 4]
+				// 0 0 1 2 1 is redundant because on both columns 3 and 4 the values are equal
+				// therefore sumeq = 2 and it will be equal to v = 2 when reaching the complexity level ck = 2
+				// */
+
+				bool is_subset = true; // Assume it's a subset unless proven otherwise
+
+				for (int w = 0; w < IMPLICANT_WORDS; w++) {
+					// If the new PI has values on positions outside the existing PI’s fixed positions, it’s not a subset
+					unsigned int index_mask = p_implicants_pos[i * IMPLICANT_WORDS + w];
+
+					if ((fixed_bits[w] & index_mask) != index_mask) {
+						is_subset = false;
+						break;
+					}
+
+					// then compare the value bits, if one or more values on those positions are different, it’s not a subset
+					if ((value_bits[w] & index_mask) != (p_implicants_val[i * IMPLICANT_WORDS + w] & index_mask)) {
+						is_subset = false;
+						break;
+					}
+				}
+
+				redundant = is_subset;
+
+				i++;
+			}
+
+			if (redundant) continue;
+
+			// bool coverage[POSROWS];
+			int covsum = 0;
+			// unsigned int pichart_values[PICHART_WORDS];
+			for (int w = 0; w < PICHART_WORDS; w++) {
+				pichart_values[w] = 0U;
+			}
+
+			for (int r = 0; r < POSROWS; r++) {
+				coverage[r] = decpos[r] == decpos[frows[f]];
+				if (coverage[r]) {
+					int word_index = r / BITS_PER_WORD;
+					int bit_index = r % BITS_PER_WORD;
+					pichart_values[word_index] |= (1U << bit_index);
+				}
+				covsum += coverage[r];
+			}
+
+			// verify row dominance
+			int rd = 0;
+			while (rd < last_index[covsum - 1] && !redundant) {
+
+				bool dominated = true;
+				for (int w = 0; w < PICHART_WORDS; w++) {
+					if ((pichart_values[w] & p_pichart_pos[p_covered[rd] * PICHART_WORDS + w]) != pichart_values[w]) {
+						dominated = false;
+						break;
+					}
+				}
+
+				redundant = dominated;
+				rd++;
+			}
+
+			if (redundant) continue;
+		}
+	}
+}
diff --git a/src/CCubes.c b/src/CCubes.c
index 5e4868d..eb07e46 100755
--- a/src/CCubes.c
+++ b/src/CCubes.c
@@ -28,6 +28,27 @@
 
 SEXP CCubes(SEXP tt) {
 
+    // simulate the R command:
+    // system.file("ccubes.cl", package = "IEEE")
+
+    SEXP args = PROTECT(allocVector(VECSXP, 2));
+    SET_VECTOR_ELT(args, 0, mkString("ccubes.cl"));
+    SET_VECTOR_ELT(args, 1, mkString("IEEE"));
+
+    SEXP names = PROTECT(allocVector(STRSXP, 2));
+    SET_STRING_ELT(names, 1, mkChar("package"));
+    Rf_setAttrib(args, R_NamesSymbol, names);
+
+    SEXP r_call = PROTECT();
+    SEXP r_result = PROTECT(R_tryEval(
+        Rf_lang3(install("do.call"), mkString("system.file"), args),
+        R_GlobalEnv,
+        NULL
+    ));
+
+    Rprintf("Kernel file: '%s'\n", CHAR(STRING_ELT(r_result, 0)));
+    UNPROTECT(3);
+
     // 32 bits per word, in bit shifting representation
     char *bits_per_word = getenv("BITS_PER_WORD"); // Read from the PATH
     int BITS_PER_WORD = bits_per_word ? atoi(bits_per_word) : 32;