I was trying to implement the algorithm specified in this research paper (please ignore the math, since it's irrelevant to the question). This algorithm is very basic in formal concept analysis. The input is supposed to be an NxM matrix and a list of lists or array of lists as you can see in the paper section 3.1. In fact, for simulation purposes, I use a simple .cxt file for representing the objects and their attributes as shown here (this is a very simple example).
This C# implementation performs the worst in comparison to the author's C++ implementation. The author offers a stand alone .exe file programmed in C++ and it's at least 35 times faster. Why? How can I improve this code?
XXX...
X.XXXX
XX..X.
.XX...
Each row represents an object with the attributes it has X
or has not .
. My implementation is very straightforward where I just read the .cxt file and store the values as . -> 0
and X -> 1
then proceed to the implementation of the two procedures illustrated in the pseudocode.
C# implementation:
public void generate_from(int[] B, int y)
{
int[] D;
Total_Concepts++;
//for (int i = 0; i < B.Length; i++)
// if (B[i] == 1)
// Console.Write(i+",");
//Console.WriteLine();
if (y < CT_WIDTH)
{
for (int j = y; j < CT_WIDTH; j++)
{
if (B[j] == 0)
{
B[j] = 1;
D = compute_closure(B, j);
bool skip = false;
for (int k = 0; k < j; k++)
{
if (D[k] != B[k])
{
skip = true;
break;
}
}
if (!skip)
generate_from(D, j + 1);
B[j] = 0;
}
}
}
}
int[] compute_closure(int[] B, int y)
{
bool match;
int[] D = new int[CT_WIDTH];
for (int j = 0; j < CT_WIDTH; j++)
D[j] = 1;
for (int i = 0; i < rows[y].Count; i++)
{
match = true;
for (int j = 0; j < CT_WIDTH; j++)
{
if (B[j] == 1 && FCAContext[rows[y][i] * CT_WIDTH + j] == 0)
{
match = false;
break;
}
}
if (match)
{
for (int j = 0; j < CT_WIDTH; j++)
if (FCAContext[rows[y][i] * CT_WIDTH + j] == 0)
D[j] = 0;
}
}
return D;
}
For tracing to the code, I form the input in the following manner:
public int Total_Concepts = 0;
// Total Number of Attributes : TABLE-WIDTH , Total Number of Objects : TABLE-HEIGHT
int CT_WIDTH, CT_HEIGHT;
// The Cross Table
int[] FCAContext = new int[5000000];
// The Second representation
List<int>[] rows = new List<int>[1000];
public alg(int Context_Width, int Context_Height, string SampleData)
{
CT_WIDTH = Context_Width;
CT_HEIGHT = Context_Height;
string instance;
int FCACC = 0;
using (StreamReader file = new StreamReader(SampleData))
{
for (int i = 0; i < CT_HEIGHT; i++)
{
instance = file.ReadLine();
for (int j = 0; j < CT_WIDTH; j++)
if (instance[j] == 'X')
{
FCAContext[FCACC] = 1;
FCACC++;
}
else
if (instance[j] == '.')
{
FCAContext[FCACC] = 0;
FCACC++;
}
}
}
for (int i = 0; i < CT_WIDTH; i++)
{
rows[i]= new List<int>();
for (int j = 0; j < CT_HEIGHT; j++)
if (FCAContext[j * CT_WIDTH + i] == 1)
rows[i].Add(j);
}
}
Moreover, the calling to all this class of algorithm is:
static void Main(string[] args)
{
int w = 126;
int h = 8124;
// Mushroom 8124 X 126
Stopwatch watch = new Stopwatch();
watch.Start();
alg fca = new alg(w, h, @"C:\FCA\mushroom.txt");
fca.generate_from(new int[w], 0, new int[1000000]);
//fca.generate_from(new int[w], 0);
watch.Stop();
Console.WriteLine("Total Concepts Generated : " + fca.Total_Concepts);
Console.WriteLine("Total Computation Time : " + watch.ElapsedMilliseconds);
Console.ReadKey();
}