This is sample code; when it runs, it uses only about 46% of the CPU.
I'm using code based on this sample to process big TSV files (from 100 GB to 500 GB).
I tried using BlockingCollection, but performance didn't improve.
How can I improve the performance of this sample?
/// <summary>
/// Lazily produces <paramref name="count"/> tuples, each pairing the same string
/// <paramref name="s"/> with a consecutive index starting at 0.
/// </summary>
/// <param name="s">Value stored in <c>Item1</c> of every yielded tuple.</param>
/// <param name="count">Number of tuples to yield; nothing is yielded when it is zero or negative.</param>
/// <returns>A deferred sequence of <c>(s, index)</c> tuples; a new tuple is allocated per element.</returns>
private static IEnumerable<Tuple<string, int>> GetEnumerator(string s, int count)
{
    var index = 0;
    while (index < count)
    {
        yield return Tuple.Create(s, index);
        index++;
    }
}
/// <summary>
/// Micro-benchmark: builds one tab-separated line of 10000 numeric columns, then splits and
/// re-joins that same line 50000 times through PLINQ and prints the elapsed time to the console.
/// </summary>
private static void Test()
{
    var columns = 10000;
    var rows = 50000;

    // One synthetic TSV line: "0\t1\t...\t9999".
    var line = string.Join("\t", Enumerable.Range(0, columns));

    // Per-row work: split the line into fields and join them back together.
    // Every call allocates a string[] with one entry per column, one string per field,
    // and a new joined line.
    // NOTE(review): with this allocation volume the run is probably GC-bound rather than
    // CPU-bound, which would explain the low CPU utilisation - confirm with a profiler
    // and consider enabling server GC in the runtime configuration.
    Func<Tuple<string, int>, string> action = li =>
    {
        var fields = li.Item1.Split('\t');
        return string.Join("\t", fields);
    };

    // Stopwatch is monotonic and high-resolution; DateTime.Now subtraction is coarse and
    // is affected by system clock adjustments.
    var stopwatch = System.Diagnostics.Stopwatch.StartNew();
    GetEnumerator(line, rows).AsParallel().Select(action).ForAll(lline => { });
    stopwatch.Stop();

    Console.WriteLine("Time taken {0}", stopwatch.Elapsed);
}