I am trying to solve the 909th challenge of Project Euler. It is basically about replacing specific patterns in a string until no pattern is found. A given string like "S(S)(S(S))(S(Z))(A)(0)"
is called an L_expression and the rules for replacement are:
- $$A(u)\longrightarrow u+1$$
- $$Z(u)(v)\longrightarrow v$$
- $$S(u)(v)(w)\longrightarrow v(u(v)(w))$$
At first I thought regex would do the job, and it did for the two given examples, "S(Z)(A)(0)"
and "S(S)(S(S))(S(Z))(A)(0)"
. But for the challenge itself, it turned out the iterations are far too many. So in the end, I wrote a C program and tried to make it as fast and optimized as possible:
#include <string.h>
#include <stdio.h>
static int find_patterns(const char* input, char* output, size_t size)
{
size_t last_x = 0, last_y = 0;
for (size_t p, i = 0; (p = i) < size; ++i)
{
char c = input[p++], next = input[p++];
if ((c < 'A' || next != '(') && (c != '+' || next < '0' || next > '9'))
{
continue; // look for either "A(", "S(", "Z(", "+d"
}
if (c == 'S')
{
int depth = 1, args = 0, lu = 0, lv = 0, lw = 0;
char *u = input + p, *v = NULL, *w = NULL, *y = output + last_y;
do {
switch (input[p])
{
case '(':
++depth;
break;
case ')':
if (--depth > 0) break;
if (depth < 0 || (args < 2 && input[p + 1] != '(')) {
p = size; // exit the loop
break;
}
switch (++args)
{
case 1:
lu = p - i - 2;
v = input + p + 2;
break;
case 2:
lv = p - i - 4 - lu;
w = input + p + 2;
break;
case 3: // found pattern S(..)(..)(..)
lw = p - i - 6 - lu - lv;
memcpy(y, input + last_x, i - last_x);
y += i - last_x;
memcpy(y, v, lv); // replace with v(u(v)(w))
y += lv;
memcpy(y, u - 1, lu + 1);
y += lu + 1;
memcpy(y, v - 1, lv + lw + 4);
y += lv + lw + 5;
y[-1] = ')';
last_x = p + 1;
last_y = y - output;
i = p;
p = size;
}
break;
}
} while (p++ < size);
}
else if (c == 'Z')
{
int depth = 1, args = 0, lu = 0, lv = 0;
char *u = input + p, *v = NULL, *y = output + last_y;
do {
switch (input[p])
{
case '(':
++depth;
break;
case ')':
if (--depth > 0) break;
if (depth < 0 || (!args && input[p + 1] != '(')) {
p = size; // exit the loop
break;
}
switch (++args)
{
case 1:
lu = p - i - 2;
v = input + p + 2;
break;
case 2: // found pattern Z(..)(v)
lv = p - i - 4 - lu;
memcpy(y, input + last_x, i - last_x);
y += i - last_x;
memcpy(y, v, lv); // replace with v
y += lv;
last_x = p + 1;
last_y = y - output;
i = p;
p = size;
}
break;
}
} while (p++ < size);
}
else if (c == 'A')
{
int depth = 1, lu = 0;
char *u = input + p, *y = output + last_y;
do {
switch (input[p])
{
case '(':
++depth;
break;
case ')':
if (--depth > 0) break;
if (depth < 0) {
p = size; // exit the loop
break;
}
lu = p - i - 2; // found pattern A(u)
memcpy(y, input + last_x, i - last_x);
y += i - last_x;
memcpy(y, u, lu); // replace with u+1
y += lu + 2;
y[-2] = '+';
y[-1] = '1';
last_x = p + 1;
last_y = y - output;
i = p;
p = size;
break;
}
} while (p++ < size);
}
else // c == '+': look for u+v where both are numbers
{
size_t u = 0, v = 0, d = 1, lu;
for (p = i; p--; d *= 10) {
c = input[p] - '0';
if (c >= 0 && c < 10) u += d * c;
else break;
}
for (lu = i - p - 1, p = i + 1; 1; ++p) {
c = input[p] - '0';
if (c >= 0 && c < 10) v = v * 10 + c;
else break;
}
if (lu && v) // both u and v are non-negative numbers
{
char str[20], *y = output + last_y;
for (u += v, d = 0; u; u /= 10) { // convert u+v to string
str[d++] = u % 10 + '0';
}
for (str[d--] = v = 0; v < d; ) {
u = str[v];
str[v++] = str[d];
str[d--] = u;
}
memcpy(y, input + last_x, i - last_x - lu);
y += i - last_x - lu;
memcpy(y, str, lu = strlen(str));
y += lu;
last_x = p;
last_y = y - output;
i = p - 1;
}
}
}
if (last_x < size) {
memcpy(output + last_y, input + last_x, size - last_x);
}
output[size - last_x + last_y] = 0;
return last_x > 0;
}
#define MAX_ITERATIONS 1E6 // (size_t)(-2)
int main()
{
const char
*test1 = "S(Z)(A)(0)",
*test2 = "S(S)(S(S))(S(Z))(A)(0)",
*test3 = "S(S)(S(S))(S(S))(S(Z))(A)(0)";
char str[2][0x5000] = { { 0 } };
size_t iterations, i, max_size = 0;
strcpy(*str, test3);
for (iterations = i = 0; ++iterations < MAX_ITERATIONS; i = 1 - i)
{
size_t size = strlen(str[i]);
if (size > max_size) {
max_size = size;
}
if (!find_patterns(str[i], str[1 - i], size)) {
break;
}
}
printf("result of L-actions:\n%s\nmax size: %d, #iterations: %d\n",
str[1 - i], max_size, iterations);
return 0;
}
The problem is, the code still takes ages to complete like 10^8 iterations and even so, we still won't reach anywhere near the solution. I have tried configuring gcc/clang flags for speed optimizations and don't know what else I should do.