In response to another recent question I mentioned that one mechanism to avoid runtime overhead for creating a data structure was to create it at compile time and use it directly. Since there was some interest in that concept, I thought I would write code to show how that might be done and submit it for review here.
The theory
There are two pieces to this code.
The first is code that is used to construct an AVL tree of arbitrary data. For illustration purposes, the data inserted into the tree are the names of the months (in English) and the number of days in each month. The month name is used as the key for lookup and the number of days is the associated data retrieved. This first piece of code constructs the tree and then emits it as a C source code representing the resulting structure.
The second piece of code is the part that would be compiled and linked with the object file resulting from the first piece.
makeavl.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node {
char *key;
int data;
struct node *left;
struct node *right;
} node;
node *new_node(char *key, int data)
{
node *p = malloc(sizeof(*p));
if (p != NULL) {
p->key = key;
p->data = data;
p->left = NULL;
p->right = NULL;
}
return p;
}
int max(int a, int b)
{
return a > b ? a : b;
}
int nodecount(node *p)
{
int count = 0;
if (p != NULL) {
++count;
count += nodecount(p->left);
count += nodecount(p->right);
}
return count;
}
int relheight(node *p, int count)
{
if (p == NULL)
return count;
return max(relheight(p->left, count+1), relheight(p->right, count+1));
}
int height(node * p)
{
return relheight(p, 0);
}
node *rotate_right(node * p)
{
node *q = p->left;
p->left = q->right;
q->right = p;
return q;
}
node *rotate_left(node * p)
{
node *q = p->right;
p->right = q->left;
q->left = p;
return q;
}
node *balance(node * p)
{
if (height(p->left) - height(p->right) == 2) {
if (height(p->left->right) > height(p->left->left))
p->left = rotate_left(p->left);
return rotate_right(p);
} else if (height(p->right) - height(p->left) == 2) {
if (height(p->right->left) > height(p->right->right))
p->right = rotate_right(p->right);
return rotate_left(p);
}
return p;
}
node *insert(node * p, char *key, int data)
{
if (p == NULL)
return new_node(key, data);
int keycmp = strcmp(key, p->key);
if (keycmp < 0)
p->left = insert(p->left, key, data);
else if (keycmp > 0)
p->right = insert(p->right, key, data);
else
p->data = data;
return balance(p);
}
void free_tree(node * p)
{
if (p == NULL)
return;
free_tree(p->left);
free_tree(p->right);
free(p);
}
void emit(FILE * out, node * p, const char *name, int *count)
{
if (p != NULL) {
fprintf(out, "/* %d */ ", *count);
fprintf(out, "{ \"%s\", %d, ", p->key, p->data);
++(*count);
if (p->left) {
fprintf(out, "&%s[%d], ", name, *count);
} else {
fprintf(out, "NULL, ");
}
if (p->right) {
fprintf(out, "&%s[%d] },\n", name, *count + nodecount(p->left));
} else {
fprintf(out, "NULL },\n");
}
emit(out, p->left, name, count);
emit(out, p->right, name, count);
}
}
void makeC(FILE *out, node *p, const char *name)
{
int count = 0;
fprintf(out, "node %s[] = {\n", name);
emit(out, p, name, &count);
fprintf(out, "};\n");
}
int main()
{
node *root = NULL;
char *months[12] = { "January", "February", "March", "April",
"May", "June", "July", "August", "September",
"October", "November", "December"
};
int days[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };
for (int i = 0; i < 12; ++i) {
root = insert(root, months[i], days[i]);
}
makeC(stdout, root, "caltree");
free_tree(root);
}
useavl.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef struct node {
char *key;
int data;
struct node *left;
struct node *right;
} node;
node *search(node * p, char *key)
{
if (p == NULL)
return NULL;
int keycmp = strcmp(key, p->key);
if (keycmp < 0)
return search(p->left, key);
else if (keycmp > 0)
return search(p->right, key);
else
return p;
}
int main(void)
{
#include "staticavl.c"
node *n = search(caltree, "February");
printf("There are %d days in %s.\n", n->data, n->key);
}
staticavl.c (generated file)
node caltree[] = {
/* 0 */ { "March", 31, &caltree[1], &caltree[8] },
/* 1 */ { "January", 31, &caltree[2], &caltree[6] },
/* 2 */ { "August", 31, &caltree[3], &caltree[4] },
/* 3 */ { "April", 30, NULL, NULL },
/* 4 */ { "February", 28, &caltree[5], NULL },
/* 5 */ { "December", 31, NULL, NULL },
/* 6 */ { "June", 30, &caltree[7], NULL },
/* 7 */ { "July", 31, NULL, NULL },
/* 8 */ { "October", 31, &caltree[9], &caltree[11] },
/* 9 */ { "May", 31, NULL, &caltree[10] },
/* 10 */ { "November", 30, NULL, NULL },
/* 11 */ { "September", 30, NULL, NULL },
};
Sample output
There are 28 days in February.
The advantage to doing things this way is that only the lookup function needs to be in the useavl
program, so if it's an embedded system and especially if the data structure is large and complex, the burden for building the structure is shifted to compile time (or technically, to runtime for makeavl
). It's possible, and indeed probable, that makeavl
and useavl
would run on different machines.
Comments welcome.
if (strcmp(key, "February") == 0 && is_leap_year) { ...
:P – glampert Jul 27 at 0:28