I am trying to write a program which will count the occurrence of words in a paragraph.
The logic I am following : I am using a linked list for the purpose. And I am searching sequentially - if new word encountered adding the word in the list, but if word already exist in the list increase its count flag.
Code:
//case insensitive string matching
int strcicmp(char const *a, char const *b)
{
int d;
for(;;a++,b++)
{
d=tolower(*a)-tolower(*b);
if(d!=0 || !*a)
return d;
}
}
//declare the linked list structure to store distinct words and their count
typedef struct node
{
char *word;
int count;
struct node *next;
} node;
node *ptr, *newnode, *first=NULL, *last=NULL;
void insertnewword(char *ch)
{
newnode=(node*)malloc(sizeof(node));
if(newnode == NULL)
{
printf("\nMemory is not allocated\n");
exit(0);
}
else
{
newnode->word=ch;
newnode->count=1;
newnode->next=NULL;
}
if(first==last && last==NULL)
{
first=last=newnode;
first->next=NULL;
last->next=NULL;
}
else
{
last->next=newnode;
last=newnode;
last->next=NULL;
}
}
void processword(char *ch)
{
int found=0;
//if word is already in the list, increase the count
for(ptr=first;ptr!=NULL;ptr=ptr->next)
if(strcicmp(ptr->word, ch) == 0)
{
ptr->count += 1;
found=1;
break;
}
//if it's a new word, add the word to the list
if(!found)
insertnewword(ch);
}
int main()
{
const char *delimiters=" ~`!@#$%^&*()_-+={[}]:;<,>.?/|\\\'\"\t\n\r";
char *ch, *str;
str=(char*)malloc(sizeof(char));
ch=(char*)malloc(sizeof(char));
//get the original string
scanf("%[^\n]%*c", str);
//fgets(str, 500, stdin);
//get the tokenized string
ch=strtok(str,delimiters);
while(ch!=NULL)
{
//a, an, the should not be counted
if(strcicmp("a", ch)!=0 && strcicmp("an", ch)!=0 && strcicmp("the", ch)!=0)
processword(ch);
ch=strtok(NULL,delimiters);
}
//print the word and it's occurrence count
for(ptr=first; ptr!=NULL; ptr=ptr->next)
printf("%s\t\t%d\n",ptr->word,ptr->count);
return 0;
}
this seem to be working fine for few number of words, but if word count is more than 6-7, this program is encountering some problem.
I can always implement any other logic for the same problem, but I want to know the issue with this implementation.
Thanks in advance