This is my second attempt at K&R exercise 1-23:
Write a program to remove all comments from a C program. Don't forget to handle quoted strings and character constants properly. C comments don't nest.
I was previously packing characters into various I/O buffers, and someone suggested that this was not a good choice. I thought I'd try something more along the lines of a state machine:
#include <stdio.h>
/*
 * States of the comment-stripping state machine.
 *
 * The numeric order matters: main() emits a character only when the
 * current state compares less than SLASH, so the three "pass-through"
 * states (NORMAL, SINGLE_QUOTE, DOUBLE_QUOTE) must come first.
 */
enum {
    NORMAL = 0,         /* ordinary code                              */
    SINGLE_QUOTE = 1,   /* inside a character constant                */
    DOUBLE_QUOTE = 2,   /* inside a string literal                    */
    SLASH = 3,          /* just saw '/', may start a comment          */
    MULTI_COMMENT = 4,  /* inside a block comment                     */
    INLINE_COMMENT = 5, /* inside a comment that runs to end of line  */
    STAR = 6            /* inside a block comment, just saw '*'       */
};
/*
 * Next state when the machine is in NORMAL (ordinary code).
 *
 * prev_symbol: the character read immediately before symbol.
 * symbol:      the character just read.
 *
 * Returns SINGLE_QUOTE for an apostrophe not preceded by a backslash,
 * DOUBLE_QUOTE for a double quote, SLASH for '/', and NORMAL otherwise.
 */
int state_from_normal(char prev_symbol, char symbol)
{
    switch (symbol) {
    case '\'':
        /* An apostrophe right after a backslash does not open a constant. */
        return (prev_symbol == '\\') ? NORMAL : SINGLE_QUOTE;
    case '"':
        return DOUBLE_QUOTE;
    case '/':
        return SLASH;
    default:
        return NORMAL;
    }
}
/*
 * Next state when the machine is inside a character constant.
 *
 * prev_symbol: the character read immediately before symbol.
 * symbol:      the character just read.
 *
 * Returns NORMAL when symbol is an apostrophe that is not directly
 * preceded by a backslash; otherwise stays in SINGLE_QUOTE.
 *
 * NOTE: only one character of look-behind is available here, so an
 * apostrophe whose predecessor is a backslash is always treated as
 * escaped, even if that backslash was itself escaped.
 */
int state_from_single_quote(char prev_symbol, char symbol)
{
    int closes_constant = (symbol == '\'') && (prev_symbol != '\\');

    return closes_constant ? NORMAL : SINGLE_QUOTE;
}
/*
 * Next state when the machine is inside a string literal.
 *
 * prev_symbol: the character read immediately before symbol.
 * symbol:      the character just read.
 *
 * Returns NORMAL when symbol is a double quote that is not directly
 * preceded by a backslash; otherwise stays in DOUBLE_QUOTE.
 *
 * NOTE: only one character of look-behind is available here, so a
 * double quote whose predecessor is a backslash is always treated as
 * escaped, even if that backslash was itself escaped.
 */
int state_from_double_quote(char prev_symbol, char symbol)
{
    /* Anything other than an unescaped closing quote keeps us inside. */
    if (symbol != '"' || prev_symbol == '\\') {
        return DOUBLE_QUOTE;
    }
    return NORMAL;
}
/*
 * Next state when the previous character was a '/' seen in ordinary code.
 *
 * symbol: the character just read.
 *
 * Returns MULTI_COMMENT for '*', INLINE_COMMENT for a second '/', and
 * NORMAL for anything else (the slash was not a comment opener).
 */
int state_from_slash(char symbol)
{
    switch (symbol) {
    case '*':
        return MULTI_COMMENT;
    case '/':
        return INLINE_COMMENT;
    default:
        return NORMAL;
    }
}
/*
 * Next state when the machine is inside a block comment.
 *
 * symbol: the character just read.
 *
 * Returns STAR when symbol is '*' (a possible start of the comment
 * terminator); otherwise stays in MULTI_COMMENT.
 */
int state_from_multi_comment(char symbol)
{
    return (symbol == '*') ? STAR : MULTI_COMMENT;
}
/*
 * Next state when the machine is inside a block comment and the previous
 * character was '*'.
 *
 * symbol: the character just read.
 *
 * Returns NORMAL when symbol is '/', which completes the terminator and
 * ends the comment. Returns STAR when symbol is another '*', because that
 * star may itself be the first half of the terminator (this is what lets
 * comments ending in a run of stars followed by '/' close correctly).
 * Returns MULTI_COMMENT for any other character.
 */
int state_from_star(char symbol)
{
    if (symbol == '/') {
        return NORMAL;
    }
    if (symbol == '*') {
        /* Still "just saw a star": keep waiting for the closing slash. */
        return STAR;
    }
    return MULTI_COMMENT;
}
/*
 * Next state when the machine is inside a comment that runs to the end
 * of the line.
 *
 * symbol: the character just read.
 *
 * Returns NORMAL when symbol is a newline; otherwise stays in
 * INLINE_COMMENT.
 */
int state_from_inline_comment(char symbol)
{
    if (symbol != '\n') {
        return INLINE_COMMENT;
    }
    return NORMAL;
}
/*
 * Dispatch one transition of the state machine.
 *
 * prev_state:  the state the machine was in before reading symbol.
 * prev_symbol: the character read immediately before symbol.
 * symbol:      the character just read.
 *
 * Returns the state produced by the handler for prev_state, or -1 when
 * prev_state is not one of the known states.
 */
int state_from(int prev_state, char prev_symbol, char symbol)
{
    int next_state = -1; /* result for an unrecognised prev_state */

    switch (prev_state) {
    case NORMAL:
        next_state = state_from_normal(prev_symbol, symbol);
        break;
    case SINGLE_QUOTE:
        next_state = state_from_single_quote(prev_symbol, symbol);
        break;
    case DOUBLE_QUOTE:
        next_state = state_from_double_quote(prev_symbol, symbol);
        break;
    case SLASH:
        next_state = state_from_slash(symbol);
        break;
    case MULTI_COMMENT:
        next_state = state_from_multi_comment(symbol);
        break;
    case INLINE_COMMENT:
        next_state = state_from_inline_comment(symbol);
        break;
    case STAR:
        next_state = state_from_star(symbol);
        break;
    default:
        break;
    }
    return next_state;
}
int main(void)
{
char input;
char symbol = '\0';
char prev_symbol;
int state = NORMAL;
int prev_state;
while ((input = getchar()) != EOF) {
prev_symbol = symbol;
prev_state = state;
symbol = input;
state = state_from(prev_state, prev_symbol, symbol);
if (prev_state == SLASH && state == NORMAL) {
putchar(prev_symbol);
}
if (prev_state != STAR && state < SLASH) {
putchar(symbol);
}
}
}
You could replace the #defines with an enum. – syb0rg yesterday
getchar returns an int, not a char, and that's real important. See the "Application Usage" note in the POSIX spec: pubs.opengroup.org/onlinepubs/9699919799/functions/getchar.html – Mat yesterday
Change input to int, problem solved. – Mat 1 hour ago