Take the 2-minute tour ×
Code Review Stack Exchange is a question and answer site for peer programmer code reviews. It's 100% free, no registration required.

I have written a program which basically reads a file named "data.txt" line-by-line. As a line is read, it validates the line with a certain specification. If the specification is met, it will output the data within a file named data.txt. If the line does not meet the validation criteria, then it will output that specific line to a separate file named error.txt.

I have finished the code and was wondering if anyone could look at it and see if they could help me with a few points in making the code look more compact and tidy. I feel I have done the same code over and over again for no reason. I have added comments so the code should make sense.

#include <stdio.h>          //library including standard input and output functions
#include <stdlib.h>         //library including exit and system functions used below
#include <string.h>         //library including string functions used

struct packet{
    int source;        // 1 - 1024 range (int)
    int destination;   // 1 - 1024 range (int)
    int type;          // 0 - 10 range (int)               // Varibles for the structure
    int port;          // 1 = 1024 (int)
    char data[50];     // 1 - 50 range (char)

};

int main()
{

    char filename[32] = { '\0' } ;   // variables which declare the I/O stream and the filename structure
    char DataLine[71];               // Reads the file one line at a time
    char ErrorLine[71];              // This is the varible that deals with the validation error
    char TempStorage[5];                // Stores data to be validated
    char TempData[50];                   // Stores the data which will be validated
    int  TempS, TempD, TempT, TempP;  // Stores the integer derived from the input file
    int  Flag = 0;                   // This is the Flag that indicates a Line has not passed validation
    int  Count = 0;                   // This is the Flag that indicated a line has passed validation
    int  Ecount = 0;                  // This counts the number of errors

 struct packet *DataRecords;
 DataRecords = malloc(sizeof(struct packet));    // This deals with storing the data needed for the next task.

printf("Enter the filename you wish to open\n");
scanf("%s", &filename);
                                      // user inputs the filename
FILE *DataFile;
if (( DataFile = fopen(filename, "r")) == NULL)
{
    printf ("\nfile could not be opened. : %s\n", filename);  // If a value of NULL is returned then the program will close.

}
else
{


FILE *ErrorFile = fopen("error.txt","w");                         // This will start searching through the lines and store the lines not passing the validation test to a txt file named "error.txt". 
printf("File has been found, checking validation");

while( fgets (DataLine, 71, DataFile)!=NULL) {
strcpy(ErrorLine, DataLine);
strcpy(TempStorage, strtok(DataLine,":"));
TempS = atoi(TempStorage);
strcpy(TempStorage, strtok( NULL, ":"));            // these lines of code looks through each line and stores the line within the "Temp Storage" varible, the : token is what the element within the line is seperated by.
TempD = atoi(TempStorage);
strcpy(TempStorage, strtok( NULL, ":"));
TempT = atoi(TempStorage);
strcpy(TempStorage, strtok( NULL, ":"));
TempP = atoi(TempStorage);
strcpy(TempData, strtok( NULL, ":"));

    if (TempS < 1 || TempS > 1024) Flag = 1;
    if (TempD < 1 || TempD > 1024) Flag = 1;
    if (TempT < 0 || TempT > 10) Flag = 1;                   // // Validation aspect, if the validation is not met then a flag is added to which then the line is posted within the error file. 
    if (TempP < 1 || TempP > 1024) Flag = 1;
    if (strlen(TempData) < 1 || strlen(TempData)> 50) Flag = 1;
    if (Flag == 1)
    {
        Ecount++;
        printf("Error %i %i:%i:%i:%i:%s",Ecount,TempS,TempD,TempT,TempP,TempData);  
        fprintf(ErrorFile,"%s", ErrorLine);
        // fprintf writes formatted text to the output stream you specify
    }
    else
    {
        DataRecords[Count].source = TempS;
        DataRecords[Count].destination = TempD;
        DataRecords[Count].type = TempT;
        DataRecords[Count].port = TempP;
        strncpy(DataRecords[Count].data,TempData,51);
        Count++; //increment sequence number
        DataRecords = realloc(DataRecords,(Count+1)*sizeof(struct packet));//allocate more memory for packetdata
    } 
Flag = 0;  
}
FILE *DFile = fopen("data2.txt","w");
int ii;
for (ii = 0; ii < Count; ii++)
{
fprintf(DFile,  "%04i:%04i:%04i:%04i:%s",DataRecords[ii].source,    // Where the data that has passed validation goes
DataRecords[ii].destination,
DataRecords[ii].type,
DataRecords[ii].port,
DataRecords[ii].data);
  }
fclose(DFile);
fclose(ErrorFile);
fclose(DataFile);
printf("\nNumber of errors: %i \n", Ecount);       
printf("Number of saved records: %i ", Count);
free(DataRecords);

}
return 0;
}

This is some of the data it needs to validate.

0005:0002:0002:0020:100000000000000000017
0001:0002:0002:0080:</BODY>
0006:0003:0002:0041:100000000000000000019
0006:0002:0002:0060:100000000000000000020
0002:0004:0002:0090:100000000000000000022
0001:0002:0003:0021:cp /etc/hosts /root
0002:0004:0002:0010:100000000000000000023
0003:0004:0002:0180:100000000000000000025
share|improve this question
    
I don't see the declaration of DataFile –  janos 7 hours ago
    
sorry, it is there but i forgot to post that part of the code in –  user3521585 7 hours ago
3  
As it is, this doesn't compile. You might want to delete it, fix it, and undelete when really ready. –  janos 7 hours ago
    
ok that has been changed –  user3521585 6 hours ago

2 Answers 2

Readability

This code is very hard to read and review, for many reasons:

  • Messy indenting: some code blocks are correctly indented (the struct), but most of the rest is not, the main function has a mix of various inexplicable indents
  • Very long lines: avoid long lines. Try to stay within ~80 characters when possible
  • As a general rule, comments should be on their own line. This will immediately make the code more readable, as I won't have to scroll to the far right to read something and then scroll back to the left to see the code

Naming

Use lowercase names for variables. For example, dataFile instead of DataFile.

Since you use TempS, TempD, ... are intended to be used at some point as fields of the packet struct, it would be better to name them after those fields in the first place:

  • tempSource instead of TempS for the source field
  • tempDestination instead of TempD for the destination field
  • ... and so on

The traditional loop variable is i, it would be just more natural to use that instead of ii.

Magic numbers

Some numbers appear multiple times. Their purpose is unclear, and if you ever change it in one place, you'll have to remember to change everywhere. To prevent possible errors, it's better to define constants for them. For example for the number 71.

Security

In general, strcpy is not safe, because it may overwrite beyond the end of the destination array. This line is immediately suspicious:

strcpy(ErrorLine, DataLine);

On closer look, I see that both ErrorLine and DataLine have the same size, so it should be ok, but it would be better if this was obvious without thinking, by using strncpy, and using a named constant as the size parameter, the same constant used in the declaration of the destination array (as I pointed out in the previous point).

share|improve this answer
  • You need a lot more error checking in your code.
  • You don’t need to use malloc, since you can process one record at a time.
  • Using atoi may be ok for very simple cases, but when you write a validator, you should be as strict as possible. In that case strtol is more complicated to use but still better.
  • The strtok function can return NULL. You need to check for that special return value. Otherwise your program will crash.

Here is the code that can handle overly long lines and malformed integers in the first fields:

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int next_int(char *line, int min, int max)
{
    const char *token;
    char *end;
    long number;

    token = strtok(line, ":");
    errno = 0;
    if (token == NULL)
        return -1;
    number = strtol(token, &end, 10);
    if (*end == '\0' && errno == 0 && min <= number && number <= max)
        return number;
    return -1;
}

static int validate_file(const char *in, const char *out, const char *err)
{
    int rv = -1;
    FILE *fin = NULL, *fout = NULL, *ferr = NULL;
    char line[71], linecopy[71];
    int source, destination, type, port;
    const char *data;
    int count = 0, errors = 0;

    if ((fin = fopen(in, "r")) == NULL)
        goto cleanup;
    if ((fout = fopen(out, "w")) == NULL)
        goto cleanup;
    if ((ferr = fopen(err, "w")) == NULL)
        goto cleanup;

    while (fgets(line, sizeof line, fin) != NULL) {
        strcpy(linecopy, line);
        source = next_int(line, 1, 1024);
        destination = next_int(NULL, 1, 1024);
        type = next_int(NULL, 0, 10);
        port = next_int(NULL, 1, 1024);
        data = strtok(NULL, "\n");

        if (source != -1 && destination != -1 && type != -1 && port != -1
            && data != NULL && 1 <= strlen(data) && strlen(data) <= 50) {
            count++;
            fprintf(fout, "%04i:%04i:%04i:%04i:%s\n", source, destination,
                    type, port, data);
        } else {
            errors++;
            fprintf(ferr, "Error %i %i:%i:%i:%i:%s\n", errors, source,
                    destination, type, port,
                    data != NULL ? data : "(null)");
            fprintf(ferr, "%s\n", strtok(linecopy, "\n"));

        }
    }
    rv = 0;

  cleanup:
    if (ferr != NULL) {
        if (fclose(ferr) == -1)
            rv = -1;
    }
    if (fout != NULL) {
        if (fclose(fout) == -1)
            rv = -1;
    }
    if (fin != NULL) {
        if (fclose(fin) == -1)
            rv = -1;
    }
    return rv;
}

int main()
{
    validate_file("data.txt", "data2.txt", "errors.txt");
    return 0;
}

Some test data:

1:1:1:1:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa overly long line
1a:1:1:1:invalid source
1:1a:1:1:invalid destination
1:1:1a:1:invalid type
1:1:1:1a:invalid port
10000:1:1:1:too large source
share|improve this answer
    
Thanks for the very detailed answer! I should learn alot from this, but I must ask, if I was to change my code so the program still works as it does now but the code would be using the strtok and strtol function, what would I change specifically? as I would like to keep the core features of my program the same, but it seems quite complicated changing it to fit what you have shown me if that makes sense. –  user3521585 3 hours ago

Your Answer

 
discard

By posting your answer, you agree to the privacy policy and terms of service.

Not the answer you're looking for? Browse other questions tagged or ask your own question.