What are the Steps/Algorithm to load/Parse an XML File? [closed]

Question

I want to write an xml parser by my own, i know to search for a string, load files into memory using vectors, but is theres a way i can use this to load and parse xml file, i were told to make a DOM or Document Object Model, what is it?? please tell what should i do, im currently loading the contents of a XML file into Vectors, and using a techninque to search for <tags> and </tags> please give me tips/suggestions and this is my code:

#include "stdafx.h"
#include <fstream.h>
#include <string.h>
#include <string>
#include <stdlib.h>
#include <vector>


struct VFILE{
    std::vector<char> FILE;
    int Push(char ch){FILE.push_back(ch); return 0;}
    int Size(){return FILE.size();}
};
struct ELEMENTS{
    char *fname;
    char *Element;
    int SPos, EPos;
};

ELEMENTS InitElements(){
    ELEMENTS Elements;
    Elements.fname='\0';
    Elements.Element='\0';
    Elements.SPos=0;
    Elements.EPos=0;
    return Elements;
    }

struct DOCUMENT
{
    VFILE vfile;
    ELEMENTS Element;
    char *fname;
    char fmem;
    char *EID;
    char ID[1024];
    int fPosS;
    int fPosE;
    int LoadFile(char *Fname) {
    fname=Fname; 
    fstream Gfile;
    Gfile.open(fname, ios::in);
    while (Gfile.get(fmem))
    {
        vfile.Push(fmem);
    }
    Gfile.close();
    return 0;}
    int GetSTag(int spos, char *tag)
{
    int i=spos, p=0, tg=0;
    tg=strlen(tag);
    while(i<=vfile.Size())
    {
        if (vfile.FILE[i]==tag[p])
        {
            p++;
            if (p>=tg)
            {
                goto end;
            }
        }
        else
            p=0;
        i++;
    }
end:
    if (p==0)
    {
        i=0;
    }
    if (i==0)
    {
        tg=0;
    }
    else
        tg=i+1;
    return (tg);
}

    int GetETag(int epos, char *tag)
{
    int i=epos-1, p=0, tg=0;
    tg=strlen(tag);
    while(i<=vfile.Size())
    {
        if (vfile.FILE[i]==tag[p])
        {
            p++;
            if (p>=tg)
            {
                goto end2;
            }
        }
        else
            p=0;
        i++;

    }
end2:
    if (p==0)
    {
        i=0;
    }
    if (i==0)
    {
        tg=0;
    }
    else
    {
        tg=i-tg+1;
    }
    return (tg);
}

    int CopyStr(int sp, int ep, char *string)
{
    int i=sp+1, p=0, r=0;
    string[0]='\0';
    while(r<=vfile.Size() && i<=ep)
    {
        string[p]=vfile.FILE[i];
        p++;
        i++;
    }
    string[p]='\0';
    if (strlen(string)>=1)
    return 0;
    else return 1;
}
    char* GetAttributes(ELEMENTS ELMNT, char *Attri)
    {
    char Att[1024];
    int TagP[2], Limit=0;
    strcpy(Att, Attri);
    strcat(Att, "=\"");
    TagP[0]=GetSTag(ELMNT.SPos, Att);
    Limit=GetETag(ELMNT.SPos, ">");
    if (TagP[0]!=0 && TagP[0]<=Limit)
    {
    TagP[1]=GetETag(TagP[0]+1, "\"");
    CopyStr(TagP[0]-1, TagP[1]-1, Att);
    }
    else
    Att[0]='\0';
    return (Att);
    }

    ELEMENTS ChildElement(ELEMENTS Elements, char *ElementName)
{
    fstream file;
    int StrCnt=strlen(ElementName);
    char ELS[1024], ELE[1024];
    strcpy(ELS, "<");
    strcat(ELS, ElementName);
    Elements.SPos=GetSTag(Elements.SPos, ELS);
    if(Elements.SPos!=0){
    strcpy(ELE, "</");
    strcat(ELE, ElementName);
    strcat(ELE, ">");
    Elements.EPos=GetETag(Elements.SPos, ELE)+1;
    }
    else
    {
    Elements.SPos=0;
    Elements.EPos=0;
    }
    Elements.fname=fname;
    Elements.Element=ElementName;
    return Elements;
}

    ELEMENTS NextElement(ELEMENTS EL1, ELEMENTS Elements, char *ElementName)
{
    fstream file;
    int StrCnt=strlen(ElementName);
    char ELS[1024], ELE[1024];
    strcpy(ELS, "<");
    strcat(ELS, ElementName);
    Elements.SPos=GetSTag(Elements.SPos, ELS);
    if(Elements.SPos!=0 && Elements.SPos<=EL1.EPos){
    strcpy(ELE, "</");
    strcat(ELE, ElementName);
    strcat(ELE, ">");
    Elements.EPos=GetETag(Elements.SPos, ELE)+1;
    }
    else
    {
    Elements.SPos=0;
    Elements.EPos=0;
    }
    Elements.fname=fname;
    Elements.Element=ElementName;
    return Elements;
}
    std::vector<int> RetInts(ELEMENTS Elements, XMLDoc doc)
{
    int i=Elements.SPos+1, p=0, k=0;
    std::vector<int> VInd;
    char digitc[20];
    while (i<=Elements.EPos-1)
    {
                if (vfile.FILE[i]!=' ' && vfile.FILE[i]!='<')
                {
                    digitc[p]=vfile.FILE[i];
                    p++;
                }
                else
                {
                    digitc[p]='\0';
                    p=0;
                    VInd.push_back(atoi(digitc));
                    k++;
                }
        i++;

    }
        return VInd;
}
};

and the main is something like this:

char fname[]="C:\\users\\rishab\\desktop\\cube.dae";
    DOCUMENT doc;
    ELEMENTS Elements=InitElements();
    doc.LoadFile(fname);
    Elements=doc.ChildElement(Elements, "collada");
    Elements=doc.ChildElement(Elements, "library_geometries");
    Elements=doc.ChildElement(Elements, "geometry");
    char *ID=GetAttributes(Elements, "id")
    Elements=doc.ChildElement(Elements, "mesh");

here im, parsing a collada file(just for an example, as most of you all have worked with this type of file) the functions works as follows:

DOCUMENT doc; // creates a document
ELEMENTS elements=InitElements() // InitElements() will initialise Elements'SPos and EPos to 0
Elements=doc.ChildElement(Elements, "collada"); // Elements will contain the SPos and EPos of <collada> to </collada>
char *ID=GetAttributs(Elements, "id"); // This will checkup the elements for having a string id= and will return charaters within the " and "(double quotes) eg. in a string <geometry id="ID1"> this will lookup for the string "id" and jump to the "" position and return ID1 to *ID;

So am i doing something wrong?? Please help me, thanks in advance!!

Also, if your code has bugs you can't pin down, you might want to put it on stackoverflow.com for greater exposure, then post it here once it's working as intended. — Mario, Dec 13 '14 at 8:51
Welcome to Code Review! I'm afraid this question does not match what this site is about. Code Review is about improving the cleanliness of existing, working code. Code Review is not the site to ask for help in fixing or changing what your code does. Once the code does what you want, we would love to help you do the same thing in a cleaner way! — Simon André Forsberg, Dec 13 '14 at 13:12

Mario · Accepted Answer · 2014-12-13 09:02:58Z

Several issues I saw immediately without even trying to understand the actual code:

Using ALLUPPERCASE for non-macros is in general a bad idea. It's commonly used to separate MACROS from nonMacros.
Using ALLUPPERCASE in combination with names of standard types like FILE is even worse (reusing standard names in general as well).
Elements.fname='\0'; is probably not doing what you expect it to do. You're essentially assigning them a NULL value rather then an empty string, but at the same time theoretically not assigning NULL or 0. Better use Elements.fname = NULL; or Elements.fname = ""; depending on whatever your actual intention is.
The functionality provided by VFILE is essentially just a std::string or std::vector<char>. A simple typedef would be enough rather than introducing a new type that's also limiting your access to the actual data.
Code inside InitElements() should be removed. This is essentially a constructor and should be implemented as such.
In general, you're not really parsing the XML file and are just looking for specific structures that might be the XML data you're looking for. Imagine the following file contents - your code would most likely read the attribute id despite it not being there!
```
<storage parent_id="parent" id="child">some... data</storage>
```
Also I don't think you're handling instances of empty tags (<div />).

About creating a DOM:

This is a bit similar to what you've done already. Basically it means you create a code or data structure representing the actual XML data (to make access easier and more logical).

For example, you've got the following simple XML file:

<config>
    <screen width="640" height="480" bpp="32" />
</config>

In JSON notation (or parsed in JavaScript for example; i.e. the DOM), this could look like this:

'config': {
    'screen': {
        'width': 640,
        'height': 480,
        'bpp': 32
    }
}

Or in some final C++ code, you might get an object config of class XMLDocument, which would allow you to do something like this:

SetScreenResolution(config["screen"]["width"], config["screen"]["height"], config["screen"]["bpp"]);

In Javascript this could work like this:

SetScreenResolution(config.screen.width, config.screen.height, config.screen.bpp);

Explaining the whole system or how to do it would obviously go far beyond the space for this answer. Best suggestion regarding this would be looking in existing implementations of XML parsers providing a DOM.

Thanks for the help, i will try to improve my code, but can u just tell me an example of parsing in c++, don't post the full code but tell me what should i do, steps to parse, i found i should parse a file to tokens, but it will become very complex then, what should i do?? thnaks for the help!! — Rishabh, Dec 13 '14 at 10:40
You can essentially do something similar to what you've done. Read the next character, if it's whitespace skip it, if it's "<" a new tag/element starts, skip whitespaces, read the name, then look for attributes till you hit ">", etc. — Mario, Dec 13 '14 at 10:43
Thanks Mario, its helped me a lot, that means i have to go through every tags, i was just trying to make a shortcut to the data, but that does'nt mean parsing, right?? ok thanks again!! — Rishabh, Dec 13 '14 at 16:59
You can do shortcuts, just ensure you're not omitting important things (like trying to read id and returning parent_id just due to the id portion matching). In the end it boils down to processing once or processing just-in-time. The more unpredictable the requests are and the more repetitive, the more likely it would be a good idea to actually parse everything at once. — Mario, Dec 13 '14 at 18:30

asked	3 months ago
viewed	93 times
active	3 months ago

current community

your communities

more stack exchange communities

What are the Steps/Algorithm to load/Parse an XML File? [closed]

closed as off-topic by Brythan, itsbruce, RubberDuck, Edward, Simon André Forsberg Dec 13 '14 at 13:12

1 Answer 1

Not the answer you're looking for? Browse other questions tagged c++ strings xml or ask your own question.

Hot Network Questions

current community

your communities

more stack exchange communities

What are the Steps/Algorithm to load/Parse an XML File? [closed]

closed as off-topic by Brythan, itsbruce, RubberDuck, Edward, Simon André Forsberg Dec 13 '14 at 13:12

1 Answer 1

Not the answer you're looking for? Browse other questions tagged c++ strings xml or ask your own question.

Related

Hot Network Questions