I am trying to extract data from RSS documents, and I am using the following code to parse the XML.
However, it does not work for this document: http://www.mediafire.com/?hptptj8847awnn1 . Please help!
# Import minidom, an easy-to-use XML parser:
import xml.dom.minidom as minidom
import csv
def getTags(xml):
    """
    Print the title of every <item> element found in an RSS/XML document.

    The number of titles found is printed first, followed by each title
    on its own line.

    xml -- a filename or an open file object; handed to minidom.parse().

    Returns the list of title strings in document order.  An <item> that
    has no <title> child is skipped rather than raising IndexError.
    Errors raised by minidom.parse() itself are not caught here.
    """
    doc = minidom.parse(xml)
    # Title text may be stored as plain text, as a CDATA section, or both.
    text_types = (minidom.Node.TEXT_NODE, minidom.Node.CDATA_SECTION_NODE)

    titles = []
    for item in doc.getElementsByTagName("item"):
        title_elements = item.getElementsByTagName("title")
        if not title_elements:
            # No <title> in this item: nothing to report for it.
            continue
        # Join every text/CDATA fragment so a title made of several
        # nodes is kept whole instead of only its last fragment.
        titles.append("".join(
            node.data
            for node in title_elements[0].childNodes
            if node.nodeType in text_types
        ))

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(len(titles))
    for title in titles:
        print(title)
    return titles
if __name__ == "__main__":
    # When executed as a script, process the hard-coded feed file.
    getTags('D2B0918.xml')