After much searching around the web, I couldn't find a single module which did a decent prettyXML or prettyHTML. Because:
<node>
<child>
Text
</child>
</node>
Is flat out ridiculous, and not a good use of space.
<node>
<child>Text</child>
</node>
Is much cleaner.
That's what spurred this project. I also created a similar, but works completely different prettyHTML. These are designed to work with the build in module, miniDOM. I'm looking for a full critique (i.e. anything that needs to be).
Helper for prettyHTML:
def addLineBreaks(txt, max_char, level, indent, indentSubLines = False):
# If the length is already shorter, or if there is no space to be found
if(len(txt) <= max_char or txt.find(" ") < 0):
# Just return it
return txt
# end if
# Up the level if indentSubLines
if(indentSubLines): ++level
## Remember the last breakpoint's position
# Start it where the last \n is found (in case what is passed has already been broken up some)
lastPos = txt.rfind("\n")
# If there was no "\n" then zero
if(lastPos < 0): lastPos = 0
# Get how many breaks we need to make. (-1) because 3 lines has 2 line breaks.
numBreaks = math.ceil((len(txt) - lastPos) / max_char) - 1
# Get the first space
pos = tempPos = txt.find(" ", lastPos + 1)
# Place that many minus one line breaks
for i in range(numBreaks):
# Keep searching until we find the furthest position we can, which will be
# at tempPos - lastPos, where lastPos is the last break point.
while((tempPos - lastPos) < max_char):
# Assign the last result
pos = tempPos
# Find the next result after that one
tempPos = txt.find(" ", tempPos + 1)
# end while
# If no break was found, then up to the next spot, which may
# break what is wanted, but also prevents an infinite loop.
if(pos == lastPos): pos = tempPos
# Add the line break
txt = txt[0:pos + 1] + "\n" + (indent * level) + txt[pos + 1:]
# Remember where the break was put (+1 so it's past the \n)
lastPos = pos + 1
# end for
# Return the new string
return txt
# end addLineBreaks
PrettyHTML:
def prettyHTML(xml, indent = " ", level = 0, max_char = -1):
# If given text, just return it
if(xml.nodeType in (xml.TEXT_NODE, xml.CDATA_SECTION_NODE)):
return xml.toxml()
# end if
# Elements who's children will be indented
indtCldrn = ['datalist', 'details', 'dl',
'map', 'math', 'menu',
'ol',
'select', 'svg',
'tbody', 'tfoot', 'thead', 'tr',
'ul'
]
# Elements which will be placed on their own line, with all content / children
ownLine = ['!DOCTYPE',
'button',
'dd', 'dt',
'figcaption',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'label', 'legend', 'li',
'menuitem', 'meter',
'optgroup', 'option', 'output',
'progress',
'rect',
'style', 'summary',
'td', 'textarea', 'th', 'title'
]
# Elements which are self closing / void / whatever you want to call them
selfClosing = ['area', 'base', 'br', 'embed', 'hr', 'img', 'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr']
# Elements who's start / end tags will sandwich the content
spaceAfter = ['address', 'article', 'aside', 'audio',
'blockquote', 'body',
'canvas', 'code',
'div',
'fieldset', 'figure', 'footer', 'form',
'head', 'hader', 'html',
'iframe',
'main',
'nav', 'noscript',
'object',
'p', 'pre',
'ruby',
'samp', 'script', 'section',
'table', 'template',
'video'
]
## This is just here so all HTML elements are accounted for
"""inline = ['a', 'abbr',
'b', 'bdi', 'bdo',
'cite',
'data',
'del', 'dfn',
'em',
'i', 'ins',
'kbd',
'mark',
'q',
'rp', 'rt',
's', 'small', 'span', 'strong', 'sub', 'sup',
'time',
'u',
'var',
]"""
# Holds the pretty text
pretty = ""
# HTML is case insensitive, so make all node's lowercase
nodeName = xml.nodeName.lower()
## Make the opening tag
# Indent the tag, and add the node name
openTag = (indent * level) + "<" + nodeName
# Add the attributes (With the attribute name as lowercase)
# This is tried as a precautionary
try:
for item in xml.attributes.items():
openTag += " " + item[0].lower() + '="' + item[1] + '"'
# end for
except AttributeError:
pass
# end try
# If we have a node which is self-closing
if(nodeName in selfClosing):
# Add the closing />
openTag += " />"
# Ensure it's not too long
if(max_char > 0):
openTag = addLineBreaks(openTag, max_char, level, indent, True)
# end if
# If not a <wbr />, then we also need to add a \n
if(nodeName != "wbr"): openTag += "\n"
# And return it, no need to process further
return openTag
# else, it's not a self closing tag
else:
# Add the closing >
openTag += ">"
# end if
# If we want to indent the children
if(nodeName in indtCldrn):
# Add the open tag with a line break (indentation is already included)
pretty += (openTag + "\n")
# Get the prettyHTML for all the children, placing them one level deeper
for child in xml.childNodes:
pretty += prettyHTML(child, indent, level + 1, max_char)
# end for
# Add the closing tag with indentation
pretty += ((indent * level) + "</" + nodeName + ">\n")
# else if we want to sandwich everything.
elif(nodeName in spaceAfter):
# We assume that there is already a line break before (indentation is already included)
pretty += (openTag + "\n")
# Holds the inner text
temp = ""
# Get the prettyHTML for all the children
for child in xml.childNodes:
temp += prettyHTML(child, indent, level, max_char)
# end for
# If we have a limit
if(max_char > 0):
# Break it up
temp = addLineBreaks(temp, max_char, level, indent)
# end if
# Append temp to pretty
pretty += temp
# Add the closing tag
# If the last character isn't a \n, make it one so there is a break before the closing tag
if(pretty[-1] != "\n"): pretty += "\n"
# Add the closing tag with indentation
pretty += ((indent * level) + "</" + nodeName + ">\n")
# Else, we either have an inline, or a tag which should sit on its own line
else:
# We assume that there is already a line break before (indentation included)
pretty += openTag
# Add all children
for child in xml.childNodes:
pretty += child.toxml()
# end for
# Add the closing tag
pretty += ("</" + nodeName + ">")
# Then be sure to cut it up, if we have a max_char
if(max_char > 0):
# Break it up (If necessary)
pretty = addLineBreaks(pretty, max_char, level, indent, True)
# end if
# If we have a tag which is supposed to be on its own line, add a line break
if(nodeName in ownLine):
pretty += "\n"
# end if
# end if
# Return the now prettified text
return pretty
# end prettyHTML
Pretty XML:
def prettyXML(xml, indent = " ", level = 0):
# Flag for whether or not to indent, or just wrap it
hasNonText = False
# For each node in the childNodes
for node in xml.childNodes:
# Check if there is something that isn't text
if(node.nodeType not in (node.TEXT_NODE, node.CDATA_SECTION_NODE)):
# If so, set the flag, and kill the loop
hasNonText = True
break
# end if
# end for
# Store the pretty XML
pretty = ""
# If we have nonText
if(hasNonText):
# Add the indentation and start tag
pretty += ((indent * level) + "<" + xml.nodeName)
# Add the attributes (This is tried as a precaution)
try:
for item in xml.attributes.items():
pretty += " " + item[0] + '="' + item[1] + '"'
# end for
except AttributeError:
pass
# end try
# add the closing > and a line break
pretty += ">\n"
# Loop through each child
for child in xml.childNodes:
# And add it to pretty, moving it one level deeper
pretty += prettyXML(child, indent, level + 1)
# end for
# Add the closing tag with indentation
pretty += ((indent * level) + "</" + xml.nodeName + ">\n")
# Else if it had no children
elif(not xml.childNodes):
# Just add the raw XML with indentation. Probably a self-closing tag
pretty += ((indent * level) + xml.toxml() + "\n")
# Else, it only had text children
else:
# Add the indentation and start tag
pretty += ((indent * level) + "<" + xml.nodeName)
# Add the attributes (like above)
try:
for item in xml.attributes.items():
pretty += " " + item[0] + '="' + item[1] + '"'
# end for
except AttributeError:
pass
# end try
# Add the closing >
pretty += ">"
# Add all children (which will just be text)
for node in xml.childNodes:
pretty += node.toxml()
# end for
# Add the closing tag
pretty += "</" + xml.nodeName + ">\n"
# end if
return pretty
# end prettyXML
asdf
s not wrapped. I think I get it; themax_char
is actually a misnamedmin_char
. – Veedrac Jan 10 at 15:03