Python - Very Simple Parser
Join the DZone community and get the full member experience.
Join For Free
# Very Simple Parser
from sgmllib import SGMLParser
import urllib
class ParserHTML(SGMLParser):
    """SGML/HTML parser that re-emits the parsed document to a file.

    Every start tag, end tag and run of character data handed to the
    handlers below is written, followed by a newline, to the file opened
    by scrivi().  scrivi() must therefore be called before feed().
    """

    def scrivi(self, path='/tmp/fileOUT.html'):
        """Open the output file for writing and keep it in ``self.f``.

        path -- destination file; defaults to '/tmp/fileOUT.html'.
        """
        self.f = open(path, 'w')

    def close(self):
        """Finish parsing and release the output file.

        Delegates to SGMLParser.close() first so that the base class can
        finish its own work, then closes the file opened by scrivi()
        (if any).
        """
        # SGMLParser is not guaranteed to be a new-style class, so call the
        # base implementation explicitly instead of through super().
        SGMLParser.close(self)
        out = getattr(self, 'f', None)
        if out is not None:
            out.close()

    def unknown_starttag(self, tag, attrs):
        """Write the start tag ``tag`` with its attributes to the output file.

        attrs is a sequence of (name, value) pairs.  A pair whose value
        equals its name except for letter case is treated as a further word
        of the previous attribute's value (presumably the shape the parser
        reports for the extra words of an unquoted multi-word value --
        confirm against sgmllib) and is appended inside that attribute's
        quotes.  Every other pair is written as name="value".
        """
        pieces = ['<' + tag]
        for name, val in attrs:
            is_continuation = name.lower() == val.lower() and name != val
            # Only merge when an attribute has already been written;
            # otherwise there is no closing quote to reopen and slicing
            # would eat the last character of the tag name.
            if is_continuation and len(pieces) > 1:
                # Drop the previous closing quote, add the word, re-close.
                pieces[-1] = pieces[-1][:-1] + ' ' + str(val) + '"'
            else:
                pieces.append(' ' + str(name) + '="' + str(val) + '"')
        pieces.append('>')
        self.f.write(''.join(pieces) + "\n")

    def handle_data(self, data):
        """Write a run of character data to the output file on its own line."""
        self.f.write(data + "\n")

    def unknown_endtag(self, tag):
        """Write the closing tag ``</tag>`` to the output file on its own line."""
        self.f.write('</' + tag + '>' + "\n")
# Script entry point: parse /tmp/fileIN.html and write the re-emitted
# markup to the file opened by ParserHTML.scrivi().
if __name__ == '__main__':
    p = ParserHTML()
    p.scrivi()
    try:
        # Read the whole input inside a context manager so the handle is
        # released even if parsing fails.
        with open('/tmp/fileIN.html', 'r') as source:
            p.feed(source.read())
        # feed() may keep an incomplete trailing chunk buffered; close()
        # forces it through the handlers as if end-of-file had been seen.
        p.close()
    finally:
        # Always flush and close the output file; closing a file that is
        # already closed is a no-op.
        p.f.close()
Python (language)
Parser (programming language)
Opinions expressed by DZone contributors are their own.
Comments