Html Table To Wiki Converter
Join the DZone community and get the full member experience.
Join For Free

For more details on how to call this script from PHP if your server doesn't support Python, see http://just-tech.blogspot.com/2007/01/python-html-tables-to-mediawiki.html
import re
import sys

try:
    import HTMLParser  # Python 2 module name
except ImportError:
    # Python 3 moved the module; bind it to the same name so that
    # ``HTMLParser.HTMLParser`` keeps resolving.
    import html.parser as HTMLParser
class html2wiki(HTMLParser.HTMLParser):
    """Convert the HTML tables in a document to MediaWiki table markup.

    Feed HTML with ``feed()``; the generated markup accumulates in
    ``self.wiki``.  Only ``<table>``, ``<tr>`` and ``<td>`` tags are
    interpreted.  Other tags (including ``<th>``) are ignored, so text is
    kept only when it appears inside a ``<td>`` cell.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.wiki = ''      # The Wiki text produced so far
        self.wikirow = ''   # The current Wiki row being built from HTML
        self.inTD = 0       # Non-zero while inside a <td>...</td> element
        self.inTR = 0       # Non-zero while inside a <tr>...</tr> element
        # Collapses every run of whitespace in a cell to a single space.
        self.re_multiplespaces = re.compile(r'\s+')
        self.rowCount = 0   # Number of <tr> rows closed so far
        self.rowspan = ''   # rowspan value of the current cell ('' if none)
        self.colspan = ''   # colspan value of the current cell ('' if none)
        self.linebreak = '\n'
        self.data = ''      # Raw text collected for the current cell
        self.prop = ''      # Attribute prefix emitted before the cell text

    def handle_starttag(self, tag, attrs):
        """Dispatch the opening table, row and cell tags; ignore the rest."""
        if tag == 'table':
            self.start_table()
        elif tag == 'tr':
            self.start_tr()
        elif tag == 'td':
            self.start_td(attrs)

    def handle_endtag(self, tag):
        """Dispatch the closing table, row and cell tags; ignore the rest."""
        if tag == 'table':
            self.end_table()
        elif tag == 'tr':
            self.end_tr()
        elif tag == 'td':
            self.end_td()

    def start_table(self):
        """Emit the wiki table header followed by the first row separator."""
        self.wiki += '{| border=1' + self.linebreak
        self.wiki += '|-' + self.linebreak

    def end_table(self):
        """Emit the wiki table terminator."""
        # </table> implies </tr>; otherwise an unclosed last row would be
        # dropped here and leak into the next table.
        if self.inTR:
            self.end_tr()
        self.wiki += '|}' + self.linebreak

    def start_tr(self):
        """Open a row, closing a still-open one first."""
        if self.inTR:
            self.end_tr()  # <tr> implies </tr>
        self.inTR = 1

    def end_tr(self):
        """Close the current row and flush its cells to ``self.wiki``."""
        if self.inTD:
            self.end_td()  # </tr> implies </td>
        self.inTR = 0
        if self.wikirow:
            self.wiki += self.wikirow
            self.wiki += '|-' + self.linebreak
            self.wikirow = ''
        self.rowCount += 1

    def start_td(self, attrs):
        """Open a cell and record its rowspan/colspan attributes.

        ``attrs`` is the list of ``(name, value)`` pairs supplied by the
        parser; ``value`` is ``None`` for an attribute written without one.
        """
        if self.inTD:
            self.end_td()  # <td> implies </td>; keeps the earlier cell's text
        if not self.inTR:
            self.start_tr()  # <td> implies <tr>
        self.data = ''
        self.rowspan = ''
        self.colspan = ''
        for key, value in attrs:
            # ``value or ''`` treats a valueless attribute as absent instead
            # of failing later on str + None.
            if key == 'rowspan':
                self.rowspan = value or ''
            elif key == 'colspan':
                self.colspan = value or ''
        # Build the prefix here rather than when text arrives, so that empty
        # cells keep their span attributes too.
        self.prop = ''
        if self.rowspan != '':
            self.prop = ' rowspan = ' + self.rowspan
        if self.colspan != '':
            self.prop += ' colspan = ' + self.colspan
        if self.prop:
            self.prop += ' | '
        self.inTD = 1

    def end_td(self):
        """Close the current cell and append it to the row being built."""
        if self.inTD:
            # NOTE(review): doubling '"' looks like CSV-style escaping rather
            # than anything wiki markup needs; kept so output is unchanged.
            text = (self.data.replace('\t', ' ')
                    .replace(self.linebreak, '')
                    .replace('\r', '')
                    .replace('"', '""'))
            text = self.re_multiplespaces.sub(' ', text)
            self.wikirow += '| ' + self.prop + text + self.linebreak
            self.data = ''
            self.inTD = 0

    def handle_data(self, data):
        """Collect text that appears inside a cell; drop everything else."""
        if self.inTD:
            self.data += data
if __name__ == '__main__':
    # Command-line entry point: convert the HTML file named by the single
    # argument and print the resulting wiki markup to stdout.
    if len(sys.argv) == 2:
        parser = html2wiki()
        # The context manager closes the file even if reading or parsing
        # raises.
        with open(sys.argv[1], "r") as in_file:
            parser.feed(in_file.read())
        # A parenthesised single argument works both as the Python 2 print
        # statement and as the Python 3 print function.
        print(parser.wiki)
    else:
        print('Argument - filename required')
Database
Opinions expressed by DZone contributors are their own.
Related
-
-
-
-
Comments