Platinum Partner

Html Table To Wiki Converter

For more details on how to call this script from PHP if your server doesn't support Python, see http://just-tech.blogspot.com/2007/01/python-html-tables-to-mediawiki.html

import re
import sys

try:
    import HTMLParser  # Python 2 module name
except ImportError:
    import html.parser as HTMLParser  # Python 3 location of the same HTMLParser class
class html2wiki(HTMLParser.HTMLParser):
    """Convert the HTML tables in a document to MediaWiki table markup.

    Feed HTML with ``feed()``; the generated markup accumulates in
    ``self.wiki``.  Only ``<table>``, ``<tr>`` and ``<td>`` tags are
    interpreted, and text outside a ``<td>`` is ignored.  Omitted ``</td>``
    and ``</tr>`` end tags are implied by the next cell, row or table end.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.wiki = ''      # The wiki text produced so far
        self.wikirow = ''   # The wiki row currently being built
        self.inTD = 0       # Non-zero while inside a <td>...</td> element
        self.inTR = 0       # Non-zero while inside a <tr>...</tr> element
        # Collapses runs of whitespace inside a cell to a single space.
        self.re_multiplespaces = re.compile(r'\s+')
        self.rowCount = 0   # Number of rows closed so far
        self.rowspan = ''   # rowspan attribute of the current cell ('' if none)
        self.colspan = ''   # colspan attribute of the current cell ('' if none)
        self.linebreak = '\n'
        self.data = ''      # Text collected for the current cell
        self.prop = ''      # Rendered attribute prefix of the current cell

    def handle_starttag(self, tag, attrs):
        """Dispatch the start tags this converter understands."""
        if tag == 'table':
            self.start_table()
        elif tag == 'tr':
            self.start_tr()
        elif tag == 'td':
            self.start_td(attrs)

    def handle_endtag(self, tag):
        """Dispatch the end tags this converter understands."""
        if tag == 'table':
            self.end_table()
        elif tag == 'tr':
            self.end_tr()
        elif tag == 'td':
            self.end_td()

    def start_table(self):
        """Emit the table opener followed by the first row separator."""
        self.wiki += '{| border=1' + self.linebreak
        self.wiki += '|-' + self.linebreak

    def end_table(self):
        """Emit the table terminator, flushing a row left open by the HTML."""
        if self.inTR:
            self.end_tr()  # implies </tr>; otherwise the last row is lost
        self.wiki += '|}' + self.linebreak

    def start_tr(self):
        """Begin a new row, closing the previous one if it is still open."""
        if self.inTR:
            self.end_tr()  # implies </tr>
        self.inTR = 1

    def end_tr(self):
        """Close the current row and append it to the wiki text."""
        if self.inTD:
            self.end_td()  # implies </td>
        self.inTR = 0
        if len(self.wikirow) > 0:
            self.wiki += self.wikirow
            self.wiki += '|-' + self.linebreak
        self.wikirow = ''
        self.rowCount += 1

    def start_td(self, attrs):
        """Begin a new cell and record its rowspan/colspan attributes.

        attrs is the list of (name, value) pairs supplied by HTMLParser.
        """
        if self.inTD:
            self.end_td()  # implies </td>; otherwise the previous cell's text is discarded
        if not self.inTR:
            self.start_tr()  # implies <tr>
        self.data = ''
        self.rowspan = ''
        self.colspan = ''
        for key, value in attrs:
            # A valueless attribute is reported as None; treat it as absent
            # so the string concatenation below cannot fail.
            if key == 'rowspan':
                self.rowspan = value or ''
            elif key == 'colspan':
                self.colspan = value or ''
        # Build the attribute prefix here rather than in handle_data, so
        # that empty cells keep their span attributes as well.
        self.prop = ''
        if self.rowspan != '':
            self.prop = ' rowspan = ' + self.rowspan
        if self.colspan != '':
            self.prop += ' colspan = ' + self.colspan
        if self.prop:
            self.prop += ' | '
        self.inTD = 1

    def end_td(self):
        """Close the current cell and append it to the row being built."""
        if not self.inTD:
            return
        # NOTE(review): newlines are deleted (not replaced by a space) and
        # double quotes are doubled; both are kept exactly as in the
        # original -- confirm that this is the intended output.
        text = (self.data.replace('\t', ' ')
                .replace(self.linebreak, '')
                .replace('\r', '')
                .replace('"', '""'))
        text = self.re_multiplespaces.sub(' ', text)
        self.wikirow += '| ' + self.prop + text + self.linebreak
        self.data = ''
        self.inTD = 0

    def handle_data(self, data):
        """Collect non-blank text that appears inside a cell."""
        if self.inTD and data.strip() != '':
            self.data += data


if __name__ == '__main__':
    if len(sys.argv) == 2:
        parser = html2wiki()
        # The with-block closes the file even if reading or parsing fails.
        with open(sys.argv[1], "r") as in_file:
            parser.feed(in_file.read())
        # Single-argument call form prints the same on Python 2 and 3.
        print(parser.wiki)
    else:
        print('Argument - filename required')
{{ tag }}, {{tag}},

{{ parent.title || parent.header.title}}

{{ parent.tldr }}

{{ parent.urlSource.name }}
{{ parent.authors[0].realName || parent.author}}

{{ parent.authors[0].tagline || parent.tagline }}

{{ parent.views }} ViewsClicks
Tweet

{{parent.nComments}}