Moved fact.py over to BS4 (I need to rewrite fact.py soon; it's a bit outdated and somewhat broken) and removed the old htmlstripper library

This commit is contained in:
Luke Rogers 2012-09-04 09:58:59 +12:00
parent b536bbf754
commit 16573eb482
2 changed files with 5 additions and 49 deletions

View file

@@ -1,42 +0,0 @@
from HTMLParser import HTMLParser
import htmlentitydefs
import errno
import re
class HTMLStripper(HTMLParser):
    """Collect the text of an HTML document while discarding its markup.

    Parsing happens entirely inside the constructor; the accumulated text
    is then available through the ``stripped`` property.
    """

    def __init__(self, data):
        """Parse *data* immediately and buffer the text pieces it contains.

        data -- the HTML markup to strip; it is handed unchanged to
                ``feed()``.
        """
        HTMLParser.__init__(self)
        # Text fragments in document order; joined lazily by ``stripped``.
        self._stripped = []
        self.feed(data)

    def handle_starttag(self, tag, attrs):
        """Emit a newline for ``br`` tags; ignore every other start tag."""
        if tag.lower() == 'br':
            self._stripped.append('\n')

    def handle_charref(self, name):
        """Append the character named by a numeric character reference.

        name -- the reference body; a leading ``x``/``X`` marks the rest
                as hexadecimal, otherwise it is parsed as decimal.

        A reference that cannot be converted to a character (malformed
        digits or a code point ``unichr`` rejects) is dropped silently, so
        one bad reference never aborts the whole strip.
        """
        try:
            if name.lower().startswith('x'):
                codepoint = int(name[1:], 16)
            else:
                codepoint = int(name)
            char = unichr(codepoint)
        except (ValueError, OverflowError):
            # Best effort: skip just this reference and keep parsing.
            return
        self._stripped.append(char)

    def handle_entityref(self, name):
        """Append the character for a named entity.

        name -- the entity name looked up in
                ``htmlentitydefs.name2codepoint``.

        Unknown names are kept verbatim as ``&name;`` so no input text is
        lost.
        """
        try:
            char = unichr(htmlentitydefs.name2codepoint[name])
        except (KeyError, ValueError):
            char = u'&%s;' % name
        self._stripped.append(char)

    def handle_data(self, data):
        """Append a run of plain text exactly as the parser delivered it."""
        self._stripped.append(data)

    @property
    def stripped(self):
        """Return all collected fragments concatenated into one string."""
        # NOTE(review): on Python 2 this join mixes whatever type ``data``
        # had with the unicode pieces added by the reference handlers --
        # presumably callers pass unicode or ASCII-only input; confirm.
        return ''.join(self._stripped)
def strip_html(data):
    """Return the text of *data* as produced by ``HTMLStripper``.

    data -- the HTML markup to strip.
    """
    stripper = HTMLStripper(data)
    return stripper.stripped