From 624740990656a955e17109b4ae0b65d5424cc295 Mon Sep 17 00:00:00 2001 From: Luke Rogers Date: Thu, 5 Sep 2013 11:00:04 +1200 Subject: [PATCH 1/2] moved strip_html from http to text --- plugins/horoscope.py | 4 ++-- plugins/util/http.py | 29 ----------------------------- plugins/util/text.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/plugins/horoscope.py b/plugins/horoscope.py index 7bd6221..0a87714 100644 --- a/plugins/horoscope.py +++ b/plugins/horoscope.py @@ -1,6 +1,6 @@ # Plugin by Infinity - -from util import hook, http +from util import hook, http, text db_ready = False @@ -42,7 +42,7 @@ def horoscope(inp, db=None, notice=None, nick=None): title = soup.find_all('h1', {'class': 'h1b'})[1] horoscope = soup.find('div', {'class': 'fontdef1'}) result = "\x02%s\x02 %s" % (title, horoscope) - result = http.strip_html(result) + result = text.strip_html(result) #result = unicode(result, "utf8").replace('flight ','') if not title: diff --git a/plugins/util/http.py b/plugins/util/http.py index 849d929..4eccdf0 100755 --- a/plugins/util/http.py +++ b/plugins/util/http.py @@ -11,9 +11,6 @@ from urllib import quote, quote_plus as _quote_plus from lxml import etree, html from bs4 import BeautifulSoup -from HTMLParser import HTMLParser -import htmlentitydefs - # used in plugins that import this from urllib2 import URLError, HTTPError @@ -30,26 +27,6 @@ ua_chrome = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.4 (KHTML, ' \ jar = cookielib.CookieJar() -class HTMLTextExtractor(HTMLParser): - def __init__(self): - HTMLParser.__init__(self) - self.result = [] - - def handle_data(self, d): - self.result.append(d) - - def handle_charref(self, number): - codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number) - self.result.append(unichr(codepoint)) - - def handle_entityref(self, name): - codepoint = htmlentitydefs.name2codepoint[name] - self.result.append(unichr(codepoint)) - - def get_text(self): - return u''.join(self.result) - - def get(*args, **kwargs): return open(*args, **kwargs).read() @@ -133,9 +110,3 @@ def unescape(s): if not s.strip(): return s return html.fromstring(s).text_content() - - -def strip_html(html): - s = HTMLTextExtractor() - s.feed(html) - return s.get_text() diff --git a/plugins/util/text.py b/plugins/util/text.py index 5d7ee3f..9bc40f4 100755 --- a/plugins/util/text.py +++ b/plugins/util/text.py @@ -6,6 +6,35 @@ import re +from HTMLParser import HTMLParser +import htmlentitydefs + + +class HTMLTextExtractor(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self.result = [] + + def handle_data(self, d): + self.result.append(d) + + def handle_charref(self, number): + codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number) + self.result.append(unichr(codepoint)) + + def handle_entityref(self, name): + codepoint = htmlentitydefs.name2codepoint[name] + self.result.append(unichr(codepoint)) + + def get_text(self): + return u''.join(self.result) + + +def strip_html(html): + s = HTMLTextExtractor() + s.feed(html) + return s.get_text() + def munge(text, munge_count=0): """munges up text.""" From 6c4d7db9762af226efcb328563772803c256c106 Mon Sep 17 00:00:00 2001 From: Luke Rogers Date: Thu, 5 Sep 2013 13:16:08 +1200 Subject: [PATCH 2/2] Removed debug code --- plugins/cypher.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/plugins/cypher.py b/plugins/cypher.py index 1faad4c..1527778 100755 --- a/plugins/cypher.py +++ b/plugins/cypher.py @@ -8,16 +8,11 @@ def encode(key, clear): key_c = key[i % len(key)] enc_c = chr((ord(clear[i]) + ord(key_c)) % 256) enc.append(enc_c) - print "[debug]" return base64.urlsafe_b64encode("".join(enc)) def decode(key, enc): dec = [] - print " [debug] " - print "key: "+key - print "string: "+enc enc = base64.urlsafe_b64decode(enc.encode('ascii','ignore')) - print "de64: "+enc for i in range(len(enc)): key_c = key[i % len(key)] dec_c = chr((256 + ord(enc[i]) - ord(key_c)) % 256)