moved strip_html from http to text

2013-09-05 11:00:04 +12:00 · 2013-09-05 11:00:04 +12:00 · 6247409906
commit 6247409906
parent 8b23ba9c12
3 changed files with 31 additions and 31 deletions
--- a/plugins/horoscope.py
+++ b/plugins/horoscope.py
@@ -1,6 +1,6 @@
 # Plugin by Infinity - <https://github.com/infinitylabs/UguuBot>

-from util import hook, http
+from util import hook, http, text

 db_ready = False

@@ -42,7 +42,7 @@ def horoscope(inp, db=None, notice=None, nick=None):
    title = soup.find_all('h1', {'class': 'h1b'})[1]
    horoscope = soup.find('div', {'class': 'fontdef1'})
    result = "\x02%s\x02 %s" % (title, horoscope)
-    result = http.strip_html(result)
+    result = text.strip_html(result)
    #result = unicode(result, "utf8").replace('flight ','')

    if not title:
--- a/plugins/util/http.py
+++ b/plugins/util/http.py
@@ -11,9 +11,6 @@ from urllib import quote, quote_plus as _quote_plus
 from lxml import etree, html
 from bs4 import BeautifulSoup

-from HTMLParser import HTMLParser
-import htmlentitydefs
-
 # used in plugins that import this
 from urllib2 import URLError, HTTPError

@@ -30,26 +27,6 @@ ua_chrome = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.4 (KHTML, ' \
 jar = cookielib.CookieJar()


-class HTMLTextExtractor(HTMLParser):
-    def __init__(self):
-        HTMLParser.__init__(self)
-        self.result = []
-
-    def handle_data(self, d):
-        self.result.append(d)
-
-    def handle_charref(self, number):
-        codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number)
-        self.result.append(unichr(codepoint))
-
-    def handle_entityref(self, name):
-        codepoint = htmlentitydefs.name2codepoint[name]
-        self.result.append(unichr(codepoint))
-
-    def get_text(self):
-        return u''.join(self.result)
-
-
 def get(*args, **kwargs):
    return open(*args, **kwargs).read()

@@ -133,9 +110,3 @@ def unescape(s):
    if not s.strip():
        return s
    return html.fromstring(s).text_content()
-
-
-def strip_html(html):
-    s = HTMLTextExtractor()
-    s.feed(html)
-    return s.get_text()
--- a/plugins/util/text.py
+++ b/plugins/util/text.py
@@ -6,6 +6,35 @@

 import re

+from HTMLParser import HTMLParser
+import htmlentitydefs
+
+
+class HTMLTextExtractor(HTMLParser):
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.result = []
+
+    def handle_data(self, d):
+        self.result.append(d)
+
+    def handle_charref(self, number):
+        codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number)
+        self.result.append(unichr(codepoint))
+
+    def handle_entityref(self, name):
+        codepoint = htmlentitydefs.name2codepoint[name]
+        self.result.append(unichr(codepoint))
+
+    def get_text(self):
+        return u''.join(self.result)
+
+
+def strip_html(html):
+    s = HTMLTextExtractor()
+    s.feed(html)
+    return s.get_text()
+

 def munge(text, munge_count=0):
    """munges up text."""