From a3669e6b507341c8ad51b9446d82618f92c03176 Mon Sep 17 00:00:00 2001 From: Luke Rogers Date: Tue, 16 Jul 2013 03:04:07 +1200 Subject: [PATCH] Run data through BS4 to fix the text formatting and all that crap (will optimise later) --- plugins/scp.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/plugins/scp.py b/plugins/scp.py index cce33d5..0710547 100644 --- a/plugins/scp.py +++ b/plugins/scp.py @@ -12,15 +12,16 @@ def api_get(kind, query): def get_info(url): try: - page = http.get(url) + soup = http.get_soup(url) except Exception as e: return "Could not get SCP information: Unable to fetch URL. ({})".format(e) - contents = re.sub('<[^<]+?>', '', page) + safe_html = unicode(soup) + contents = re.sub('<[^<]+?>', '', safe_html) try: - item_id = http.unescape(re.findall("Item #: (.+?)\n", contents, re.S)[0]) - object_class = http.unescape(re.findall("Object Class: (.+?)\n", contents, re.S)[0]) - description = http.unescape(re.findall("Description: (.+?)\n", contents, re.S)[0]) + item_id = re.findall("Item #: (.+?)\n", contents, re.S)[0] + object_class = re.findall("Object Class: (.+?)\n", contents, re.S)[0] + description = re.findall("Description: (.+?)\n", contents, re.S)[0] except IndexError as e: return "Could not get SCP information: Page was not a valid SCP page."