Fixed title plugin

2012-10-12 22:34:48 +13:00 · 2012-10-12 22:34:48 +13:00 · cf377e1800
commit cf377e1800
parent 00d80f0122
2 changed files with 9 additions and 8 deletions
--- a/plugins/urlparse.py
+++ b/plugins/urlparse.py
@@ -4,18 +4,16 @@ from util import hook, http, urlnorm
@hook.command
 def title(inp):
    "title <url> -- gets the title of a web page"
-    url = urlnorm.normalize(inp.encode('utf-8'))
+    url = urlnorm.normalize(inp.encode('utf-8'), assume_scheme="http")
    try:
        page = http.get_html(url)
-    except:
+    except (http.HTTPError, http.URLError):
        return "Could not fetch page."
    try:
        title = page.find(".//title").text
-    except:
+    except AttributeError:
        return "Could not find title."
-    title = http.unescape(title)
+    return http.unescape(title)
    return title
--- a/plugins/util/urlnorm.py
+++ b/plugins/util/urlnorm.py
@@ -42,12 +42,12 @@ normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:a
                            lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))),
                Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'),
                            lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ),
-                Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-z0-9]+)'),
+                Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'),
                            lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ),
    )
-def normalize(url):
+def normalize(url, assume_scheme=False):
    """Normalize a URL."""
    scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip())
@@ -69,6 +69,9 @@ def normalize(url):
            scheme = "http"
        path = path[4:]
+    if assume_scheme and not scheme:
+        scheme = assume_scheme.lower()
    # Only perform percent-encoding where it is essential.
    # Always use uppercase A-through-F characters when percent-encoding.
    # All portions of the URI must be utf-8 encoded NFC from Unicode strings