diff --git a/plugins/urlparse.py b/plugins/title.py similarity index 62% rename from plugins/urlparse.py rename to plugins/title.py index 4ad6dfd..f1bfdb3 100755 --- a/plugins/urlparse.py +++ b/plugins/title.py @@ -4,18 +4,16 @@ from util import hook, http, urlnorm @hook.command def title(inp): "title -- gets the title of a web page" - url = urlnorm.normalize(inp.encode('utf-8')) + url = urlnorm.normalize(inp.encode('utf-8'), assume_scheme="http") try: page = http.get_html(url) - except: + except (http.HTTPError, http.URLError): return "Could not fetch page." try: title = page.find(".//title").text - except: + except AttributeError: return "Could not find title." - title = http.unescape(title) - - return title + return http.unescape(title) diff --git a/plugins/util/urlnorm.py b/plugins/util/urlnorm.py index 4089710..23b4a45 100755 --- a/plugins/util/urlnorm.py +++ b/plugins/util/urlnorm.py @@ -42,12 +42,12 @@ normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:a lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))), Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'), lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ), - Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-z0-9]+)'), + Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'), lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ), ) -def normalize(url): +def normalize(url, assume_scheme=False): """Normalize a URL.""" scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip()) @@ -69,6 +69,9 @@ def normalize(url): scheme = "http" path = path[4:] + if assume_scheme and not scheme: + scheme = assume_scheme.lower() + # Only perform percent-encoding where it is essential. # Always use uppercase A-through-F characters when percent-encoding. # All portions of the URI must be utf-8 encoded NFC from Unicode strings