Fixed title plugin

This commit is contained in:
Luke Rogers 2012-10-12 22:34:48 +13:00
parent 00d80f0122
commit cf377e1800
2 changed files with 9 additions and 8 deletions

View file

@@ -4,18 +4,16 @@ from util import hook, http, urlnorm
@hook.command @hook.command
def title(inp): def title(inp):
"title <url> -- gets the title of a web page" "title <url> -- gets the title of a web page"
url = urlnorm.normalize(inp.encode('utf-8')) url = urlnorm.normalize(inp.encode('utf-8'), assume_scheme="http")
try: try:
page = http.get_html(url) page = http.get_html(url)
except: except (http.HTTPError, http.URLError):
return "Could not fetch page." return "Could not fetch page."
try: try:
title = page.find(".//title").text title = page.find(".//title").text
except: except AttributeError:
return "Could not find title." return "Could not find title."
title = http.unescape(title) return http.unescape(title)
return title

View file

@@ -42,12 +42,12 @@ normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:a
lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))), lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))),
Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'), Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'),
lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ), lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ),
Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-z0-9]+)'), Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'),
lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ), lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ),
) )
def normalize(url): def normalize(url, assume_scheme=False):
"""Normalize a URL.""" """Normalize a URL."""
scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip()) scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip())
@@ -69,6 +69,9 @@ def normalize(url):
scheme = "http" scheme = "http"
path = path[4:] path = path[4:]
if assume_scheme and not scheme:
scheme = assume_scheme.lower()
# Only perform percent-encoding where it is essential. # Only perform percent-encoding where it is essential.
# Always use uppercase A-through-F characters when percent-encoding. # Always use uppercase A-through-F characters when percent-encoding.
# All portions of the URI must be utf-8 encoded NFC from Unicode strings # All portions of the URI must be utf-8 encoded NFC from Unicode strings