Fixed formatting.

2013-09-04 18:30:04 +08:00 · 2013-09-04 18:30:04 +08:00 · 4069dd21a3
commit 4069dd21a3
parent 146ae3c279
89 changed files with 615 additions and 496 deletions
--- a/plugins/util/urlnorm.py
+++ b/plugins/util/urlnorm.py
@@ -38,13 +38,15 @@ class Normalizer(object):
        self.regex = regex
        self.normalize = normalize_func

-normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P<tld>[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P<ASIN>[0-9A-Za-z]{10})'),
-                            lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))),
-                Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'),
-                            lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ),
-                Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'),
-                            lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ),
-    )
+
+normalizers = (Normalizer(re.compile(
+    r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P<tld>[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P<ASIN>[0-9A-Za-z]{10})'),
+                          lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))),
+               Normalizer(re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'),
+                          lambda m: r'http://img.waffleimages.com/%s' % m.group(1)),
+               Normalizer(re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'),
+                          lambda m: r'http://youtube.com/watch?v=%s' % m.group(1)),
+)


 def normalize(url, assume_scheme=False):
@@ -78,12 +80,13 @@ def normalize(url, assume_scheme=False):
    def clean(string):
        string = unicode(unquote(string), 'utf-8', 'replace')
        return unicodedata.normalize('NFC', string).encode('utf-8')
+
    path = quote(clean(path), "~:/?#[]@!$&'()*+,;=")
    fragment = quote(clean(fragment), "~")

    # note care must be taken to only encode & and = characters as values
    query = "&".join(["=".join([quote(clean(t), "~:/?#[]@!$'()*+,;=")
-        for t in q.split("=", 1)]) for q in query.split("&")])
+                                for t in q.split("=", 1)]) for q in query.split("&")])

    # Prevent dot-segments appearing in non-relative URI paths.
    if scheme in ["", "http", "https", "ftp", "file"]:
@@ -128,7 +131,7 @@ def normalize(url, assume_scheme=False):
    if url.endswith("#") and query == "" and fragment == "":
        path += "#"
    normal_url = urlparse.urlunsplit((scheme, auth, path, query,
-        fragment)).replace("http:///", "http://")
+                                      fragment)).replace("http:///", "http://")
    for norm in normalizers:
        m = norm.regex.match(normal_url)
        if m: