From 6d80ec1d77dec3287d36ee355627b4872f30609f Mon Sep 17 00:00:00 2001
From: Luke Rogers
Date: Thu, 1 Dec 2011 04:56:46 +1300
Subject: [PATCH] bla

---
Notes (free text in this position is not part of the commit message):
- Adds two entries to wordDic, keyed '!' and '$'.
- Adds two print statements to urlparser: "[debug] URL found" as its
  first statement, and "[url] No title found" just before the early
  return taken when parse(url) returns "fail".
- NOTE(review): several wordDic keys are identical to their values
  (e.g. '<': '<'); presumably the keys were HTML entities that got
  decoded in transit -- confirm against plugins/urltools.py.
- NOTE(review): the line breaks and indentation of the hunk lines below
  were reconstructed from the hunk line counts and Python block
  structure; verify they match the target file before applying.

 plugins/urltools.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/plugins/urltools.py b/plugins/urltools.py
index 153a2d3..a0d0724 100644
--- a/plugins/urltools.py
+++ b/plugins/urltools.py
@@ -19,6 +19,8 @@ wordDic = {
 '<': '<',
 '>': '>',
 '«': '«',
+'!': '!',
+'$': '$',
 ' ': ' '}
 
 def parse(match):
@@ -41,12 +43,14 @@ def multiwordReplace(text, wordDic):
 #@hook.regex(r'^(?#Protocol)(?:(?:ht|f)tp(?:s?)\:\/\/|~\/|\/)?(?#Username:Password)(?:\w+:\w+@)?(?#Subdomains)(?:(?:[-\w]+\.)+(?#TopLevel Domains)(?:com|org|net|gov|mil|biz|info|mobi|name|aero|jobs|museum|travel|[a-z]{2}))(?#Port)(?::[\d]{1,5})?(?#Directories)(?:(?:(?:\/(?:[-\w~!$+|.,=]|%[a-f\d]{2})+)+|\/)+|\?|#)?(?#Query)(?:(?:\?(?:[-\w~!$+|.,*:]|%[a-f\d{2}])+=?(?:[-\w~!$+|.,*:=]|%[a-f\d]{2})*)(?:&(?:[-\w~!$+|.,*:]|%[a-f\d{2}])+=?(?:[-\w~!$+|.,*:=]|%[a-f\d]{2})*)*)*(?#Anchor)(?:#(?:[-\w~!$+|.,*:=]|%[a-f\d]{2})*)?$')
 @hook.regex(r'([a-zA-Z]+://|www\.)[^ ]+')
 def urlparser(match, say = None):
+    print "[debug] URL found"
     url = urlnorm.normalize(match.group().encode('utf-8'))
     for x in ignored_urls:
         if x in url:
             return
     title = parse(url)
     if title == "fail":
+        print "[url] No title found"
         return
     title = multiwordReplace(title, wordDic)
     realurl = http.get_url(url)