Made URL parsing toggleable, added more advanced regex

This commit is contained in:
Luke Rogers 2012-03-04 14:23:39 +13:00
parent 7e3ce7ddba
commit 104a4d6810
2 changed files with 17 additions and 4 deletions

View file

@ -46,6 +46,10 @@ if not os.path.exists('config'):
"factoids": "factoids":
{ {
"prefix": false "prefix": false
},
"urlparse":
{
"enabled": false
} }
}, },
"censored_strings": "censored_strings":

View file

@ -18,8 +18,17 @@ def parse(match):
except: except:
return "fail" return "fail"
@hook.regex(r'([a-zA-Z]://|www\.)?[^ ]+(\.[a-z]+)(\/)?(.*)') # there should be " after the ' in the regex string but I was unable to escape it properly
def urlparser(match, say=None): @hook.regex(r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'.,<>?«»“”‘’]))")
def urlparser(match, say=None, bot=None):
try:
enabled = bot.config["plugins"]["urlparse"]["enabled"]
except KeyError:
enabled = False
if not enabled:
return
url = urlnorm.normalize(match.group().encode('utf-8')) url = urlnorm.normalize(match.group().encode('utf-8'))
if url[:7] != "http://": if url[:7] != "http://":
if url[:8] != "https://": if url[:8] != "https://":
@ -33,8 +42,8 @@ def urlparser(match, say=None):
title = http.unescape(title) title = http.unescape(title)
realurl = http.get_url(url) realurl = http.get_url(url)
if realurl == url: if realurl == url:
say("(Link) %s" % title) say(u"(Link) %s" % title)
return return
else: else:
say("(Link) %s [%s]" % (title, realurl)) say(u"(Link) %s [%s]" % (title, realurl))
return return