Made URL parsing toggleable, added more advanced regex
This commit is contained in:
parent
7e3ce7ddba
commit
104a4d6810
2 changed files with 17 additions and 4 deletions
|
@ -46,6 +46,10 @@ if not os.path.exists('config'):
|
||||||
"factoids":
|
"factoids":
|
||||||
{
|
{
|
||||||
"prefix": false
|
"prefix": false
|
||||||
|
},
|
||||||
|
"urlparse":
|
||||||
|
{
|
||||||
|
"enabled": false
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"censored_strings":
|
"censored_strings":
|
||||||
|
|
|
@ -18,8 +18,17 @@ def parse(match):
|
||||||
except:
|
except:
|
||||||
return "fail"
|
return "fail"
|
||||||
|
|
||||||
@hook.regex(r'([a-zA-Z]://|www\.)?[^ ]+(\.[a-z]+)(\/)?(.*)')
|
# there should be " after the ' in the regex string but I was unable to escape it properly
|
||||||
def urlparser(match, say=None):
|
@hook.regex(r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'.,<>?«»“”‘’]))")
|
||||||
|
def urlparser(match, say=None, bot=None):
|
||||||
|
try:
|
||||||
|
enabled = bot.config["plugins"]["urlparse"]["enabled"]
|
||||||
|
except KeyError:
|
||||||
|
enabled = False
|
||||||
|
|
||||||
|
if not enabled:
|
||||||
|
return
|
||||||
|
|
||||||
url = urlnorm.normalize(match.group().encode('utf-8'))
|
url = urlnorm.normalize(match.group().encode('utf-8'))
|
||||||
if url[:7] != "http://":
|
if url[:7] != "http://":
|
||||||
if url[:8] != "https://":
|
if url[:8] != "https://":
|
||||||
|
@ -33,8 +42,8 @@ def urlparser(match, say=None):
|
||||||
title = http.unescape(title)
|
title = http.unescape(title)
|
||||||
realurl = http.get_url(url)
|
realurl = http.get_url(url)
|
||||||
if realurl == url:
|
if realurl == url:
|
||||||
say("(Link) %s" % title)
|
say(u"(Link) %s" % title)
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
say("(Link) %s [%s]" % (title, realurl))
|
say(u"(Link) %s [%s]" % (title, realurl))
|
||||||
return
|
return
|
||||||
|
|
Reference in a new issue