This repository has been archived on 2023-04-13. You can view files and clone it, but cannot push or open issues or pull requests.
CloudBot/plugins/urlparse.py

37 lines
882 B
Python
Raw Normal View History

2012-05-16 21:45:59 +02:00
from util import hook, http, urlnorm
2011-11-20 10:23:31 +01:00
import re
2012-03-23 06:14:58 +01:00
# Matches the first <title> element of an HTML document, case-insensitively
# and across line breaks. Attributes on the opening tag (e.g. <title lang="en">)
# are allowed; the \b keeps tags that merely start with "title" from matching.
# Group 1 is the raw (still HTML-escaped) title text.
titler = re.compile(r'(?si)<title\b[^>]*>(.+?)</title>')
2012-03-28 00:04:06 +02:00
def get_title(url):
    """Fetch *url* and return the text of its HTML <title> element.

    The URL is normalized first, and "http://" is prepended when it carries
    no scheme. If the fetch ends at a different URL than the one requested
    (for example after a redirect), the final URL is appended in square
    brackets.

    Returns the title string, or a fixed error message when the page cannot
    be fetched or contains no <title> element.
    """
    url = urlnorm.normalize(url.encode('utf-8'))
    url = url.decode('utf-8')
    # add http if it's missing
    if "://" not in url:
        url = "http://" + url

    error_message = "Could not parse URL! Are you sure its valid?"

    # Only the network round-trip is guarded; `except Exception` (rather than
    # a bare except) lets KeyboardInterrupt/SystemExit propagate.
    try:
        request = http.open(url)
        try:
            real_url = request.geturl()
            text = request.read()
        finally:
            # NOTE(review): assumes http.open returns a urllib-style response
            # object (it already exposes geturl()/read()) -- confirm close().
            request.close()
    except Exception:
        return error_message

    # Pages are not guaranteed to be UTF-8; replace undecodable bytes instead
    # of giving up on the whole page.
    text = text.decode('utf8', 'replace')

    match = titler.search(text)
    if match is None:
        return error_message

    # The title may span several lines in the markup; collapse all runs of
    # whitespace so the result is a single clean line.
    title = u" ".join(http.unescape(match.group(1)).split())

    # if the url has been redirected, show us
    if real_url == url:
        return title
    return u"%s [%s]" % (title, real_url)
2012-03-23 06:14:58 +01:00
@hook.command
def title(inp):
    "title <url> -- gets the title of a web page"
    # Command entry point: all of the work is delegated to get_title().
    # NOTE(review): the docstring above is presumably shown to users as the
    # command's help text, so its wording is left untouched -- confirm.
    page_title = get_title(inp)
    return page_title