This repository has been archived on 2023-04-13. You can view files and clone it, but cannot push or open issues or pull requests.
CloudBot/plugins/urlparse.py
2012-03-28 11:24:20 +13:00

37 lines
893 B
Python
Executable file

from util import hook, http, urlnorm
import re
# Matches the first <title> element of an HTML document and captures its text.
# (?s) lets the title span several lines, (?i) ignores tag case.  The opening
# tag may carry attributes (e.g. <title lang="en">); \b keeps tags that merely
# start with "title" (such as <titles>) from matching.
titler = re.compile(r'(?si)<title\b[^>]*>(.+?)</title>')
def get_title(url):
    """Fetch a web page and return the text of its <title> element.

    The URL is passed through urlnorm.normalize() and is prefixed with
    "http://" when it does not already start with "http".  If the URL
    reported by the response differs from the requested one (a redirect),
    it is appended to the title in square brackets.

    Returns the title string, or a fixed error message when the page cannot
    be fetched, cannot be decoded as UTF-8, or has no <title> element.
    """
    error_message = "Could not parse URL! Are you sure its valid?"

    url = urlnorm.normalize(url.encode('utf-8'))
    url = url.decode('utf-8')

    # add http if it's missing
    if not url.startswith("http"):
        url = "http://" + url

    try:
        # fetch the page and decode its body
        request = http.open(url)
        real_url = request.geturl()
        text = request.read().decode('utf8')
    except Exception:
        # Best-effort lookup: any fetch/decode failure becomes a friendly
        # message.  Exception (not a bare except) so that KeyboardInterrupt
        # and SystemExit still propagate.
        return error_message

    match = titler.search(text)
    if match is None:
        # page has no recognisable <title> element
        return error_message

    page_title = http.unescape(match.group(1))

    # if the url has been redirected, show us
    if real_url == url:
        return page_title
    return u"%s [%s]" % (page_title, real_url)
@hook.command
def title(inp):
    """.title <url> -- gets the title of a web page"""
    # Thin command wrapper: the actual lookup lives in get_title().
    page_title = get_title(inp)
    return page_title