2012-10-12 12:01:37 +02:00
|
|
|
from bs4 import BeautifulSoup
|
2011-11-20 10:23:31 +01:00
|
|
|
|
2014-02-14 04:36:57 +01:00
|
|
|
from util import hook, http, urlnorm
|
|
|
|
|
2012-03-23 06:14:58 +01:00
|
|
|
|
2012-09-04 21:41:52 +02:00
|
|
|
@hook.command
def title(inp):
    """title <url> -- gets the title of a web page"""
    # Normalize the user-supplied text into a URL, treating a missing
    # scheme as "http" (per the assume_scheme argument).
    url = urlnorm.normalize(inp.encode('utf-8'), assume_scheme="http")

    try:
        page = http.open(url)
        # Report the URL the response actually came from rather than the
        # one that was typed in.
        real_url = page.geturl()
        soup = BeautifulSoup(page.read())
    except (http.HTTPError, http.URLError):
        return "Could not fetch page."

    # find() returns None when the document has no <title> tag, and an
    # empty <title></title> has an empty .contents list; check both before
    # indexing so those pages yield the friendly message instead of raising
    # AttributeError / IndexError.
    title_tag = soup.find('title')
    if title_tag is None or not title_tag.contents:
        return "Could not find title."

    page_title = title_tag.contents[0]
    if not page_title:
        return "Could not find title."

    return u"{} [{}]".format(page_title, real_url)
|