diff --git a/plugins/urlparse.py b/plugins/urlparse.py
index 9378051..7bf5abc 100755
--- a/plugins/urlparse.py
+++ b/plugins/urlparse.py
@@ -1,36 +1,21 @@
from util import hook, http, urlnorm
-import re
-
-titler = re.compile(r'(?si)<title>(.+?)</title>')
-
-
-def get_title(url):
- url = urlnorm.normalize(url.encode('utf-8'))
- url = url.decode('utf-8')
- # add http if its missing
- if not "://" in url:
- url = "http://" + url
- try:
- # get the title
- request = http.open(url)
- real_url = request.geturl()
- text = request.read()
- text = text.decode('utf8')
- match = titler.search(text)
- title = match.group(1)
- except:
- return "Could not parse URL! Are you sure its valid?"
-
- title = http.unescape(title)
-
- # if the url has been redirected, show us
- if real_url == url:
- return title
- else:
- return u"%s [%s]" % (title, real_url)
@hook.command
def title(inp):
"title -- gets the title of a web page"
- return get_title(inp)
+ url = urlnorm.normalize(inp.encode('utf-8'))
+
+ try:
+ page = http.get_html(url)
+ except:
+ return "Could not fetch page."
+
+ try:
+ title = page.find(".//title").text
+ except:
+ return "Could not find title."
+
+ title = http.unescape(title)
+
+ return title
\ No newline at end of file
diff --git a/plugins/util/http.py b/plugins/util/http.py
index b420311..58cddf8 100755
--- a/plugins/util/http.py
+++ b/plugins/util/http.py
@@ -6,10 +6,7 @@ import urllib
import urllib2
import urlparse
-from urllib import quote, quote_plus as _quote_plus
-from urllib2 import HTTPError, URLError
-
-
+from urllib import quote as _quote, quote_plus as _quote_plus
from lxml import etree, html
@@ -30,9 +27,11 @@ def get(*args, **kwargs):
def get_url(*args, **kwargs):
return open(*args, **kwargs).geturl()
+
def get_html(*args, **kwargs):
return html.fromstring(get(*args, **kwargs))
+
def get_xml(*args, **kwargs):
return etree.fromstring(get(*args, **kwargs))
@@ -60,7 +59,7 @@ def open(url, query_params=None, user_agent=None, post_data=None,
request.get_method = lambda: get_method
request.add_header('User-Agent', user_agent)
-
+
if referer is not None:
request.add_header('Referer', referer)
@@ -97,7 +96,11 @@ def quote_plus(s):
return _quote_plus(to_utf8(s))
+def quote(s):
+ return _quote(to_utf8(s))
+
+
def unescape(s):
if not s.strip():
return s
- return html.fromstring(s).text_content()
+ return html.fromstring(s).text_content()
\ No newline at end of file