From 6c022dac1c10ef0ac33ce408714989fe45303882 Mon Sep 17 00:00:00 2001
From: Luke Rogers <lukeroge@gmail.com>
Date: Wed, 5 Sep 2012 07:41:52 +1200
Subject: [PATCH] Tweaked http.py, started rewriting urlparse.py

---
 plugins/urlparse.py  | 45 +++++++++++++++-----------------------------
 plugins/util/http.py | 15 +++++++++------
 2 files changed, 24 insertions(+), 36 deletions(-)
diff --git a/plugins/urlparse.py b/plugins/urlparse.py
index 9378051..7bf5abc 100755
--- a/plugins/urlparse.py
+++ b/plugins/urlparse.py
@@ -1,36 +1,21 @@
 from util import hook, http, urlnorm
-import re
-
-titler = re.compile(r'(?si)<title>(.+?)</title>')
-
-
-def get_title(url):
-    url = urlnorm.normalize(url.encode('utf-8'))
-    url = url.decode('utf-8')
-    # add http if its missing
-    if not "://" in url:
-        url = "http://" + url
-    try:
-        # get the title
-        request = http.open(url)
-        real_url = request.geturl()
-        text = request.read()
-        text = text.decode('utf8')
-        match = titler.search(text)
-        title = match.group(1)
-    except:
-        return "Could not parse URL! Are you sure its valid?"
-
-    title = http.unescape(title)
-
-    # if the url has been redirected, show us
-    if real_url == url:
-        return title
-    else:
-        return u"%s [%s]" % (title, real_url)
 
 
 @hook.command
 def title(inp):
     "title <url> -- gets the title of a web page"
-    return get_title(inp)
+    url = urlnorm.normalize(inp.encode('utf-8'))
+
+    try:
+        page = http.get_html(url)
+    except:
+        return "Could not fetch page."
+
+    try:
+        title = page.find(".//title").text
+    except:
+        return "Could not find title."
+
+    title = http.unescape(title)
+
+    return title
\ No newline at end of file
diff --git a/plugins/util/http.py b/plugins/util/http.py
index b420311..58cddf8 100755
--- a/plugins/util/http.py
+++ b/plugins/util/http.py
@@ -6,10 +6,7 @@ import urllib
 import urllib2
 import urlparse
 
-from urllib import quote, quote_plus as _quote_plus
-from urllib2 import HTTPError, URLError
-
-
+from urllib import quote as _quote, quote_plus as _quote_plus
 
 from lxml import etree, html
 
@@ -30,9 +27,11 @@ def get(*args, **kwargs):
 def get_url(*args, **kwargs):
     return open(*args, **kwargs).geturl()
 
+
 def get_html(*args, **kwargs):
     return html.fromstring(get(*args, **kwargs))
 
+
 def get_xml(*args, **kwargs):
     return etree.fromstring(get(*args, **kwargs))
 
@@ -60,7 +59,7 @@ def open(url, query_params=None, user_agent=None, post_data=None,
         request.get_method = lambda: get_method
 
     request.add_header('User-Agent', user_agent)
-    
+
     if referer is not None:
         request.add_header('Referer', referer)
 
@@ -97,7 +96,11 @@ def quote_plus(s):
     return _quote_plus(to_utf8(s))
 
 
+def quote(s):
+    return _quote(to_utf8(s))
+
+
 def unescape(s):
     if not s.strip():
         return s
-    return html.fromstring(s).text_content()
+    return html.fromstring(s).text_content()
\ No newline at end of file