# CloudBot/plugins/util/http.py
# convenience wrapper for urllib2 & friends
import cookielib
import json
import urllib
import urllib2
import urlparse
from urllib import quote as _quote, quote_plus as _quote_plus
from lxml import etree, html
# User-agent strings sent with requests. The default identifies the bot;
# the browser UAs are for sites that refuse non-browser clients.
ua_cloudbot = 'Cloudbot/DEV http://github.com/lukeroge/cloudbot'
ua_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) ' \
    'Gecko/20070725 Firefox/2.0.0.6'
ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'

# Module-wide cookie jar shared by every request made with cookies=True.
jar = cookielib.CookieJar()
def get(*args, **kwargs):
    """Fetch a URL (via open()) and return the response body as a string."""
    response = open(*args, **kwargs)
    return response.read()
def get_url(*args, **kwargs):
    """Fetch a URL (via open()) and return the final URL after redirects."""
    response = open(*args, **kwargs)
    return response.geturl()
def get_html(*args, **kwargs):
    """Fetch a URL and parse the body as an lxml HTML document."""
    body = get(*args, **kwargs)
    return html.fromstring(body)
def get_xml(*args, **kwargs):
    """Fetch a URL and parse the body as an lxml XML element tree."""
    body = get(*args, **kwargs)
    return etree.fromstring(body)
def get_json(*args, **kwargs):
    """Fetch a URL and deserialize the body as JSON."""
    body = get(*args, **kwargs)
    return json.loads(body)
def open(url, query_params=None, user_agent=None, post_data=None,
         referer=None, get_method=None, cookies=False, **kwargs):
    """Open *url* with urllib2 and return the response object.

    NOTE: shadows the builtin open(); kept for API compatibility
    (callers use this module as http.open).

    url -- the URL to request; extra query parameters from *query_params*
           and **kwargs are merged into its query string.
    query_params -- dict of query-string parameters (not mutated).
    user_agent -- User-Agent header value; defaults to ua_cloudbot.
    post_data -- body to POST; None means a GET request.
    referer -- optional Referer header value.
    get_method -- optional HTTP verb override (e.g. 'HEAD', 'DELETE').
    cookies -- if True, route the request through the shared module jar.
    """
    if query_params is None:
        query_params = {}
    else:
        # Copy so the kwargs merge below never mutates the caller's dict.
        query_params = dict(query_params)
    if user_agent is None:
        user_agent = ua_cloudbot
    query_params.update(kwargs)
    url = prepare_url(url, query_params)

    request = urllib2.Request(url, post_data)
    if get_method is not None:
        # urllib2 picks GET/POST from post_data; this forces another verb.
        request.get_method = lambda: get_method
    request.add_header('User-Agent', user_agent)
    if referer is not None:
        request.add_header('Referer', referer)

    if cookies:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    else:
        opener = urllib2.build_opener()
    return opener.open(request)
def prepare_url(url, queries):
    """Merge *queries* into *url*'s query string and return the new URL.

    Existing query parameters are kept unless overridden by *queries*;
    keys and values are UTF-8 encoded before urlencoding. If *queries*
    is empty, *url* is returned unchanged.
    """
    if not queries:
        return url
    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
    params = dict(urlparse.parse_qsl(query))
    params.update(queries)
    # Build a dict keyed on the encoded names so duplicate keys collapse
    # the same way the original query dict would.
    encoded = urllib.urlencode(dict([(to_utf8(k), to_utf8(v))
                                     for k, v in params.items()]))
    return urlparse.urlunsplit((scheme, netloc, path, encoded, fragment))
def to_utf8(s):
    """Coerce *s* to a UTF-8 byte string (Python 2 semantics).

    Unicode input is encoded (unencodable characters dropped); anything
    else is passed through str().
    """
    if not isinstance(s, unicode):
        return str(s)
    return s.encode('utf8', 'ignore')
def quote_plus(s):
    """URL-quote *s* after UTF-8 encoding, turning spaces into '+'."""
    encoded = to_utf8(s)
    return _quote_plus(encoded)
def quote(s):
    """URL-quote *s* after UTF-8 encoding (spaces become %20)."""
    encoded = to_utf8(s)
    return _quote(encoded)
def unescape(s):
    """Decode HTML entities in *s* by parsing it and extracting the text."""
    if s.strip():
        return html.fromstring(s).text_content()
    # lxml cannot parse an empty/whitespace-only document; pass it through.
    return s