From 7dc1daa69f0f40526dbfb15fdf6a76df7bfb5beb Mon Sep 17 00:00:00 2001
From: Luke Rogers <lukeroge@gmail.com>
Date: Wed, 2 Oct 2013 12:01:46 +1300
Subject: [PATCH] move lib to core, no more sys.path fucking @cybojenix

---
 cloudbot.py        |   1 -
 core/bot.py        |   6 +-
 util/__init__.py   |   0
 util/color.py      |  35 +++++++
 util/execute.py    |  30 ++++++
 util/formatting.py |  34 +++++++
 util/hook.py       | 101 ++++++++++++++++++++
 util/http.py       | 115 +++++++++++++++++++++++
 util/text.py       | 230 +++++++++++++++++++++++++++++++++++++++++++++
 util/textgen.py    |  51 ++++++++++
 util/timeformat.py |  14 +++
 util/timesince.py  | 105 +++++++++++++++++++++
 util/urlnorm.py    | 139 +++++++++++++++++++++++++++
 util/web.py        |  54 +++++++++++
 14 files changed, 911 insertions(+), 4 deletions(-)
 create mode 100644 util/__init__.py
 create mode 100644 util/color.py
 create mode 100644 util/execute.py
 create mode 100644 util/formatting.py
 create mode 100644 util/hook.py
 create mode 100644 util/http.py
 create mode 100644 util/text.py
 create mode 100644 util/textgen.py
 create mode 100644 util/timeformat.py
 create mode 100644 util/timesince.py
 create mode 100644 util/urlnorm.py
 create mode 100644 util/web.py

diff --git a/cloudbot.py b/cloudbot.py
index 385dc3a..76349dc 100644
--- a/cloudbot.py
+++ b/cloudbot.py
@@ -8,7 +8,6 @@ import sys
 import time
 
 # set up enviroment
-sys.path += ['plugins']  # add stuff to the sys.path for easy imports
 os.chdir(sys.path[0] or '.')  # do stuff relative to the install directory
 
 print 'CloudBot REFRESH <http://git.io/cloudbotirc>'
diff --git a/core/bot.py b/core/bot.py
index 68ea15c..0929855 100644
--- a/core/bot.py
+++ b/core/bot.py
@@ -66,17 +66,17 @@ class Bot(object):
 
     def get_config(self):
         """create and return the config object"""
-        return config.Config(self.name, self.logger)
+        return config.Config(self.logger)
 
 
     def get_logger(self):
         """create and return the logger object"""
         # create logger
-        logger = logging.getLogger(self.name)
+        logger = logging.getLogger("cloudbot")
         logger.setLevel(logging.DEBUG)
 
         # add a file handler
-        log_name = "{}.log".format(self.name)
+        log_name = "bot.log"
         fh = logging.FileHandler(log_name)
         fh.setLevel(logging.DEBUG)
 
diff --git a/util/__init__.py b/util/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/util/color.py b/util/color.py
new file mode 100644
index 0000000..d5501df
--- /dev/null
+++ b/util/color.py
@@ -0,0 +1,35 @@
+# Colors. Plugin by blha303, color/control id info from http://stackoverflow.com/a/13382032
+
+colors = {'white': '0', 'black': '1', 'darkblue': '2', 'darkgreen': '3',
+          'red': '4', 'darkred': '5', 'darkviolet': '6', 'orange': '7',
+          'yellow': '8', 'lightgreen': '9', 'cyan': '10', 'lightcyan': '11',
+          'blue': '12', 'violet': '13', 'darkgray': '14', 'lightgray': '15'}
+
+control = {'bold': '\x02', 'color': '\x03', 'italic': '\x09',
+           'strikethrough': '\x13', 'reset': '\x0f', 'underline': '\x15',
+           'underline2': '\x1f', 'reverse': '\x16'}
+
+
+def color(color):
+    return control['color'] + colors[color]
+
+def bold():
+    return control['bold']
+
+def italic():
+    return control['italic']
+
+def strike():
+    return control['strikethrough']
+
+def reset():
+    return control['reset']
+
+def underline(other=False):
+    if other:
+        return control['underline2']
+    else:
+        return control['underline']
+
+def reverse():
+    return control['reverse']
diff --git a/util/execute.py b/util/execute.py
new file mode 100644
index 0000000..9aecd7e
--- /dev/null
+++ b/util/execute.py
@@ -0,0 +1,30 @@
+import http, web
+
+
+def eval_py(code, paste_multiline=True):
+    attempts = 0
+
+    while True:
+        try:
+            output = http.get("http://eval.appspot.com/eval", statement=code).rstrip('\n')
+            # sometimes the API returns a blank string on first attempt, lets try again
+            # and make sure it is actually supposed to be a blank string. ._.
+            if output == "":
+                output = http.get("http://eval.appspot.com/eval", statement=code).rstrip('\n')
+            break
+        except http.HTTPError:
+            if attempts > 2:
+                return "Failed to execute code."
+            else:
+                attempts += 1
+                continue
+
+    if "Traceback (most recent call last):" in output:
+        status = "Python error: "
+    else:
+        status = "Code executed sucessfully: "
+
+    if "\n" in output and paste_multiline:
+        return status + web.haste(output)
+    else:
+        return output
diff --git a/util/formatting.py b/util/formatting.py
new file mode 100644
index 0000000..442adf5
--- /dev/null
+++ b/util/formatting.py
@@ -0,0 +1,34 @@
+def raw(format_string):
+    """Replace bracketed tags like [blue] or [b] with IRC control codes"""
+    stuff = {}
+    stuff['col'] = {'[white]':'\x030',
+    '[black]':'\x031',
+    '[dblue]':'\x032',
+    '[dgreen]':'\x033',
+    '[dred]':'\x034',
+    '[brown]':'\x035',
+    '[purple]':'\x036',
+    '[gold]':'\x037',
+    '[yellow]':'\x038',
+    '[green]':'\x039',
+    '[cyan]':'\x0310',
+    '[lblue]':'\x0311',
+    '[blue]':'\x0312',
+    '[pink]':'\x0313',
+    '[gray]':'\x0314',
+    '[lgray]':'\x0315',
+    '[err]':'\x034\x02',  # trailing comma was missing: a SyntaxError on import
+    '[/err]':'\x030\x02'}
+    stuff['style'] = {'[b]':'\x02',
+                      '[clear]':'\x0f'}
+    stuff['sym'] = {'[point]':'\x07'}
+    stuff['text'] = {'[url]':'http://'}
+    final = {}
+    for x in stuff:
+        final.update(stuff[x])
+    for x in final:
+        format_string = format_string.replace(x,final[x])
+    return format_string
+def err(format_string):
+    """Format the string with standard error styling"""
+    return "\x034\x02{}\x0f".format(format_string)
\ No newline at end of file
diff --git a/util/hook.py b/util/hook.py
new file mode 100644
index 0000000..5e191d3
--- /dev/null
+++ b/util/hook.py
@@ -0,0 +1,101 @@
+import inspect
+import re
+
+
+def _hook_add(func, add, name=''):
+    if not hasattr(func, '_hook'):
+        func._hook = []
+    func._hook.append(add)
+
+    if not hasattr(func, '_filename'):
+        func._filename = func.func_code.co_filename
+
+    if not hasattr(func, '_args'):
+        argspec = inspect.getargspec(func)
+        if name:
+            n_args = len(argspec.args)
+            if argspec.defaults:
+                n_args -= len(argspec.defaults)
+            if argspec.keywords:
+                n_args -= 1
+            if argspec.varargs:
+                n_args -= 1
+            if n_args != 1:
+                err = '%ss must take 1 non-keyword argument (%s)' % (name,
+                                                                     func.__name__)
+                raise ValueError(err)
+
+        args = []
+        if argspec.defaults:
+            end = bool(argspec.keywords) + bool(argspec.varargs)
+            args.extend(argspec.args[-len(argspec.defaults):
+            end if end else None])
+        if argspec.keywords:
+            args.append(0)  # means kwargs present
+        func._args = args
+
+    if not hasattr(func, '_thread'):  # does function run in its own thread?
+        func._thread = False
+
+
+def sieve(func):
+    if func.func_code.co_argcount != 5:
+        raise ValueError(
+            'sieves must take 5 arguments: (bot, input, func, type, args)')
+    _hook_add(func, ['sieve', (func,)])
+    return func
+
+
+def command(arg=None, **kwargs):
+    args = {}
+
+    def command_wrapper(func):
+        args.setdefault('name', func.func_name)
+        _hook_add(func, ['command', (func, args)], 'command')
+        return func
+
+    if kwargs or not inspect.isfunction(arg):
+        if arg is not None:
+            args['name'] = arg
+        args.update(kwargs)
+        return command_wrapper
+    else:
+        return command_wrapper(arg)
+
+
+def event(arg=None, **kwargs):
+    """Register a function as an event hook; arg is a space-separated event list."""
+    args = kwargs
+
+    def event_wrapper(func):
+        args['name'] = func.func_name
+        args.setdefault('events', ['*'])
+        _hook_add(func, ['event', (func, args)], 'event')
+        return func
+
+    if inspect.isfunction(arg):
+        return event_wrapper(arg)  # bare @event; kwargs already captured in args
+    if arg is not None:
+        args['events'] = arg.split()
+    return event_wrapper
+
+
+def singlethread(func):
+    func._thread = True
+    return func
+
+
+def regex(regex, flags=0, **kwargs):
+    args = kwargs
+
+    def regex_wrapper(func):
+        args['name'] = func.func_name
+        args['regex'] = regex
+        args['re'] = re.compile(regex, flags)
+        _hook_add(func, ['regex', (func, args)], 'regex')
+        return func
+
+    if inspect.isfunction(regex):
+        raise ValueError("regex decorators require a regex to match against")
+    else:
+        return regex_wrapper
diff --git a/util/http.py b/util/http.py
new file mode 100644
index 0000000..4409211
--- /dev/null
+++ b/util/http.py
@@ -0,0 +1,115 @@
+# convenience wrapper for urllib2 & friends
+
+import cookielib
+import json
+import urllib
+import urllib2
+import urlparse
+
+from urllib import quote, quote_plus as _quote_plus
+
+from lxml import etree, html
+from bs4 import BeautifulSoup
+
+# used in plugins that import this
+from urllib2 import URLError, HTTPError
+
+ua_cloudbot = 'Cloudbot/DEV http://github.com/CloudDev/CloudBot'
+
+ua_firefox = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:17.0) Gecko/17.0' \
+             ' Firefox/17.0'
+ua_old_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; ' \
+                 'rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6'
+ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
+ua_chrome = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.4 (KHTML, ' \
+            'like Gecko) Chrome/22.0.1229.79 Safari/537.4'
+
+jar = cookielib.CookieJar()
+
+
+def get(*args, **kwargs):
+    return open(*args, **kwargs).read()
+
+
+def get_url(*args, **kwargs):
+    return open(*args, **kwargs).geturl()
+
+
+def get_html(*args, **kwargs):
+    return html.fromstring(get(*args, **kwargs))
+
+
+def get_soup(*args, **kwargs):
+    return BeautifulSoup(get(*args, **kwargs), 'lxml')
+
+
+def get_xml(*args, **kwargs):
+    return etree.fromstring(get(*args, **kwargs))
+
+
+def get_json(*args, **kwargs):
+    return json.loads(get(*args, **kwargs))
+
+
+def open(url, query_params=None, user_agent=None, post_data=None,
+         referer=None, get_method=None, cookies=False, timeout=None, **kwargs):
+    """Open url with urllib2 and return the response object.
+
+    Extra keyword arguments are merged into the URL's query parameters.
+    """
+    # copy first: merging kwargs must not mutate a dict the caller still owns
+    query_params = dict(query_params or {})
+    query_params.update(kwargs)
+
+    if user_agent is None:
+        user_agent = ua_cloudbot
+
+    url = prepare_url(url, query_params)
+    request = urllib2.Request(url, post_data)
+
+    if get_method is not None:
+        request.get_method = lambda: get_method
+
+    request.add_header('User-Agent', user_agent)
+    if referer is not None:
+        request.add_header('Referer', referer)
+
+    if cookies:
+        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
+    else:
+        opener = urllib2.build_opener()
+
+    if timeout:
+        return opener.open(request, timeout=timeout)
+    return opener.open(request)
+
+
+def prepare_url(url, queries):
+    if queries:
+        scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
+
+        query = dict(urlparse.parse_qsl(query))
+        query.update(queries)
+        query = urllib.urlencode(dict((to_utf8(key), to_utf8(value))
+                                      for key, value in query.iteritems()))
+
+        url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
+
+    return url
+
+
+def to_utf8(s):
+    if isinstance(s, unicode):
+        return s.encode('utf8', 'ignore')
+    else:
+        return str(s)
+
+
+def quote_plus(s):
+    return _quote_plus(to_utf8(s))
+
+
+def unescape(s):
+    if not s.strip():
+        return s
+    return html.fromstring(s).text_content()
diff --git a/util/text.py b/util/text.py
new file mode 100644
index 0000000..9bc40f4
--- /dev/null
+++ b/util/text.py
@@ -0,0 +1,230 @@
+# -*- coding: utf-8 -*-
+""" formatting.py - handy functions for formatting text
+    this file contains code from the following URL:
+    <http://code.djangoproject.com/svn/django/trunk/django/utils/text.py>
+"""
+
+import re
+
+from HTMLParser import HTMLParser
+import htmlentitydefs
+
+
+class HTMLTextExtractor(HTMLParser):
+    def __init__(self):
+        HTMLParser.__init__(self)
+        self.result = []
+
+    def handle_data(self, d):
+        self.result.append(d)
+
+    def handle_charref(self, number):
+        codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number)
+        self.result.append(unichr(codepoint))
+
+    def handle_entityref(self, name):
+        codepoint = htmlentitydefs.name2codepoint[name]
+        self.result.append(unichr(codepoint))
+
+    def get_text(self):
+        return u''.join(self.result)
+
+
+def strip_html(html):
+    s = HTMLTextExtractor()
+    s.feed(html)
+    return s.get_text()
+
+
+def munge(text, munge_count=0):
+    """munges up text."""
+    reps = 0
+    for n in xrange(len(text)):
+        rep = character_replacements.get(text[n])
+        if rep:
+            text = text[:n] + rep.decode('utf8') + text[n + 1:]
+            reps += 1
+            if reps == munge_count:
+                break
+    return text
+
+
+character_replacements = {
+    'a': 'ä',
+    'b': 'Б',
+    'c': 'ċ',
+    'd': 'đ',
+    'e': 'ë',
+    'f': 'ƒ',
+    'g': 'ġ',
+    'h': 'ħ',
+    'i': 'í',
+    'j': 'ĵ',
+    'k': 'ķ',
+    'l': 'ĺ',
+    'm': 'ṁ',
+    'n': 'ñ',
+    'o': 'ö',
+    'p': 'ρ',
+    'q': 'ʠ',
+    'r': 'ŗ',
+    's': 'š',
+    't': 'ţ',
+    'u': 'ü',
+    'v': '',
+    'w': 'ω',
+    'x': 'χ',
+    'y': 'ÿ',
+    'z': 'ź',
+    'A': 'Å',
+    'B': 'Β',
+    'C': 'Ç',
+    'D': 'Ď',
+    'E': 'Ē',
+    'F': 'Ḟ',
+    'G': 'Ġ',
+    'H': 'Ħ',
+    'I': 'Í',
+    'J': 'Ĵ',
+    'K': 'Ķ',
+    'L': 'Ĺ',
+    'M': 'Μ',
+    'N': 'Ν',
+    'O': 'Ö',
+    'P': 'Р',
+    'Q': 'Ｑ',
+    'R': 'Ŗ',
+    'S': 'Š',
+    'T': 'Ţ',
+    'U': 'Ů',
+    'V': 'Ṿ',
+    'W': 'Ŵ',
+    'X': 'Χ',
+    'Y': 'Ỳ',
+    'Z': 'Ż'}
+
+
+def capitalize_first(line):
+    """
+    capitalises the first letter of words
+    (keeps other letters intact; empty words from repeated spaces are kept)
+    """
+    return ' '.join([s[:1].upper() + s[1:] for s in line.split(' ')])
+
+
+def multiword_replace(text, wordDic):
+    """
+    take a text and replace words that match a key in a dictionary with
+    the associated value, return the changed text
+    """
+    rc = re.compile('|'.join(map(re.escape, wordDic)))
+
+    def translate(match):
+        return wordDic[match.group(0)]
+    return rc.sub(translate, text)
+
+
+def truncate_words(content, length=10, suffix='...'):
+    """Truncates a string after a certain number of words.
+
+    content -- text to shorten; words are delimited by single spaces
+    length  -- maximum number of words to keep
+    suffix  -- appended only when words were actually dropped
+
+    Returns content unchanged when it has at most `length` words.  The
+    previous loop compared with `<=`, so it kept length + 1 words and
+    appended the suffix even when nothing had been removed.
+    """
+    words = content.split(" ")
+    if len(words) <= length:
+        return content
+    # max() guards against a negative length slicing from the wrong end
+    return " ".join(words[:max(length, 0)]) + suffix
+
+
+# from <http://stackoverflow.com/questions/250357/smart-truncate-in-python>
+def truncate_str(content, length=100, suffix='...'):
+    """Truncates a string after a certain number of chars."""
+    if len(content) <= length:
+        return content
+    else:
+        return content[:length].rsplit(' ', 1)[0] + suffix
+
+
+# ALL CODE BELOW THIS LINE IS COVERED BY THE FOLLOWING AGREEMENT:
+
+# Copyright (c) Django Software Foundation and individual contributors.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#  2. Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+#  3. Neither the name of Django nor the names of its contributors may be used
+#     to endorse or promote products derived from this software without
+#     specific prior written permission.
+#
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND
+#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+#DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+#ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Expression to match some_token and some_token="with spaces" (and similarly
+# for single-quoted strings).
+
+split_re = re.compile(r"""((?:[^\s'"]*(?:(?:"(?:[^"\\]|\\.)*" | '(?:[""" \
+                      r"""^'\\]|\\.)*')[^\s'"]*)+) | \S+)""", re.VERBOSE)
+
+
+def smart_split(text):
+    r"""
+    Generator that splits a string by spaces, leaving quoted phrases together.
+    Supports both single and double quotes, and supports escaping quotes with
+    backslashes. In the output, strings will keep their initial and trailing
+    quote marks and escaped quotes will remain escaped (the results can then
+    be further processed with unescape_string_literal()).
+
+    >>> list(smart_split(r'This is "a person\'s" test.'))
+    [u'This', u'is', u'"a person\\\'s"', u'test.']
+    >>> list(smart_split(r"Another 'person\'s' test."))
+    [u'Another', u"'person\\'s'", u'test.']
+    >>> list(smart_split(r'A "\"funky\" style" test.'))
+    [u'A', u'"\\"funky\\" style"', u'test.']
+    """
+    for bit in split_re.finditer(text):
+        yield bit.group(0)
+
+
+def get_text_list(list_, last_word='or'):
+    """
+    >>> get_text_list(['a', 'b', 'c', 'd'])
+    u'a, b, c or d'
+    >>> get_text_list(['a', 'b', 'c'], 'and')
+    u'a, b and c'
+    >>> get_text_list(['a', 'b'], 'and')
+    u'a and b'
+    >>> get_text_list(['a'])
+    u'a'
+    >>> get_text_list([])
+    u''
+    """
+    if len(list_) == 0:
+        return ''
+    if len(list_) == 1:
+        return list_[0]
+    return '%s %s %s' % (
+        # Translators: This string is used as a separator between list elements
+        ', '.join([i for i in list_][:-1]),
+        last_word, list_[-1])
diff --git a/util/textgen.py b/util/textgen.py
new file mode 100644
index 0000000..29d74f0
--- /dev/null
+++ b/util/textgen.py
@@ -0,0 +1,51 @@
+import re
+import random
+
+TEMPLATE_RE = re.compile(r"\{(.+?)\}")
+
+
+class TextGenerator(object):
+    """Builds random strings by filling {part} placeholders in templates."""
+
+    def __init__(self, templates, parts, default_templates=None, variables=None):
+        self.templates = templates
+        self.default_templates = default_templates
+        self.parts = parts
+        self.variables = variables
+
+    def generate_string(self, template=None):
+        """
+        Generates one string from the template named/indexed by `template`.
+        If no template is given, use a random one from default_templates,
+        or a random entry of templates when there are no defaults.
+        """
+        # an explicit template always wins; it used to be ignored without defaults
+        if template is not None:
+            text = self.templates[template]
+        elif self.default_templates:
+            text = self.templates[random.choice(self.default_templates)]
+        else:
+            text = random.choice(self.templates)
+
+        # replace static variables in the template with provided values
+        if self.variables:
+            for key, value in self.variables.items():
+                text = text.replace("{%s}" % key, value)
+
+        # every remaining {name} placeholder refers to an entry in self.parts
+        for required_part in TEMPLATE_RE.findall(text):
+            part = self.parts[required_part]
+            # a non-string entry is a list of alternatives: pick one at random
+            if not isinstance(part, basestring):
+                part = random.choice(part)
+            text = text.replace("{%s}" % required_part, part)
+
+        return text
+
+    def generate_strings(self, amount, template=None):
+        """Return a list of `amount` strings, each generated from `template`."""
+        return [self.generate_string(template) for _ in xrange(amount)]
+
+    def get_template(self, template):
+        """Return the raw, unfilled template stored under `template`."""
+        return self.templates[template]
\ No newline at end of file
diff --git a/util/timeformat.py b/util/timeformat.py
new file mode 100644
index 0000000..185fd36
--- /dev/null
+++ b/util/timeformat.py
@@ -0,0 +1,14 @@
+def timeformat(seconds):
+    """Format a duration in seconds as 'Nd Nh Nm Ns', omitting leading zero units."""
+    # divmod floors explicitly instead of relying on Python 2 integer '/',
+    # so each unit stays whole-numbered for float input as well
+    minutes, seconds = divmod(seconds, 60)
+    hours, minutes = divmod(minutes, 60)
+    days, hours = divmod(hours, 24)
+    if days != 0:
+        return "%sd %sh %sm %ss" % (days, hours, minutes, seconds)
+    elif hours == 0 and minutes != 0:
+        return "%sm %ss" % (minutes, seconds)
+    elif hours == 0 and minutes == 0:
+        return "%ss" % seconds
+    return "%sh %sm %ss" % (hours, minutes, seconds)
diff --git a/util/timesince.py b/util/timesince.py
new file mode 100644
index 0000000..56ec8b0
--- /dev/null
+++ b/util/timesince.py
@@ -0,0 +1,105 @@
+# Copyright (c) Django Software Foundation and individual contributors.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#  2. Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+#  3. Neither the name of Django nor the names of its contributors may be used
+#     to endorse or promote products derived from this software without
+#     specific prior written permission.
+#
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND
+#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+#DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+#ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import datetime
+
+
+def timesince(d, now=None):
+    """
+    Takes two datetime objects and returns the time between d and now
+    as a nicely formatted string, e.g. "10 minutes".  If d occurs after now,
+    then "0 minutes" is returned.
+
+    Units used are years, months, weeks, days, hours, and minutes.
+    Seconds and microseconds are ignored.  Up to two adjacent units will be
+    displayed.  For example, "2 weeks, 3 days" and "1 year, 3 months" are
+    possible outputs, but "2 weeks, 3 hours" and "1 year, 5 days" are not.
+
+    Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since
+    """
+    chunks = (
+        (60 * 60 * 24 * 365, ('year', 'years')),
+        (60 * 60 * 24 * 30, ('month', 'months')),
+        (60 * 60 * 24 * 7, ('week', 'weeks')),
+        (60 * 60 * 24, ('day', 'days')),
+        (60 * 60, ('hour', 'hours')),
+        (60, ('minute', 'minutes'))
+    )
+
+    # Convert int or float (unix epoch) to datetime.datetime for comparison
+    if isinstance(d, int) or isinstance(d, float):
+        d = datetime.datetime.fromtimestamp(d)
+
+    if isinstance(now, int) or isinstance(now, float):
+        now = datetime.datetime.fromtimestamp(now)
+
+    # Convert datetime.date to datetime.datetime for comparison.
+    if not isinstance(d, datetime.datetime):
+        d = datetime.datetime(d.year, d.month, d.day)
+    if now and not isinstance(now, datetime.datetime):
+        now = datetime.datetime(now.year, now.month, now.day)
+
+    if not now:
+        now = datetime.datetime.now()
+
+    # ignore microsecond part of 'd' since we removed it from 'now'
+    delta = now - (d - datetime.timedelta(0, 0, d.microsecond))
+    since = delta.days * 24 * 60 * 60 + delta.seconds
+    if since <= 0:
+        # d is in the future compared to now, stop processing.
+        return u'0 ' + 'minutes'
+    for i, (seconds, name) in enumerate(chunks):
+        count = since // seconds
+        if count != 0:
+            break
+
+    if count == 1:
+        s = '%(number)d %(type)s' % {'number': count, 'type': name[0]}
+    else:
+        s = '%(number)d %(type)s' % {'number': count, 'type': name[1]}
+
+    if i + 1 < len(chunks):
+        # Now get the second item
+        seconds2, name2 = chunks[i + 1]
+        count2 = (since - (seconds * count)) // seconds2
+        if count2 != 0:
+            if count2 == 1:
+                s += ', %d %s' % (count2, name2[0])
+            else:
+                s += ', %d %s' % (count2, name2[1])
+    return s
+
+
+def timeuntil(d, now=None):
+    """
+    Like timesince, but returns a string measuring the time until
+    the given time.
+    """
+    if not now:
+        now = datetime.datetime.now()
+    return timesince(now, d)
diff --git a/util/urlnorm.py b/util/urlnorm.py
new file mode 100644
index 0000000..b3c351b
--- /dev/null
+++ b/util/urlnorm.py
@@ -0,0 +1,139 @@
+"""
+URI Normalization function:
+ * Always provide the URI scheme in lowercase characters.
+ * Always provide the host, if any, in lowercase characters.
+ * Only perform percent-encoding where it is essential.
+ * Always use uppercase A-through-F characters when percent-encoding.
+ * Prevent dot-segments appearing in non-relative URI paths.
+ * For schemes that define a default authority, use an empty authority if the
+   default is desired.
+ * For schemes that define an empty path to be equivalent to a path of "/",
+   use "/".
+ * For schemes that define a port, use an empty port if the default is desired
+ * All portions of the URI must be utf-8 encoded NFC from Unicode strings
+
+implements:
+  http://gbiv.com/protocols/uri/rev-2002/rfc2396bis.html#canonical-form
+  http://www.intertwingly.net/wiki/pie/PaceCanonicalIds
+
+inspired by:
+  Tony J. Ibbs,    http://starship.python.net/crew/tibs/python/tji_url.py
+  Mark Nottingham, http://www.mnot.net/python/urlnorm.py
+"""
+
+__license__ = "Python"
+
+import re
+import unicodedata
+import urlparse
+from urllib import quote, unquote
+
+default_port = {
+    'http': 80,
+}
+
+
+class Normalizer(object):
+    def __init__(self, regex, normalize_func):
+        self.regex = regex
+        self.normalize = normalize_func
+
+
+normalizers = (Normalizer(re.compile(
+    r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P<tld>[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P<ASIN>[0-9A-Za-z]{10})'),
+                          lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))),
+               Normalizer(re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'),
+                          lambda m: r'http://img.waffleimages.com/%s' % m.group(1)),
+               Normalizer(re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'),
+                          lambda m: r'http://youtube.com/watch?v=%s' % m.group(1)),
+)
+
+
+def normalize(url, assume_scheme=False):
+    """Normalize a URL."""
+
+    scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip())
+    userinfo, host, port = re.search('([^@]*@)?([^:]*):?(.*)', auth).groups()
+
+    # Always provide the URI scheme in lowercase characters.
+    scheme = scheme.lower()
+
+    # Always provide the host, if any, in lowercase characters.
+    host = host.lower()
+    if host and host[-1] == '.':
+        host = host[:-1]
+    if host and host.startswith("www."):
+        if not scheme:
+            scheme = "http"
+        host = host[4:]
+    elif path and path.startswith("www."):
+        if not scheme:
+            scheme = "http"
+        path = path[4:]
+
+    if assume_scheme and not scheme:
+        scheme = assume_scheme.lower()
+
+    # Only perform percent-encoding where it is essential.
+    # Always use uppercase A-through-F characters when percent-encoding.
+    # All portions of the URI must be utf-8 encoded NFC from Unicode strings
+    def clean(string):
+        string = unicode(unquote(string), 'utf-8', 'replace')
+        return unicodedata.normalize('NFC', string).encode('utf-8')
+
+    path = quote(clean(path), "~:/?#[]@!$&'()*+,;=")
+    fragment = quote(clean(fragment), "~")
+
+    # note care must be taken to only encode & and = characters as values
+    query = "&".join(["=".join([quote(clean(t), "~:/?#[]@!$'()*+,;=")
+                                for t in q.split("=", 1)]) for q in query.split("&")])
+
+    # Prevent dot-segments appearing in non-relative URI paths.
+    if scheme in ["", "http", "https", "ftp", "file"]:
+        output = []
+        for input in path.split('/'):
+            if input == "":
+                if not output:
+                    output.append(input)
+            elif input == ".":
+                pass
+            elif input == "..":
+                if len(output) > 1:
+                    output.pop()
+            else:
+                output.append(input)
+        if input in ["", ".", ".."]:
+            output.append("")
+        path = '/'.join(output)
+
+    # For schemes that define a default authority, use an empty authority if
+    # the default is desired.
+    if userinfo in ["@", ":@"]:
+        userinfo = ""
+
+    # For schemes that define an empty path to be equivalent to a path of "/",
+    # use "/".
+    if path == "" and scheme in ["http", "https", "ftp", "file"]:
+        path = "/"
+
+    # For schemes that define a port, use an empty port if the default is
+    # desired
+    if port and scheme in default_port.keys():
+        if port.isdigit():
+            port = str(int(port))
+            if int(port) == default_port[scheme]:
+                port = ''
+
+    # Put it all back together again
+    auth = (userinfo or "") + host
+    if port:
+        auth += ":" + port
+    if url.endswith("#") and query == "" and fragment == "":
+        path += "#"
+    normal_url = urlparse.urlunsplit((scheme, auth, path, query,
+                                      fragment)).replace("http:///", "http://")
+    for norm in normalizers:
+        m = norm.regex.match(normal_url)
+        if m:
+            return norm.normalize(m)
+    return normal_url
diff --git a/util/web.py b/util/web.py
new file mode 100644
index 0000000..1180bca
--- /dev/null
+++ b/util/web.py
@@ -0,0 +1,54 @@
+""" web.py - handy functions for web services """
+
+import http
+import urlnorm
+import json
+import urllib
+import yql
+
+short_url = "http://is.gd/create.php"
+paste_url = "http://hastebin.com"
+yql_env = "http://datatables.org/alltables.env"
+
+YQL = yql.Public()
+
+
+class ShortenError(Exception):
+    def __init__(self, code, text):
+        self.code = code
+        self.text = text
+
+    def __str__(self):
+        return self.text
+
+
+def isgd(url):
+    """ shortens a URL with the is.gd API """
+    url = urlnorm.normalize(url.encode('utf-8'), assume_scheme='http')
+    params = urllib.urlencode({'format': 'json', 'url': url})
+    request = http.get_json("http://is.gd/create.php?%s" % params)
+
+    if "errorcode" in request:
+        raise ShortenError(request["errorcode"], request["errormessage"])
+    else:
+        return request["shorturl"]
+
+
+def try_isgd(url):
+    """ shortens url with is.gd, returning url unchanged if that fails """
+    try:
+        return isgd(url)
+    except (ShortenError, http.URLError):  # URLError also covers HTTPError
+        return url
+
+
+def haste(text, ext='txt'):
+    """ pastes text to a hastebin server """
+    page = http.get(paste_url + "/documents", post_data=text)
+    data = json.loads(page)
+    return ("%s/%s.%s" % (paste_url, data['key'], ext))
+
+
+def query(query, params=None):
+    """ runs a YQL query and returns the results (params defaults to no parameters) """
+    return YQL.execute(query, params or {}, env=yql_env)