First :D

2011-11-20 22:23:31 +13:00 · 2011-11-20 22:23:31 +13:00 · 37588421f3
commit 37588421f3
100 changed files with 22673 additions and 0 deletions
--- a/plugins/util/init.py
+++ b/plugins/util/init.py
--- a/plugins/util/hook.py
+++ b/plugins/util/hook.py
@ -0,0 +1,101 @@
+import inspect
+import re
+
+
+def _hook_add(func, add, name=''):
+    """Record hook metadata on *func*.
+
+    Appends *add* to ``func._hook`` (creating the list if needed) and,
+    when not already set, fills in ``func._filename``, ``func._args`` and
+    ``func._thread``.
+
+    func -- the function being decorated.
+    add  -- the hook descriptor to store; the decorators in this module
+            pass ``[kind, (func, ...)]`` lists.
+    name -- hook kind used in the error message.  When non-empty the
+            function's signature is validated.
+
+    Raises ValueError when *name* is given and the computed number of
+    non-keyword arguments is not exactly 1.
+    """
+    if hasattr(func, '_hook'):
+        func._hook.append(add)
+    else:
+        func._hook = [add]
+
+    if not hasattr(func, '_filename'):
+        func._filename = func.func_code.co_filename
+
+    if not hasattr(func, '_args'):
+        spec = inspect.getargspec(func)
+        has_kwargs = bool(spec.keywords)
+        has_varargs = bool(spec.varargs)
+        n_defaults = len(spec.defaults) if spec.defaults else 0
+
+        if name:
+            # NOTE(review): getargspec().args does not appear to include
+            # the *args / **kwargs names, so subtracting for them here (and
+            # using a positive slice end below) looks suspect.  Behaviour is
+            # kept exactly as before -- confirm against the dispatcher
+            # before changing it.
+            positional = len(spec.args) - n_defaults
+            positional -= has_kwargs
+            positional -= has_varargs
+            if positional != 1:
+                raise ValueError(
+                    '%ss must take 1 non-keyword argument (%s)'
+                    % (name, func.__name__))
+
+        injected = []
+        if n_defaults:
+            stop = has_kwargs + has_varargs
+            injected.extend(spec.args[-n_defaults:stop or None])
+        if has_kwargs:
+            injected.append(0)  # means kwargs present
+        func._args = injected
+
+    if not hasattr(func, '_thread'):  # does function run in its own thread?
+        func._thread = False
+
+
+def sieve(func):
+    """Decorator registering *func* as a sieve hook.
+
+    Raises ValueError unless *func* declares exactly five arguments.
+    Returns *func* itself so the decorated name stays callable.
+    """
+    if func.func_code.co_argcount == 5:
+        _hook_add(func, ['sieve', (func,)])
+        return func
+    raise ValueError(
+            'sieves must take 5 arguments: (bot, input, func, type, args)')
+
+
+def command(arg=None, **kwargs):
+    """Decorator registering a function as a command hook.
+
+    Usable bare (``@command``), with an explicit name (``@command('x')``)
+    and/or with extra keyword options, which are stored in the hook's
+    argument dict.  The command name defaults to the function's name.
+    """
+    options = {}
+
+    def command_wrapper(func):
+        options.setdefault('name', func.func_name)
+        _hook_add(func, ['command', (func, options)], 'command')
+        return func
+
+    # Bare usage: the decorator received the function itself and nothing else.
+    if inspect.isfunction(arg) and not kwargs:
+        return command_wrapper(arg)
+
+    if arg is not None:
+        options['name'] = arg
+    options.update(kwargs)
+    return command_wrapper
+
+
+def event(arg=None, **kwargs):
+    """Decorator registering a function as an event hook.
+
+    Usable bare (``@event``), or called with a whitespace-separated string
+    of event names and/or keyword options.  The keyword options become the
+    hook's argument dict; ``events`` defaults to ``['*']`` and ``name`` is
+    always set to the function's name.
+    """
+    args = kwargs
+
+    def event_wrapper(func):
+        args['name'] = func.func_name
+        args.setdefault('events', ['*'])
+        _hook_add(func, ['event', (func, args)], 'event')
+        return func
+
+    if inspect.isfunction(arg):
+        # Bare usage.  event_wrapper takes only the function (the options
+        # are captured by closure); passing kwargs as a second positional
+        # argument raised TypeError.
+        return event_wrapper(arg)
+
+    if arg is not None:
+        args['events'] = arg.split()
+    return event_wrapper
+
+
+def singlethread(func):
+    """Decorator that sets ``func._thread`` to True and returns *func*."""
+    setattr(func, '_thread', True)
+    return func
+
+
+def regex(regex, flags=0, **kwargs):
+    """Decorator registering a function as a regex hook.
+
+    regex  -- the pattern source; compiled with *flags* when the decorator
+              is applied.
+    kwargs -- extra options stored in the hook's argument dict alongside
+              ``name``, ``regex`` and the compiled ``re``.
+
+    Raises ValueError when used bare (``@regex`` without a pattern).
+    """
+    if inspect.isfunction(regex):
+        raise ValueError("regex decorators require a regex to match against")
+
+    args = kwargs
+
+    def regex_wrapper(func):
+        args['name'] = func.func_name
+        args['regex'] = regex
+        args['re'] = re.compile(regex, flags)
+        _hook_add(func, ['regex', (func, args)], 'regex')
+        return func
+
+    return regex_wrapper
--- a/plugins/util/http.py
+++ b/plugins/util/http.py
@ -0,0 +1,103 @@
+# convenience wrapper for urllib2 & friends
+
+import cookielib
+import json
+import urllib
+import urllib2
+import urlparse
+
+from urllib import quote, quote_plus as _quote_plus
+from urllib2 import HTTPError, URLError
+
+from BeautifulSoup import BeautifulSoup
+
+
+from lxml import etree, html
+
+
+# User-Agent sent by open() when the caller does not supply one.
+ua_skybot = 'Cloudbot/3.4 http://github.com/lukeroge/cloudbot'
+
+# Browser User-Agent strings callers can pass as open(..., user_agent=...).
+ua_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) ' \
+    'Gecko/20070725 Firefox/2.0.0.6'
+ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
+
+# Module-level cookie jar shared by every open(..., cookies=True) call.
+jar = cookielib.CookieJar()
+
+
+def get(*args, **kwargs):
+    """Fetch a URL and return the response body.
+
+    All arguments are forwarded unchanged to open().  The response object
+    is closed before returning, even if reading fails.
+    """
+    response = open(*args, **kwargs)
+    try:
+        return response.read()
+    finally:
+        response.close()
+
+
+def get_html(*args, **kwargs):
+    """Fetch a URL via get() and parse the body with lxml.html.fromstring."""
+    body = get(*args, **kwargs)
+    return html.fromstring(body)
+
+
+def get_soup(*args, **kwargs):
+    """Fetch a URL via get() and wrap the body in a BeautifulSoup object."""
+    body = get(*args, **kwargs)
+    return BeautifulSoup(body)
+
+
+def get_xml(*args, **kwargs):
+    """Fetch a URL via get() and parse the body with lxml.etree.fromstring."""
+    body = get(*args, **kwargs)
+    return etree.fromstring(body)
+
+
+def get_json(*args, **kwargs):
+    """Fetch a URL via get() and decode the body with json.loads."""
+    body = get(*args, **kwargs)
+    return json.loads(body)
+
+
+def open(url, query_params=None, user_agent=None, post_data=None,
+         get_method=None, cookies=False, **kwargs):
+    """Open *url* and return the response object from the urllib2 opener.
+
+    url          -- the URL to request.
+    query_params -- dict of query-string parameters merged into the URL.
+                    The caller's dict is not modified.
+    user_agent   -- User-Agent header value; defaults to ``ua_skybot``.
+    post_data    -- passed to urllib2.Request as the request data.
+    get_method   -- when given, the request's get_method() returns this
+                    value.
+    cookies      -- when true, requests go through the shared module-level
+                    cookie jar.
+    kwargs       -- additional query-string parameters; these override
+                    same-named entries of *query_params*.
+    """
+    # Work on a copy: updating the caller's dict in place leaked the extra
+    # keyword parameters back into whatever dict the caller reuses.
+    if query_params is None:
+        query_params = {}
+    else:
+        query_params = dict(query_params)
+    query_params.update(kwargs)
+
+    if user_agent is None:
+        user_agent = ua_skybot
+
+    url = prepare_url(url, query_params)
+
+    request = urllib2.Request(url, post_data)
+
+    if get_method is not None:
+        request.get_method = lambda: get_method
+
+    request.add_header('User-Agent', user_agent)
+
+    if cookies:
+        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
+    else:
+        opener = urllib2.build_opener()
+
+    return opener.open(request)
+
+
+def prepare_url(url, queries):
+    """Return *url* with *queries* merged into its query string.
+
+    Parameters already present in the URL are kept unless *queries*
+    overrides them.  Keys and values are passed through to_utf8() before
+    URL-encoding.  When *queries* is empty the URL is returned untouched.
+    """
+    if not queries:
+        return url
+
+    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
+
+    merged = dict(urlparse.parse_qsl(query))
+    merged.update(queries)
+
+    encoded = {}
+    for key, value in merged.iteritems():
+        encoded[to_utf8(key)] = to_utf8(value)
+    query = urllib.urlencode(encoded)
+
+    return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
+
+
+def to_utf8(s):
+    """Return *s* as a byte string.
+
+    Unicode objects are encoded as UTF-8 (unencodable characters are
+    dropped); anything else is converted with str().
+    """
+    if not isinstance(s, unicode):
+        return str(s)
+    return s.encode('utf8', 'ignore')
+
+
+def quote_plus(s):
+    """urllib.quote_plus() that first converts *s* with to_utf8()."""
+    encoded = to_utf8(s)
+    return _quote_plus(encoded)
+
+
+def unescape(s):
+    """Return the text content of the HTML fragment *s*.
+
+    Empty or whitespace-only input is returned unchanged rather than
+    being handed to the HTML parser.
+    """
+    if s.strip():
+        return html.fromstring(s).text_content()
+    return s
--- a/plugins/util/misc.py
+++ b/plugins/util/misc.py
@ -0,0 +1,54 @@
+from htmlentitydefs import name2codepoint
+from time import time as unix_time
+from HTMLParser import HTMLParser
+from datetime import datetime
+import tempfile
+import logging as log
+import errno
+import re
+import sys
+import os
+
+class HTMLStripper(HTMLParser):
+    """HTMLParser subclass that collects a document's text.
+
+    The markup is fed to the parser on construction.  Text and resolved
+    entities are collected, ``<br>`` tags become newlines, and the result
+    is available through the ``stripped`` property.
+    """
+
+    def __init__(self, data):
+        HTMLParser.__init__(self)
+        self._stripped = []
+        self.feed(data)
+
+    def handle_starttag(self, tag, attrs):
+        # Only <br> contributes output; every other tag is dropped.
+        if tag.lower() != 'br':
+            return
+        self._stripped.append('\n')
+
+    def handle_charref(self, name):
+        # Numeric reference: "x.." is hexadecimal, anything else decimal.
+        try:
+            is_hex = name.lower().startswith('x')
+            if is_hex:
+                codepoint = int(name[1:], 16)
+            else:
+                codepoint = int(name)
+            self._stripped.append(unichr(codepoint))
+        except Exception, error:
+            log.warn('invalid entity: %s' % error)
+
+    def handle_entityref(self, name):
+        # Unknown named entities are kept verbatim as "&name;".
+        try:
+            text = unichr(name2codepoint[name])
+        except Exception, error:
+            log.warn('unknown entity: %s' % error)
+            text = u'&%s;' % name
+        self._stripped.append(text)
+
+    def handle_data(self, data):
+        self._stripped.append(data)
+
+    @property
+    def stripped(self):
+        """All collected pieces joined into one string."""
+        pieces = self._stripped
+        return ''.join(pieces)
+
+# Translation table for unicode.translate(): code point -> superscript form.
+# NOTE(review): SUPER_MAP was referenced by superscript() but not defined in
+# this module; this table (digits and "+-=()") is a reconstruction --
+# confirm the intended character set.
+SUPER_MAP = dict(zip(
+    [ord(char) for char in u'0123456789+-=()'],
+    u'\u2070\u00b9\u00b2\u00b3\u2074\u2075\u2076\u2077\u2078\u2079'
+    u'\u207a\u207b\u207c\u207d\u207e'))
+
+def superscript(text):
+    """Return *text* with the characters in SUPER_MAP translated.
+
+    Byte strings are decoded as UTF-8 first, so the result is always a
+    unicode object.  Characters without an entry in SUPER_MAP are left
+    unchanged.
+    """
+    if isinstance(text, str):
+        # str has no free function decode(); use the method.
+        text = text.decode('utf-8')
+    return text.translate(SUPER_MAP)
+
+def strip_html(data):
+    """Return the text of the HTML string *data* using HTMLStripper."""
+    stripper = HTMLStripper(data)
+    return stripper.stripped
--- a/plugins/util/molecular.py
+++ b/plugins/util/molecular.py
@ -0,0 +1,219 @@
+#!/usr/bin/env python
+#
+# molecular.py
+# Copyright (c) 2001, Chris Gonnerman
+# All rights reserved.
+# 
+# Redistribution and use in source and binary forms, with or without 
+# modification, are permitted provided that the following conditions
+# are met:
+# 
+# Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer. 
+# 
+# Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution. 
+# 
+# Neither the name of the author nor the names of any contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission. 
+# 
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+"""molecular.py -- molecular (ngenoid) name generator
+
+This module knows how to generate "random" names for RPG characters.
+It uses the same method as the "ngen" name generator by Kimmo Kulovesi,
+and in fact it can use the same name files.  molecular.py knows how
+to merge multiple tables also, which can be handy...
+
+If run as a command-line program, use the following options:
+
+    -r namefile    -- read the given name file and add to the
+                      current name table.
+    nnn            -- generate nnn (a number) names and print
+                      on standard output.
+
+To generate names from a name file:
+
+    python molecular.py -r file 10
+
+As a module (to be imported) you get the following classes and functions:
+
+    NameFile (class)    -- a file wrapper with a disabled close() method,
+                           used internally and probably not useful otherwise.
+    nameopen (function) -- opens a file; takes filename and mode options,
+                           searches the default name file directory if not
+                           found in current directory, handles "-" filenames,
+                           and uses NameFile to disable closing of sys.stdin/
+                           sys.stdout.
+    Molecule (class)    -- the meat of the matter.  A Molecule instance has
+                           the following methods:
+
+                                .load(file)    -- loads a name file, 
+                                                  which may be a file-like
+                                                  object with a .readline()
+                                                  method or a filename as a
+                                                  string.
+                                .name()        -- generate one name and
+                                                  return it.
+"""
+
+__version__ = "1.0"
+
+import string, re, sys, random
+
+# Fallback directory nameopen() searches when a name file cannot be opened
+# relative to the current directory.  NOTE(review): hard-coded absolute
+# path -- confirm it matches the deployment.
+NAMEDIR = "/home/ircbot/bot/plugins/util/names"
+# Section names every Molecule name table is pre-populated with.
+NAMESECTIONS = [ "inf", "first", "mid", "final", "notes", "end" ]
+
+class NameFile:
+    """File wrapper whose close() is a no-op.
+
+    Every other file method is delegated to the wrapped object, as are
+    the attributes listed in ``__file_attributes``.
+    """
+    __file_attributes = ('closed','mode','name','softspace')
+    def __init__(self, file):
+        self.fd = file
+    def close(self):
+        # Deliberately does nothing: the wrapped file stays open.
+        pass
+    def flush(self):
+        return self.fd.flush()
+    def isatty(self):
+        return self.fd.isatty()
+    def fileno(self):
+        return self.fd.fileno()
+    def read(self, *args):
+        return self.fd.read(*args)
+    def readline(self, *args):
+        return self.fd.readline(*args)
+    def readlines(self, *args):
+        return self.fd.readlines(*args)
+    def seek(self, *args):
+        return self.fd.seek(*args)
+    def tell(self):
+        return self.fd.tell()
+    def write(self, str):
+        return self.fd.write(str)
+    def writelines(self, list):
+        return self.fd.writelines(list)
+    def __repr__(self): 
+        return repr(self.fd)
+    def __getattr__(self, name):
+        if name in self.__file_attributes:
+            return getattr(self.fd, name)
+        # Missing attributes must raise AttributeError (not KeyError) so
+        # that getattr(obj, name, default) and similar protocols work.
+        try:
+            return self.__dict__[name]
+        except KeyError:
+            raise AttributeError(name)
+    def __setattr__(self, name, value):
+        if name in self.__file_attributes:
+            setattr(self.fd, name, value)
+        else:
+            self.__dict__[name] = value
+    def __cmp__(self, file):
+        """I'm not sure what the correct behavior is, and therefore 
+        this implementation is just a guess."""
+        if type(file) == type(self.fd):
+            return cmp(self.fd, file)
+        else:
+            return cmp(self.fd, file.fd)
+
+
+class NameReader:
+    """Line reader that remembers the most recently read line in ``line``."""
+    def __init__(self, file):
+        self.file = file
+        self.line = ""
+    def next(self):
+        # Read one line, remember it, and return it ("" at end of file).
+        current = self.file.readline()
+        self.line = current
+        return current
+    def close(self):
+        result = self.file.close()
+        return result
+
+
+def safeopen(filename, mode):
+    """Open *filename* with *mode*; return None instead of raising IOError."""
+    try:
+        handle = open(filename, mode)
+    except IOError:
+        handle = None
+    return handle
+
+def nameopen(filename, mode):
+    """Open a name file, trying several locations in turn.
+
+    "-" maps to stdin (when *mode* contains "r") or stdout, wrapped in
+    NameFile so that closing it has no effect.  Otherwise *filename* is
+    tried as given and with a ".nam" suffix; for read modes the same two
+    names are then tried under NAMEDIR.  The last attempt uses plain
+    open(), so IOError propagates when nothing could be found.
+    """
+    reading = "r" in mode
+
+    if filename == "-":
+        stream = sys.stdin if reading else sys.stdout
+        return NameFile(stream)
+
+    fp = safeopen(filename, mode)
+    if fp is None:
+        fp = safeopen(filename + ".nam", mode)
+
+    if fp is None and reading:
+        fallback = NAMEDIR + "/" + filename
+        fp = safeopen(fallback, mode)
+        # last call is open() instead of safeopen() to finally raise
+        # the exception if we just can't find the file.
+        if fp is None:
+            fp = open(fallback + ".nam", mode)
+    return fp
+    
+
+class Molecule:
+    """Name table loaded from name files, plus a random name generator.
+
+    ``nametbl`` maps a section name to the list of lines read under that
+    "[section]" header.  Lines read before any header go to the ""
+    section.  ``cursection`` is the list new lines are appended to and is
+    kept between load() calls.
+    """
+
+    def __init__(self):
+        self.nametbl = {}
+        for i in NAMESECTIONS:
+            self.nametbl[i] = []
+        self.nametbl[""] = []
+        self.cursection = self.nametbl[""]
+
+    def load(self, fp):
+        """Merge a name file into the table.
+
+        *fp* is either a file name (opened with nameopen()) or a file-like
+        object with a readline() method (wrapped in NameFile, so it is not
+        closed here).
+        """
+        if isinstance(fp, basestring):
+            fp = nameopen(fp, "r")
+        else:
+            fp = NameFile(fp)
+        rdr = NameReader(fp)
+        try:
+            while rdr.next():
+                # Strip only the line terminator.  Slicing off the last
+                # character unconditionally ate a real character when the
+                # final line had no trailing newline.
+                line = rdr.line.rstrip("\r\n")
+                if line[:1] == '[' and line[-1:] == ']':
+                    section = line[1:-1]
+                    self.cursection = self.nametbl.setdefault(section, [])
+                else:
+                    self.cursection.append(line)
+        finally:
+            # Close the file even when reading fails part-way through.
+            fp.close()
+    
+    def name(self):
+        """Return one name: a random "first", "mid" and "final" part
+        (each skipped when its section is empty) joined together."""
+        parts = []
+        for section in ("first", "mid", "final"):
+            choices = self.nametbl[section]
+            if choices:
+                parts.append(random.choice(choices))
+        return "".join(parts)
+
+if __name__ == "__main__":
+    # Command-line driver: "-r file" loads a name file, a number prints
+    # that many generated names.  Arguments are processed left to right.
+
+    if len(sys.argv) <= 1:
+        sys.stderr.write( \
+            "Usage: molecular.py [ -r file ] [ nn ]\n")
+        sys.exit(0)
+
+    name = Molecule()
+
+    i = 1
+
+    while i < len(sys.argv):
+        arg = sys.argv[i]
+        if arg == "-r":
+            i += 1
+            if i >= len(sys.argv):
+                # "-r" was the last argument: report it instead of
+                # failing with an IndexError traceback.
+                sys.stderr.write("molecular.py: -r requires a file name\n")
+                sys.exit(1)
+            name.load(sys.argv[i])
+        else:
+            # Use a separate loop variable: reusing `i` here overwrote the
+            # argv index and could skip arguments or loop forever.
+            for _count in range(int(arg)):
+                print name.name()
+        i += 1
--- a/plugins/util/timesince.py
+++ b/plugins/util/timesince.py
@ -0,0 +1,102 @@
+# Copyright (c) Django Software Foundation and individual contributors.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+#  1. Redistributions of source code must retain the above copyright notice,
+#     this list of conditions and the following disclaimer.
+#
+#  2. Redistributions in binary form must reproduce the above copyright
+#     notice, this list of conditions and the following disclaimer in the
+#     documentation and/or other materials provided with the distribution.
+#
+#  3. Neither the name of Django nor the names of its contributors may be used
+#     to endorse or promote products derived from this software without
+#     specific prior written permission.
+#
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+#DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+#ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import datetime
+
+
+def timesince(d, now=None):
+    """
+    Returns the time between d and now as a nicely formatted string,
+    e.g. "10 minutes".  If d occurs after now, then "0 minutes" is returned.
+
+    d and now may each be a datetime.datetime, a datetime.date, or an int or
+    float unix timestamp.  If now is omitted (or falsy) the current local
+    time is used.
+
+    Units used are years, months, weeks, days, hours, and minutes.
+    Seconds and microseconds are ignored.  Up to two adjacent units will be
+    displayed.  For example, "2 weeks, 3 days" and "1 year, 3 months" are
+    possible outputs, but "2 weeks, 3 hours" and "1 year, 5 days" are not.
+
+    Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since
+    """
+    chunks = (
+      (60 * 60 * 24 * 365, ('year', 'years')),
+      (60 * 60 * 24 * 30, ('month', 'months')),
+      (60 * 60 * 24 * 7, ('week', 'weeks')),
+      (60 * 60 * 24, ('day', 'days')),
+      (60 * 60, ('hour', 'hours')),
+      (60, ('minute', 'minutes'))
+    )
+
+    # Convert int or float (unix epoch) to datetime.datetime for comparison
+    if isinstance(d, (int, float)):
+        d = datetime.datetime.fromtimestamp(d)
+
+    # Convert datetime.date to datetime.datetime for comparison.
+    if not isinstance(d, datetime.datetime):
+        d = datetime.datetime(d.year, d.month, d.day)
+
+    # Normalise 'now' the same way as 'd'.  Accepting unix timestamps here
+    # is what lets timeuntil() work with a numeric target time.
+    if not now:
+        now = datetime.datetime.now()
+    elif isinstance(now, (int, float)):
+        now = datetime.datetime.fromtimestamp(now)
+    elif not isinstance(now, datetime.datetime):
+        now = datetime.datetime(now.year, now.month, now.day)
+
+    # Drop the microsecond part of 'd'; only whole seconds are used below.
+    delta = now - (d - datetime.timedelta(0, 0, d.microsecond))
+    since = delta.days * 24 * 60 * 60 + delta.seconds
+    if since <= 0:
+        # d is in the future compared to now, stop processing.
+        return u'0 ' + 'minutes'
+
+    # Find the largest unit that fits at least once.  If none does (less
+    # than a minute), the loop ends on minutes with a count of 0.
+    for i, (seconds, name) in enumerate(chunks):
+        count = since // seconds
+        if count != 0:
+            break
+
+    s = '%(number)d %(type)s' % {'number': count,
+                                 'type': name[0] if count == 1 else name[1]}
+
+    if i + 1 < len(chunks):
+        # Now get the second item
+        seconds2, name2 = chunks[i + 1]
+        count2 = (since - (seconds * count)) // seconds2
+        if count2 != 0:
+            s += ', %d %s' % (count2, name2[0] if count2 == 1 else name2[1])
+    return s
+
+
+def timeuntil(d, now=None):
+    """
+    Like timesince, but returns a string measuring the time from now
+    until the given time d.  now defaults to the current local time.
+    """
+    reference = now or datetime.datetime.now()
+    return timesince(reference, d)
--- a/plugins/util/urlnorm.py
+++ b/plugins/util/urlnorm.py
@ -0,0 +1,133 @@
+"""
+URI Normalization function:
+ * Always provide the URI scheme in lowercase characters.
+ * Always provide the host, if any, in lowercase characters.
+ * Only perform percent-encoding where it is essential.
+ * Always use uppercase A-through-F characters when percent-encoding.
+ * Prevent dot-segments appearing in non-relative URI paths.
+ * For schemes that define a default authority, use an empty authority if the
+   default is desired.
+ * For schemes that define an empty path to be equivalent to a path of "/",
+   use "/".
+ * For schemes that define a port, use an empty port if the default is desired
+ * All portions of the URI must be utf-8 encoded NFC from Unicode strings
+
+implements:
+  http://gbiv.com/protocols/uri/rev-2002/rfc2396bis.html#canonical-form
+  http://www.intertwingly.net/wiki/pie/PaceCanonicalIds
+
+inspired by:
+  Tony J. Ibbs,    http://starship.python.net/crew/tibs/python/tji_url.py
+  Mark Nottingham, http://www.mnot.net/python/urlnorm.py
+"""
+
+__license__ = "Python"
+
+import re
+import unicodedata
+import urlparse
+from urllib import quote, unquote
+
+# Default port per scheme.  normalize() drops an explicit port from the
+# URL when it equals the default listed here for the URL's scheme.
+default_port = {
+    'http': 80,
+}
+
+
+class Normalizer(object):
+    """Pairs a compiled regex with a function that rewrites its match.
+
+    regex          -- compiled pattern tried against a normalized URL.
+    normalize_func -- called with the match object; returns the canonical
+                      URL string.  Stored as the ``normalize`` attribute.
+    """
+    def __init__(self, regex, normalize_func):
+        self.regex = regex
+        self.normalize = normalize_func
+
+# Site-specific rewrites applied by normalize() after generic URL
+# normalization; the first pattern that matches wins.
+# The YouTube pattern accepts upper-case letters too: video ids are
+# case-sensitive, and a lower-case-only class truncated or rejected them.
+normalizers = ( Normalizer( re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P<tld>[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P<ASIN>[0-9A-Za-z]{10})'),
+                            lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))),
+                Normalizer( re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'),
+                            lambda m: r'http://img.waffleimages.com/%s' % m.group(1) ),
+                Normalizer( re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'),
+                            lambda m: r'http://youtube.com/watch?v=%s' % m.group(1) ),
+    )
+
+
+def normalize(url):
+    """Return the canonical form of *url*.
+
+    Applies the generic rules listed in the module docstring (lower-case
+    scheme and host, minimal percent-encoding, dot-segment removal,
+    default-port removal, "www." stripping) and then the first matching
+    site-specific entry of ``normalizers``, if any.
+    """
+
+    scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip())
+    userinfo, host, port = re.search('([^@]*@)?([^:]*):?(.*)', auth).groups()
+
+    # Always provide the URI scheme in lowercase characters.
+    scheme = scheme.lower()
+
+    # Always provide the host, if any, in lowercase characters, without a
+    # trailing dot and without a leading "www." (a scheme-less URL that
+    # starts with "www." is assumed to be http).
+    host = host.lower()
+    if host.endswith('.'):
+        host = host[:-1]
+    if host.startswith("www."):
+        scheme = scheme or "http"
+        host = host[4:]
+    elif path.startswith("www."):
+        scheme = scheme or "http"
+        path = path[4:]
+
+    # Only perform percent-encoding where it is essential.
+    # Always use uppercase A-through-F characters when percent-encoding.
+    # All portions of the URI must be utf-8 encoded NFC from Unicode strings
+    def clean(text):
+        decoded = unicode(unquote(text), 'utf-8', 'replace')
+        return unicodedata.normalize('NFC', decoded).encode('utf-8')
+
+    path = quote(clean(path), "~:/?#[]@!$&'()*+,;=")
+    fragment = quote(clean(fragment), "~")
+
+    # note care must be taken to only encode & and = characters as values
+    pairs = []
+    for item in query.split("&"):
+        pieces = [quote(clean(piece), "~:/?#[]@!$'()*+,;=")
+                  for piece in item.split("=", 1)]
+        pairs.append("=".join(pieces))
+    query = "&".join(pairs)
+
+    # Prevent dot-segments appearing in non-relative URI paths.
+    if scheme in ("", "http", "https", "ftp", "file"):
+        kept = []
+        for segment in path.split('/'):
+            if segment == "..":
+                # never pop the leading element
+                if len(kept) > 1:
+                    kept.pop()
+            elif segment == "":
+                # keep an empty segment only at the very start
+                if not kept:
+                    kept.append(segment)
+            elif segment != ".":
+                kept.append(segment)
+        # A path ending in "", "." or ".." keeps its trailing slash.
+        if segment in ("", ".", ".."):
+            kept.append("")
+        path = '/'.join(kept)
+
+    # For schemes that define a default authority, use an empty authority if
+    # the default is desired.
+    if userinfo in ("@", ":@"):
+        userinfo = ""
+
+    # For schemes that define an empty path to be equivalent to a path of "/",
+    # use "/".
+    if not path and scheme in ("http", "https", "ftp", "file"):
+        path = "/"
+
+    # For schemes that define a port, use an empty port if the default is
+    # desired
+    if port and scheme in default_port and port.isdigit():
+        port = str(int(port))
+        if int(port) == default_port[scheme]:
+            port = ''
+
+    # Put it all back together again
+    auth = (userinfo or "") + host
+    if port:
+        auth = auth + ":" + port
+    if url.endswith("#") and query == "" and fragment == "":
+        path = path + "#"
+    rebuilt = urlparse.urlunsplit((scheme, auth, path, query, fragment))
+    normal_url = rebuilt.replace("http:///", "http://")
+
+    # Site-specific canonical forms: first matching normalizer wins.
+    for norm in normalizers:
+        found = norm.regex.match(normal_url)
+        if found:
+            return norm.normalize(found)
+    return normal_url