This commit is contained in:
Luke Rogers 2011-11-20 22:23:31 +13:00
commit 37588421f3
100 changed files with 22673 additions and 0 deletions

0
plugins/util/__init__.py Normal file
View file

101
plugins/util/hook.py Normal file
View file

@ -0,0 +1,101 @@
import inspect
import re
def _hook_add(func, add, name=''):
if not hasattr(func, '_hook'):
func._hook = []
func._hook.append(add)
if not hasattr(func, '_filename'):
func._filename = func.func_code.co_filename
if not hasattr(func, '_args'):
argspec = inspect.getargspec(func)
if name:
n_args = len(argspec.args)
if argspec.defaults:
n_args -= len(argspec.defaults)
if argspec.keywords:
n_args -= 1
if argspec.varargs:
n_args -= 1
if n_args != 1:
err = '%ss must take 1 non-keyword argument (%s)' % (name,
func.__name__)
raise ValueError(err)
args = []
if argspec.defaults:
end = bool(argspec.keywords) + bool(argspec.varargs)
args.extend(argspec.args[-len(argspec.defaults):
end if end else None])
if argspec.keywords:
args.append(0) # means kwargs present
func._args = args
if not hasattr(func, '_thread'): # does function run in its own thread?
func._thread = False
def sieve(func):
    """Decorator registering *func* as a sieve hook.

    Raises:
        ValueError: if *func* does not declare exactly five arguments.
    """
    argcount = func.func_code.co_argcount
    if argcount != 5:
        raise ValueError(
            'sieves must take 5 arguments: (bot, input, func, type, args)')
    hook = ['sieve', (func,)]
    _hook_add(func, hook)
    return func
def command(arg=None, **kwargs):
    """Decorator registering a function as a command hook.

    Usable bare (``@command``), with a name (``@command('name')``) and/or
    with keyword options, which are stored in the hook's argument dict.
    When no name is supplied the function's own name is used.
    """
    options = {}

    def command_wrapper(func):
        options.setdefault('name', func.func_name)
        _hook_add(func, ['command', (func, options)], 'command')
        return func

    # Bare usage: the decorated function itself arrives as ``arg``.
    if not kwargs and inspect.isfunction(arg):
        return command_wrapper(arg)

    if arg is not None:
        options['name'] = arg
    options.update(kwargs)
    return command_wrapper
def event(arg=None, **kwargs):
    """Decorator registering a function as an event hook.

    Usable bare (``@event``) or with a whitespace-separated string of event
    names (``@event('JOIN PART')``).  Keyword options are stored in the
    hook's argument dict; ``events`` defaults to ``['*']`` and ``name`` is
    always set to the decorated function's name.
    """
    args = kwargs

    def event_wrapper(func):
        args['name'] = func.func_name
        args.setdefault('events', ['*'])
        _hook_add(func, ['event', (func, args)], 'event')
        return func

    if inspect.isfunction(arg):
        # Bare usage.  The wrapper takes only the function; kwargs reach it
        # through the ``args`` closure, so passing them again was a TypeError.
        return event_wrapper(arg)

    if arg is not None:
        args['events'] = arg.split()
    return event_wrapper
def singlethread(func):
    """Decorator that sets ``func._thread`` to True and returns *func*."""
    setattr(func, '_thread', True)
    return func
def regex(regex, flags=0, **kwargs):
    """Decorator registering a function as a regex hook.

    *regex* is the pattern source and *flags* are passed to ``re.compile``.
    The hook's argument dict receives ``name``, ``regex`` (the source) and
    ``re`` (the compiled pattern) in addition to any keyword options.

    Raises:
        ValueError: if used bare, i.e. *regex* is a function.
    """
    if inspect.isfunction(regex):
        raise ValueError("regex decorators require a regex to match against")

    options = kwargs

    def regex_wrapper(func):
        options['name'] = func.func_name
        options['regex'] = regex
        options['re'] = re.compile(regex, flags)
        _hook_add(func, ['regex', (func, options)], 'regex')
        return func

    return regex_wrapper

103
plugins/util/http.py Normal file
View file

@ -0,0 +1,103 @@
# convenience wrapper for urllib2 & friends
import cookielib
import json
import urllib
import urllib2
import urlparse
from urllib import quote, quote_plus as _quote_plus
from urllib2 import HTTPError, URLError
from BeautifulSoup import BeautifulSoup
from lxml import etree, html
# User-Agent header values.  open() falls back to ua_skybot when the caller
# does not pass a user_agent.
ua_skybot = 'Cloudbot/3.4 http://github.com/lukeroge/cloudbot'
ua_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.6) ' \
    'Gecko/20070725 Firefox/2.0.0.6'
ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)'
# Module-wide cookie jar, used by open() when called with cookies=True.
jar = cookielib.CookieJar()
def get(*args, **kwargs):
    """Fetch a URL and return the response body.

    All arguments are forwarded unchanged to this module's ``open``.
    The response object is closed explicitly once the body has been read
    instead of being left for the garbage collector.
    """
    response = open(*args, **kwargs)
    try:
        return response.read()
    finally:
        response.close()
def get_html(*args, **kwargs):
    """Fetch a URL via ``get`` and parse the body with ``lxml.html``."""
    body = get(*args, **kwargs)
    return html.fromstring(body)
def get_soup(*args, **kwargs):
    """Fetch a URL via ``get`` and wrap the body in a BeautifulSoup object."""
    body = get(*args, **kwargs)
    return BeautifulSoup(body)
def get_xml(*args, **kwargs):
    """Fetch a URL via ``get`` and parse the body with ``lxml.etree``."""
    body = get(*args, **kwargs)
    return etree.fromstring(body)
def get_json(*args, **kwargs):
    """Fetch a URL via ``get`` and decode the body as JSON."""
    body = get(*args, **kwargs)
    return json.loads(body)
def open(url, query_params=None, user_agent=None, post_data=None,
         get_method=None, cookies=False, **kwargs):
    """Open *url* with urllib2 and return the response object.

    Args:
        url: address to request.
        query_params: optional mapping of query-string parameters.  It is
            copied before use, so the caller's object is never modified.
        user_agent: User-Agent header value; defaults to ``ua_skybot``.
        post_data: request body handed to ``urllib2.Request``.
        get_method: if given, the request's ``get_method`` is overridden to
            return this value.
        cookies: when true, requests go through the module-level cookie jar.
        **kwargs: extra query-string parameters, merged over *query_params*.
    """
    # Merge into a private copy: updating query_params in place leaked the
    # keyword arguments back into the caller's dict.
    params = dict(query_params) if query_params else {}
    params.update(kwargs)

    if user_agent is None:
        user_agent = ua_skybot

    request = urllib2.Request(prepare_url(url, params), post_data)
    if get_method is not None:
        request.get_method = lambda: get_method
    request.add_header('User-Agent', user_agent)

    if cookies:
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar))
    else:
        opener = urllib2.build_opener()
    return opener.open(request)
def prepare_url(url, queries):
    """Return *url* with *queries* merged into its query string.

    Parameters already present in the URL are kept unless *queries*
    overrides them.  Keys and values are passed through ``to_utf8`` before
    encoding.  When *queries* is empty the URL is returned untouched.
    """
    if not queries:
        return url

    scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
    merged = dict(urlparse.parse_qsl(query))
    merged.update(queries)

    encoded = {}
    for key, value in merged.iteritems():
        encoded[to_utf8(key)] = to_utf8(value)

    return urlparse.urlunsplit(
        (scheme, netloc, path, urllib.urlencode(encoded), fragment))
def to_utf8(s):
    """Return *s* as a byte string.

    Unicode input is encoded as UTF-8 (unencodable characters are dropped);
    anything else is converted with ``str()``.
    """
    if not isinstance(s, unicode):
        return str(s)
    return s.encode('utf8', 'ignore')
def quote_plus(s):
    """URL-quote *s* with ``urllib.quote_plus`` after passing it through ``to_utf8``."""
    encoded = to_utf8(s)
    return _quote_plus(encoded)
def unescape(s):
    """Return the text content of the HTML fragment *s*.

    Empty or whitespace-only input is returned unchanged without parsing.
    """
    if s.strip():
        return html.fromstring(s).text_content()
    return s

54
plugins/util/misc.py Normal file
View file

@ -0,0 +1,54 @@
from htmlentitydefs import name2codepoint
from time import time as unix_time
from HTMLParser import HTMLParser
from datetime import datetime
import tempfile
import logging as log
import errno
import re
import sys
import os
class HTMLStripper(HTMLParser):
    """HTML parser that collects the text of a document and drops the markup.

    The whole input is fed to the parser on construction; the result is
    available through the ``stripped`` property.  ``<br>`` tags become
    newlines, and character/entity references are appended as the
    characters they denote.
    """

    def __init__(self, data):
        HTMLParser.__init__(self)
        self._stripped = []  # text fragments in document order
        self.feed(data)

    def handle_starttag(self, tag, attrs):
        """Emit a newline for ``<br>``; every other tag is discarded."""
        if tag.lower() == 'br':
            self._stripped.append('\n')

    def handle_charref(self, name):
        """Append the character for a numeric reference (decimal or ``x`` hex).

        Invalid references are logged and skipped.
        """
        try:
            if name.lower().startswith('x'):
                char = int(name[1:], 16)
            else:
                char = int(name)
            self._stripped.append(unichr(char))
        except Exception as error:
            log.warning('invalid entity: %s' % error)

    def handle_entityref(self, name):
        """Append the character for a named entity.

        Unknown entities are logged and kept literally as ``&name;``.
        """
        try:
            char = unichr(name2codepoint[name])
        except Exception as error:
            log.warning('unknown entity: %s' % error)
            char = u'&%s;' % name
        self._stripped.append(char)

    def handle_data(self, data):
        """Append plain text between tags unchanged."""
        self._stripped.append(data)

    @property
    def stripped(self):
        """All collected fragments joined into one string."""
        return ''.join(self._stripped)
def superscript(text):
    """Translate *text* through the ``SUPER_MAP`` character table.

    Byte strings are decoded as UTF-8 first so that ``translate`` operates
    on unicode.
    """
    # NOTE(review): SUPER_MAP is not defined in the visible part of this
    # module -- confirm it is provided before this function is used.
    if isinstance(text, str):
        # There is no free function decode(); use the str method.
        text = text.decode('utf-8')
    return text.translate(SUPER_MAP)
def strip_html(data):
    """Return *data* with its HTML markup removed by ``HTMLStripper``."""
    stripper = HTMLStripper(data)
    return stripper.stripped

219
plugins/util/molecular.py Normal file
View file

@ -0,0 +1,219 @@
#!/usr/bin/env python
#
# molecular.py
# Copyright (c) 2001, Chris Gonnerman
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# Neither the name of the author nor the names of any contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""molecular.py -- molecular (ngenoid) name generator
This module knows how to generate "random" names for RPG characters.
It uses the same method as the "ngen" name generator by Kimmo Kulovesi,
and in fact it can use the same name files. molecular.py knows how
to merge multiple tables also, which can be handy...
If run as a command-line program, use the following options:
-r namefile -- read the given name file and add to the
current name table.
nnn -- generate nnn (a number) names and print
on standard output.
To generate names from a name file:
python molecular.py -r file 10
As a module (to be imported) you get the following classes and functions:
NameFile (class) -- a file wrapper with a disabled close() method,
used internally and probably not useful otherwise.
nameopen (function) -- opens a file; takes filename and mode options,
searches the default name file directory if not
found in current directory, handles "-" filenames,
and uses NameFile to disable closing of sys.stdin/
sys.stdout.
Molecule (class) -- the meat of the matter. A Molecule instance has
the following methods:
.load(file) -- loads a name file,
which may be a file-like
object with a .readline()
method or a filename as a
string.
.name() -- generate one name and
return it.
"""
__version__ = "1.0"
import string, re, sys, random
import os

# Default directory nameopen() searches for name files.  Resolved relative to
# this module (a "names" directory next to it) rather than hard-coded to one
# user's home directory, so the bot works from any install location.
NAMEDIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "names")
# Section names that every Molecule name table starts out with.
NAMESECTIONS = ["inf", "first", "mid", "final", "notes", "end"]
class NameFile:
    """File wrapper whose ``close()`` does nothing.

    Every other file operation is delegated to the wrapped object, so a
    shared stream such as ``sys.stdin`` can be handed to code that closes
    its input without actually being closed.  The attributes listed in
    ``__file_attributes`` are read from and written to the wrapped object.
    """

    __file_attributes = ('closed', 'mode', 'name', 'softspace')

    def __init__(self, file):
        self.fd = file

    def close(self):
        """Deliberately a no-op; the wrapped file stays open."""
        pass

    def flush(self):
        return self.fd.flush()

    def isatty(self):
        return self.fd.isatty()

    def fileno(self):
        return self.fd.fileno()

    def read(self, *args):
        return self.fd.read(*args)

    def readline(self, *args):
        return self.fd.readline(*args)

    def readlines(self, *args):
        return self.fd.readlines(*args)

    def seek(self, *args):
        return self.fd.seek(*args)

    def tell(self):
        return self.fd.tell()

    def write(self, str):
        return self.fd.write(str)

    def writelines(self, list):
        return self.fd.writelines(list)

    def __repr__(self):
        return repr(self.fd)

    def __getattr__(self, name):
        # Only reached when normal lookup fails.  Proxy the known file
        # attributes; anything else is genuinely missing and must raise
        # AttributeError (not KeyError) so hasattr()/getattr() defaults work.
        if name in self.__file_attributes:
            return getattr(self.fd, name)
        raise AttributeError(name)

    def __setattr__(self, name, value):
        if name in self.__file_attributes:
            setattr(self.fd, name, value)
        else:
            self.__dict__[name] = value

    def __cmp__(self, file):
        """I'm not sure what the correct behavior is, and therefore
        this implementation is just a guess."""
        # Compare against a raw file directly, or against another wrapper's
        # underlying file.
        if type(file) == type(self.fd):
            return cmp(self.fd, file)
        else:
            return cmp(self.fd, file.fd)
class NameReader:
    """Line reader that remembers the most recently read line in ``line``."""

    def __init__(self, file):
        self.file = file
        self.line = ""

    def next(self):
        """Read one line, store it in ``self.line`` and return it.

        An empty string is returned at end of file.
        """
        current = self.file.readline()
        self.line = current
        return current

    def close(self):
        """Close the underlying file and return whatever its close() returns."""
        return self.file.close()
def safeopen(filename, mode):
    """Open *filename* in *mode*; return None instead of raising IOError."""
    try:
        handle = open(filename, mode)
    except IOError:
        handle = None
    return handle
def nameopen(filename, mode):
    """Open a name file, trying several locations in turn.

    ``"-"`` yields a NameFile around sys.stdin (read modes) or sys.stdout
    (other modes).  Otherwise *filename* and *filename* + ``".nam"`` are
    tried as given; for read modes the same two names are then tried inside
    NAMEDIR.  The very last attempt uses plain ``open()`` so that the
    IOError propagates when the file cannot be found anywhere.
    """
    if filename == "-":
        stream = sys.stdin if "r" in mode else sys.stdout
        return NameFile(stream)

    fp = safeopen(filename, mode)
    if fp is None:
        fp = safeopen(filename + ".nam", mode)

    if fp is None and "r" in mode:
        in_namedir = NAMEDIR + "/" + filename
        fp = safeopen(in_namedir, mode)
        if fp is None:
            # open() rather than safeopen(): let the error surface now.
            fp = open(in_namedir + ".nam", mode)
    return fp
class Molecule:
    """Name generator backed by a table of name fragments grouped by section.

    ``nametbl`` maps a section name to its list of fragments; it starts with
    one empty list per entry of NAMESECTIONS plus the unnamed section ``""``.
    ``cursection`` is the list that subsequently loaded lines are added to.
    """

    def __init__(self):
        self.nametbl = {}
        for i in NAMESECTIONS:
            self.nametbl[i] = []
        self.nametbl[""] = []
        self.cursection = self.nametbl[""]

    def load(self, fp):
        """Merge the contents of a name file into the table.

        *fp* is either a filename (opened with nameopen) or a file-like
        object with ``readline()``; the latter is wrapped in NameFile, so it
        is left open for the caller.  A line of the form ``[section]``
        switches the current section (creating it if necessary); every
        other line, blank ones included, is appended to the current section.
        """
        if isinstance(fp, str):
            fp = nameopen(fp, "r")
        else:
            fp = NameFile(fp)
        rdr = NameReader(fp)
        try:
            while rdr.next():
                # Strip only the line terminator.  Slicing off the last
                # character lost data on a final line without a newline and
                # left "\r" behind on CRLF files.
                line = rdr.line.rstrip("\r\n")
                if line.startswith('[') and line.endswith(']'):
                    section = line[1:-1]
                    if section not in self.nametbl:
                        self.nametbl[section] = []
                    self.cursection = self.nametbl[section]
                else:
                    self.cursection.append(line)
        finally:
            # Release a file we opened ourselves even if reading fails.
            fp.close()

    def name(self):
        """Return one name: a random fragment from each non-empty section
        of "first", "mid" and "final", concatenated in that order."""
        parts = []
        for section in ("first", "mid", "final"):
            choices = self.nametbl[section]
            if choices:
                parts.append(random.choice(choices))
        return "".join(parts)
if __name__ == "__main__":
    # Command-line driver: "-r file" loads a name file, any other argument
    # is a count of names to generate and print.
    if len(sys.argv) <= 1:
        sys.stderr.write("Usage: molecular.py [ -r file ] [ nn ]\n")
        sys.exit(0)
    name = Molecule()
    i = 1
    while i < len(sys.argv):
        arg = sys.argv[i]
        if arg == "-r":
            i += 1
            if i >= len(sys.argv):
                sys.stderr.write("molecular.py: -r requires a file name\n")
                sys.exit(1)
            name.load(sys.argv[i])
        else:
            # Use a separate loop variable: reusing ``i`` here overwrote the
            # argv index and made the outer loop re-read earlier arguments.
            for _ in range(int(arg)):
                sys.stdout.write(name.name() + "\n")
        i += 1

102
plugins/util/timesince.py Normal file
View file

@ -0,0 +1,102 @@
# Copyright (c) Django Software Foundation and individual contributors.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3. Neither the name of Django nor the names of its contributors may be used
# to endorse or promote products derived from this software without
# specific prior written permission.
#
#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND
#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
#DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
#ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
import datetime
def timesince(d, now=None):
    """
    Takes two datetime objects and returns the time between d and now
    as a nicely formatted string, e.g. "10 minutes". If d occurs after now,
    then "0 minutes" is returned.

    Units used are years, months, weeks, days, hours, and minutes.
    Seconds and microseconds are ignored. Up to two adjacent units will be
    displayed. For example, "2 weeks, 3 days" and "1 year, 3 months" are
    possible outputs, but "2 weeks, 3 hours" and "1 year, 5 days" are not.

    Both d and now may also be given as a datetime.date or as an int/float
    unix timestamp; now defaults to the current local time.

    Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since
    """
    chunks = (
        (60 * 60 * 24 * 365, ('year', 'years')),
        (60 * 60 * 24 * 30, ('month', 'months')),
        (60 * 60 * 24 * 7, ('week', 'weeks')),
        (60 * 60 * 24, ('day', 'days')),
        (60 * 60, ('hour', 'hours')),
        (60, ('minute', 'minutes'))
    )

    # Convert int or float (unix epoch) to datetime.datetime for comparison
    if isinstance(d, (int, float)):
        d = datetime.datetime.fromtimestamp(d)
    # Convert datetime.date to datetime.datetime for comparison.
    if not isinstance(d, datetime.datetime):
        d = datetime.datetime(d.year, d.month, d.day)

    if not now:
        now = datetime.datetime.now()
    elif isinstance(now, (int, float)):
        # Accept epoch values for now as well; timeuntil() passes its target
        # time in this position, so a numeric target used to crash here.
        now = datetime.datetime.fromtimestamp(now)
    elif not isinstance(now, datetime.datetime):
        now = datetime.datetime(now.year, now.month, now.day)

    # Drop the microsecond part of d before subtracting.
    delta = now - (d - datetime.timedelta(0, 0, d.microsecond))
    since = delta.days * 24 * 60 * 60 + delta.seconds
    if since <= 0:
        # d is in the future compared to now, stop processing.
        return u'0 minutes'

    # Find the largest unit that fits at least once.  If none does, the loop
    # ends on minutes with a count of 0.
    for i, (seconds, name) in enumerate(chunks):
        count = since // seconds
        if count != 0:
            break
    s = '%d %s' % (count, name[0] if count == 1 else name[1])

    if i + 1 < len(chunks):
        # Now get the second item
        seconds2, name2 = chunks[i + 1]
        count2 = (since - (seconds * count)) // seconds2
        if count2 != 0:
            s += ', %d %s' % (count2, name2[0] if count2 == 1 else name2[1])
    return s
def timeuntil(d, now=None):
    """
    Like timesince, but returns a string measuring the time until
    the given time.
    """
    reference = now if now else datetime.datetime.now()
    return timesince(reference, d)

133
plugins/util/urlnorm.py Normal file
View file

@ -0,0 +1,133 @@
"""
URI Normalization function:
* Always provide the URI scheme in lowercase characters.
* Always provide the host, if any, in lowercase characters.
* Only perform percent-encoding where it is essential.
* Always use uppercase A-through-F characters when percent-encoding.
* Prevent dot-segments appearing in non-relative URI paths.
* For schemes that define a default authority, use an empty authority if the
default is desired.
* For schemes that define an empty path to be equivalent to a path of "/",
use "/".
* For schemes that define a port, use an empty port if the default is desired
* All portions of the URI must be utf-8 encoded NFC from Unicode strings
implements:
http://gbiv.com/protocols/uri/rev-2002/rfc2396bis.html#canonical-form
http://www.intertwingly.net/wiki/pie/PaceCanonicalIds
inspired by:
Tony J. Ibbs, http://starship.python.net/crew/tibs/python/tji_url.py
Mark Nottingham, http://www.mnot.net/python/urlnorm.py
"""
__license__ = "Python"
import re
import unicodedata
import urlparse
from urllib import quote, unquote
# Default port per URI scheme.  normalize() drops an explicit port from the
# URL when it equals the entry for the URL's scheme.  Only http is listed.
default_port = {
    'http': 80,
}
class Normalizer(object):
    """Pairs a compiled regex with a function that rewrites a match.

    ``regex`` is matched against a URL; ``normalize`` receives the match
    object and returns the canonical URL string.
    """

    def __init__(self, regex, normalize_func):
        self.regex = regex
        self.normalize = normalize_func


# Site-specific rewrites, tried in order; each maps the many URL shapes of
# one site onto a single canonical form.
normalizers = (
    # Amazon product pages -> http://amazon.<tld>/dp/<ASIN>
    Normalizer(
        re.compile(r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P<tld>[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P<ASIN>[0-9A-Za-z]{10})'),
        lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))),
    # Waffleimages -> http://img.waffleimages.com/<40 hex digits>
    Normalizer(
        re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'),
        lambda m: r'http://img.waffleimages.com/%s' % m.group(1)),
    # YouTube -> http://youtube.com/watch?v=<id>.  The id class includes
    # upper-case letters: matching lower case only cut the captured id off
    # at its first capital and produced a link to a different video.
    Normalizer(
        re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'),
        lambda m: r'http://youtube.com/watch?v=%s' % m.group(1)),
)
def normalize(url):
    """Normalize a URL.

    Applies the canonicalisation rules listed in the module docstring and
    then the site-specific ``normalizers``; the first normalizer whose
    regex matches the generic result determines the return value.
    """
    scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip())
    userinfo, host, port = re.search('([^@]*@)?([^:]*):?(.*)', auth).groups()

    # Scheme and host are always lowercase.
    scheme = scheme.lower()
    host = host.lower()
    if host.endswith('.'):
        host = host[:-1]

    # Drop a leading "www." from the host, or from the path when the URL
    # had no authority part; such URLs are assumed to be http.
    www_in_host = host.startswith("www.")
    www_in_path = not www_in_host and path.startswith("www.")
    if www_in_host or www_in_path:
        if not scheme:
            scheme = "http"
        if www_in_host:
            host = host[4:]
        else:
            path = path[4:]

    # Only perform percent-encoding where it is essential, always with
    # uppercase hex digits, on utf-8 encoded NFC text.
    def clean(text):
        decoded = unicode(unquote(text), 'utf-8', 'replace')
        return unicodedata.normalize('NFC', decoded).encode('utf-8')

    path = quote(clean(path), "~:/?#[]@!$&'()*+,;=")
    fragment = quote(clean(fragment), "~")

    # note care must be taken to only encode & and = characters as values
    query_parts = []
    for pair in query.split("&"):
        pieces = [quote(clean(piece), "~:/?#[]@!$'()*+,;=")
                  for piece in pair.split("=", 1)]
        query_parts.append("=".join(pieces))
    query = "&".join(query_parts)

    # Prevent dot-segments appearing in non-relative URI paths.
    if scheme in ("", "http", "https", "ftp", "file"):
        kept = []
        for segment in path.split('/'):
            if segment == "..":
                if len(kept) > 1:
                    kept.pop()
            elif segment == ".":
                pass
            elif segment or not kept:
                # Empty segments are kept only in first position.
                kept.append(segment)
        # A path ending in "", "." or ".." keeps its trailing slash.
        if segment in ("", ".", ".."):
            kept.append("")
        path = '/'.join(kept)

    # An empty userinfo is dropped.
    if userinfo in ("@", ":@"):
        userinfo = ""

    # An empty path is equivalent to "/" for these schemes.
    if path == "" and scheme in ("http", "https", "ftp", "file"):
        path = "/"

    # Drop the port when it is the scheme's default.
    if port and scheme in default_port and port.isdigit():
        port = str(int(port))
        if int(port) == default_port[scheme]:
            port = ''

    # Put it all back together again
    auth = (userinfo or "") + host
    if port:
        auth += ":" + port
    if url.endswith("#") and query == "" and fragment == "":
        path += "#"
    normal_url = urlparse.urlunsplit(
        (scheme, auth, path, query, fragment)).replace("http:///", "http://")

    for norm in normalizers:
        match = norm.regex.match(normal_url)
        if match:
            return norm.normalize(match)
    return normal_url