From 7dc1daa69f0f40526dbfb15fdf6a76df7bfb5beb Mon Sep 17 00:00:00 2001 From: Luke Rogers Date: Wed, 2 Oct 2013 12:01:46 +1300 Subject: [PATCH] move lib to core, no more sys.path fucking @cybojenix --- cloudbot.py | 1 - core/bot.py | 6 +- util/__init__.py | 0 util/color.py | 35 +++++++ util/execute.py | 30 ++++++ util/formatting.py | 34 +++++++ util/hook.py | 101 ++++++++++++++++++++ util/http.py | 115 +++++++++++++++++++++++ util/text.py | 230 +++++++++++++++++++++++++++++++++++++++++++++ util/textgen.py | 51 ++++++++++ util/timeformat.py | 14 +++ util/timesince.py | 105 +++++++++++++++++++++ util/urlnorm.py | 139 +++++++++++++++++++++++++++ util/web.py | 54 +++++++++++ 14 files changed, 911 insertions(+), 4 deletions(-) create mode 100644 util/__init__.py create mode 100644 util/color.py create mode 100644 util/execute.py create mode 100644 util/formatting.py create mode 100644 util/hook.py create mode 100644 util/http.py create mode 100644 util/text.py create mode 100644 util/textgen.py create mode 100644 util/timeformat.py create mode 100644 util/timesince.py create mode 100644 util/urlnorm.py create mode 100644 util/web.py diff --git a/cloudbot.py b/cloudbot.py index 385dc3a..76349dc 100644 --- a/cloudbot.py +++ b/cloudbot.py @@ -8,7 +8,6 @@ import sys import time # set up enviroment -sys.path += ['plugins'] # add stuff to the sys.path for easy imports os.chdir(sys.path[0] or '.') # do stuff relative to the install directory print 'CloudBot REFRESH ' diff --git a/core/bot.py b/core/bot.py index 68ea15c..0929855 100644 --- a/core/bot.py +++ b/core/bot.py @@ -66,17 +66,17 @@ class Bot(object): def get_config(self): """create and return the config object""" - return config.Config(self.name, self.logger) + return config.Config(self.logger) def get_logger(self): """create and return the logger object""" # create logger - logger = logging.getLogger(self.name) + logger = logging.getLogger("cloudbot") logger.setLevel(logging.DEBUG) # add a file handler - log_name = "{}.log".format(self.name) + log_name = "bot.log" fh = logging.FileHandler(log_name) fh.setLevel(logging.DEBUG) diff --git a/util/__init__.py b/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/util/color.py b/util/color.py new file mode 100644 index 0000000..d5501df --- /dev/null +++ b/util/color.py @@ -0,0 +1,35 @@ +# Colors. Plugin by blha303, color/control id info from http://stackoverflow.com/a/13382032 + +colors = {'white': '0', 'black': '1', 'darkblue': '2', 'darkgreen': '3', + 'red': '4', 'darkred': '5', 'darkviolet': '6', 'orange': '7', + 'yellow': '8', 'lightgreen': '9', 'cyan': '10', 'lightcyan': '11', + 'blue': '12', 'violet': '13', 'darkgray': '14', 'lightgray': '15'} + +control = {'bold': '\x02', 'color': '\x03', 'italic': '\x09', + 'strikethrough': '\x13', 'reset': '\x0f', 'underline': '\x15', + 'underline2': '\x1f', 'reverse': '\x16'} + + +def color(color): + return control['color'] + colors[color] + +def bold(): + return control['bold'] + +def italic(): + return control['italic'] + +def strike(): + return control['strikethrough'] + +def reset(): + return control['reset'] + +def underline(other=False): + if other: + return control['underline2'] + else: + return control['underline'] + +def reverse(): + return control['reverse'] diff --git a/util/execute.py b/util/execute.py new file mode 100644 index 0000000..9aecd7e --- /dev/null +++ b/util/execute.py @@ -0,0 +1,30 @@ +import http, web + + +def eval_py(code, paste_multiline=True): + attempts = 0 + + while True: + try: + output = http.get("http://eval.appspot.com/eval", statement=code).rstrip('\n') + # sometimes the API returns a blank string on first attempt, lets try again + # and make sure it is actually supposed to be a blank string. ._. + if output == "": + output = http.get("http://eval.appspot.com/eval", statement=code).rstrip('\n') + break + except http.HTTPError: + if attempts > 2: + return "Failed to execute code." + else: + attempts += 1 + continue + + if "Traceback (most recent call last):" in output: + status = "Python error: " + else: + status = "Code executed sucessfully: " + + if "\n" in output and paste_multiline: + return status + web.haste(output) + else: + return output diff --git a/util/formatting.py b/util/formatting.py new file mode 100644 index 0000000..442adf5 --- /dev/null +++ b/util/formatting.py @@ -0,0 +1,34 @@ +def raw(format_string): + """Replace based irc formatting""" + stuff = {} + stuff['col'] = {'[white]':'\x030', + '[black]':'\x031', + '[dblue]':'\x032', + '[dgreen]':'\x033', + '[dred]':'\x034', + '[brown]':'\x035', + '[purple]':'\x036', + '[gold]':'\x037', + '[yellow]':'\x038', + '[green]':'\x039', + '[cyan]':'\x0310', + '[lblue]':'\x0311', + '[blue]':'\x0312', + '[pink]':'\x0313', + '[gray]':'\x0314', + '[lgray]':'\x0315', + '[err]':'\x034\x02' + '[/err]':'\x030\x02'} + stuff['style'] = {'[b]':'\x02', + '[clear]':'\x0f'} + stuff['sym'] = {'[point]':'\x07'} + stuff['text'] = {'[url]':'http://'} + final = {} + for x in stuff: + final.update(stuff[x]) + for x in final: + format_string = format_string.replace(x,final[x]) + return format_string +def err(format_string): + """Format the string with standard error styling""" + return "\x034\x02{}\x0f".format(format_string) \ No newline at end of file diff --git a/util/hook.py b/util/hook.py new file mode 100644 index 0000000..5e191d3 --- /dev/null +++ b/util/hook.py @@ -0,0 +1,101 @@ +import inspect +import re + + +def _hook_add(func, add, name=''): + if not hasattr(func, '_hook'): + func._hook = [] + func._hook.append(add) + + if not hasattr(func, '_filename'): + func._filename = func.func_code.co_filename + + if not hasattr(func, '_args'): + argspec = inspect.getargspec(func) + if name: + n_args = len(argspec.args) + if argspec.defaults: + n_args -= len(argspec.defaults) + if argspec.keywords: + n_args -= 1 + if argspec.varargs: + n_args -= 1 + if n_args != 1: + err = '%ss must take 1 non-keyword argument (%s)' % (name, + func.__name__) + raise ValueError(err) + + args = [] + if argspec.defaults: + end = bool(argspec.keywords) + bool(argspec.varargs) + args.extend(argspec.args[-len(argspec.defaults): + end if end else None]) + if argspec.keywords: + args.append(0) # means kwargs present + func._args = args + + if not hasattr(func, '_thread'): # does function run in its own thread? + func._thread = False + + +def sieve(func): + if func.func_code.co_argcount != 5: + raise ValueError( + 'sieves must take 5 arguments: (bot, input, func, type, args)') + _hook_add(func, ['sieve', (func,)]) + return func + + +def command(arg=None, **kwargs): + args = {} + + def command_wrapper(func): + args.setdefault('name', func.func_name) + _hook_add(func, ['command', (func, args)], 'command') + return func + + if kwargs or not inspect.isfunction(arg): + if arg is not None: + args['name'] = arg + args.update(kwargs) + return command_wrapper + else: + return command_wrapper(arg) + + +def event(arg=None, **kwargs): + args = kwargs + + def event_wrapper(func): + args['name'] = func.func_name + args.setdefault('events', ['*']) + _hook_add(func, ['event', (func, args)], 'event') + return func + + if inspect.isfunction(arg): + return event_wrapper(arg, kwargs) + else: + if arg is not None: + args['events'] = arg.split() + return event_wrapper + + +def singlethread(func): + func._thread = True + return func + + +def regex(regex, flags=0, **kwargs): + args = kwargs + + def regex_wrapper(func): + args['name'] = func.func_name + args['regex'] = regex + args['re'] = re.compile(regex, flags) + _hook_add(func, ['regex', (func, args)], 'regex') + return func + + if inspect.isfunction(regex): + raise ValueError("regex decorators require a regex to match against") + else: + return regex_wrapper diff --git a/util/http.py b/util/http.py new file mode 100644 index 0000000..4409211 --- /dev/null +++ b/util/http.py @@ -0,0 +1,115 @@ +# convenience wrapper for urllib2 & friends + +import cookielib +import json +import urllib +import urllib2 +import urlparse + +from urllib import quote, quote_plus as _quote_plus + +from lxml import etree, html +from bs4 import BeautifulSoup + +# used in plugins that import this +from urllib2 import URLError, HTTPError + +ua_cloudbot = 'Cloudbot/DEV http://github.com/CloudDev/CloudBot' + +ua_firefox = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:17.0) Gecko/17.0' \ + ' Firefox/17.0' +ua_old_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; ' \ + 'rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6' +ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)' +ua_chrome = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.4 (KHTML, ' \ + 'like Gecko) Chrome/22.0.1229.79 Safari/537.4' + +jar = cookielib.CookieJar() + + +def get(*args, **kwargs): + return open(*args, **kwargs).read() + + +def get_url(*args, **kwargs): + return open(*args, **kwargs).geturl() + + +def get_html(*args, **kwargs): + return html.fromstring(get(*args, **kwargs)) + + +def get_soup(*args, **kwargs): + return BeautifulSoup(get(*args, **kwargs), 'lxml') + + +def get_xml(*args, **kwargs): + return etree.fromstring(get(*args, **kwargs)) + + +def get_json(*args, **kwargs): + return json.loads(get(*args, **kwargs)) + + +def open(url, query_params=None, user_agent=None, post_data=None, + referer=None, get_method=None, cookies=False, timeout=None, **kwargs): + if query_params is None: + query_params = {} + + if user_agent is None: + user_agent = ua_cloudbot + + query_params.update(kwargs) + + url = prepare_url(url, query_params) + + request = urllib2.Request(url, post_data) + + if get_method is not None: + request.get_method = lambda: get_method + + request.add_header('User-Agent', user_agent) + + if referer is not None: + request.add_header('Referer', referer) + + if cookies: + opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar)) + else: + opener = urllib2.build_opener() + + if timeout: + return opener.open(request, timeout=timeout) + else: + return opener.open(request) + + +def prepare_url(url, queries): + if queries: + scheme, netloc, path, query, fragment = urlparse.urlsplit(url) + + query = dict(urlparse.parse_qsl(query)) + query.update(queries) + query = urllib.urlencode(dict((to_utf8(key), to_utf8(value)) + for key, value in query.iteritems())) + + url = urlparse.urlunsplit((scheme, netloc, path, query, fragment)) + + return url + + +def to_utf8(s): + if isinstance(s, unicode): + return s.encode('utf8', 'ignore') + else: + return str(s) + + +def quote_plus(s): + return _quote_plus(to_utf8(s)) + + +def unescape(s): + if not s.strip(): + return s + return html.fromstring(s).text_content() diff --git a/util/text.py b/util/text.py new file mode 100644 index 0000000..9bc40f4 --- /dev/null +++ b/util/text.py @@ -0,0 +1,230 @@ +# -*- coding: utf-8 -*- +""" formatting.py - handy functions for formatting text + this file contains code from the following URL: + +""" + +import re + +from HTMLParser import HTMLParser +import htmlentitydefs + + +class HTMLTextExtractor(HTMLParser): + def __init__(self): + HTMLParser.__init__(self) + self.result = [] + + def handle_data(self, d): + self.result.append(d) + + def handle_charref(self, number): + codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number) + self.result.append(unichr(codepoint)) + + def handle_entityref(self, name): + codepoint = htmlentitydefs.name2codepoint[name] + self.result.append(unichr(codepoint)) + + def get_text(self): + return u''.join(self.result) + + +def strip_html(html): + s = HTMLTextExtractor() + s.feed(html) + return s.get_text() + + +def munge(text, munge_count=0): + """munges up text.""" + reps = 0 + for n in xrange(len(text)): + rep = character_replacements.get(text[n]) + if rep: + text = text[:n] + rep.decode('utf8') + text[n + 1:] + reps += 1 + if reps == munge_count: + break + return text + + +character_replacements = { + 'a': 'ä', + 'b': 'Б', + 'c': 'ċ', + 'd': 'đ', + 'e': 'ë', + 'f': 'ƒ', + 'g': 'ġ', + 'h': 'ħ', + 'i': 'í', + 'j': 'ĵ', + 'k': 'ķ', + 'l': 'ĺ', + 'm': 'ṁ', + 'n': 'ñ', + 'o': 'ö', + 'p': 'ρ', + 'q': 'ʠ', + 'r': 'ŗ', + 's': 'š', + 't': 'ţ', + 'u': 'ü', + 'v': '', + 'w': 'ω', + 'x': 'χ', + 'y': 'ÿ', + 'z': 'ź', + 'A': 'Å', + 'B': 'Β', + 'C': 'Ç', + 'D': 'Ď', + 'E': 'Ē', + 'F': 'Ḟ', + 'G': 'Ġ', + 'H': 'Ħ', + 'I': 'Í', + 'J': 'Ĵ', + 'K': 'Ķ', + 'L': 'Ĺ', + 'M': 'Μ', + 'N': 'Ν', + 'O': 'Ö', + 'P': 'Р', + 'Q': 'Q', + 'R': 'Ŗ', + 'S': 'Š', + 'T': 'Ţ', + 'U': 'Ů', + 'V': 'Ṿ', + 'W': 'Ŵ', + 'X': 'Χ', + 'Y': 'Ỳ', + 'Z': 'Ż'} + + +def capitalize_first(line): + """ + capitalises the first letter of words + (keeps other letters intact) + """ + return ' '.join([s[0].upper() + s[1:] for s in line.split(' ')]) + + +def multiword_replace(text, wordDic): + """ + take a text and replace words that match a key in a dictionary with + the associated value, return the changed text + """ + rc = re.compile('|'.join(map(re.escape, wordDic))) + + def translate(match): + return wordDic[match.group(0)] + return rc.sub(translate, text) + + +def truncate_words(content, length=10, suffix='...'): + """Truncates a string after a certain number of words.""" + nmsg = content.split(" ") + out = None + x = 0 + for i in nmsg: + if x <= length: + if out: + out = out + " " + nmsg[x] + else: + out = nmsg[x] + x += 1 + if x <= length: + return out + else: + return out + suffix + + +# from +def truncate_str(content, length=100, suffix='...'): + """Truncates a string after a certain number of chars.""" + if len(content) <= length: + return content + else: + return content[:length].rsplit(' ', 1)[0] + suffix + + +# ALL CODE BELOW THIS LINE IS COVERED BY THE FOLLOWING AGREEMENT: + +# Copyright (c) Django Software Foundation and individual contributors. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of Django nor the names of its contributors may be used +# to endorse or promote products derived from this software without +# specific prior written permission. +# +#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND +#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +#DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +#ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# Expression to match some_token and some_token="with spaces" (and similarly +# for single-quoted strings). + +split_re = re.compile(r"""((?:[^\s'"]*(?:(?:"(?:[^"\\]|\\.)*" | '(?:[""" \ + r"""^'\\]|\\.)*')[^\s'"]*)+) | \S+)""", re.VERBOSE) + + +def smart_split(text): + r""" + Generator that splits a string by spaces, leaving quoted phrases together. + Supports both single and double quotes, and supports escaping quotes with + backslashes. In the output, strings will keep their initial and trailing + quote marks and escaped quotes will remain escaped (the results can then + be further processed with unescape_string_literal()). + + >>> list(smart_split(r'This is "a person\'s" test.')) + [u'This', u'is', u'"a person\\\'s"', u'test.'] + >>> list(smart_split(r"Another 'person\'s' test.")) + [u'Another', u"'person\\'s'", u'test.'] + >>> list(smart_split(r'A "\"funky\" style" test.')) + [u'A', u'"\\"funky\\" style"', u'test.'] + """ + for bit in split_re.finditer(text): + yield bit.group(0) + + +def get_text_list(list_, last_word='or'): + """ + >>> get_text_list(['a', 'b', 'c', 'd']) + u'a, b, c or d' + >>> get_text_list(['a', 'b', 'c'], 'and') + u'a, b and c' + >>> get_text_list(['a', 'b'], 'and') + u'a and b' + >>> get_text_list(['a']) + u'a' + >>> get_text_list([]) + u'' + """ + if len(list_) == 0: + return '' + if len(list_) == 1: + return list_[0] + return '%s %s %s' % ( + # Translators: This string is used as a separator between list elements + ', '.join([i for i in list_][:-1]), + last_word, list_[-1]) diff --git a/util/textgen.py b/util/textgen.py new file mode 100644 index 0000000..29d74f0 --- /dev/null +++ b/util/textgen.py @@ -0,0 +1,51 @@ +import re +import random + +TEMPLATE_RE = re.compile(r"\{(.+?)\}") + + +class TextGenerator(object): + def __init__(self, templates, parts, default_templates=None, variables=None): + self.templates = templates + self.default_templates = default_templates + self.parts = parts + self.variables = variables + + def generate_string(self, template=None): + """ + Generates one string using the specified templates. + If no templates are specified, use a random template from the default_templates list. + """ + # this is bad + if self.default_templates: + text = self.templates[template or random.choice(self.default_templates)] + else: + text = random.choice(self.templates) + + # replace static variables in the template with provided values + if self.variables: + for key, value in self.variables.items(): + text = text.replace("{%s}" % key, value) + + # get a list of all text parts we need + required_parts = TEMPLATE_RE.findall(text) + + for required_part in required_parts: + ppart = self.parts[required_part] + # check if the part is a single string or a list + if not isinstance(ppart, basestring): + part = random.choice(self.parts[required_part]) + else: + part = self.parts[required_part] + text = text.replace("{%s}" % required_part, part) + + return text + + def generate_strings(self, amount, template=None): + strings = [] + for i in xrange(amount): + strings.append(self.generate_string()) + return strings + + def get_template(self, template): + return self.templates[template] \ No newline at end of file diff --git a/util/timeformat.py b/util/timeformat.py new file mode 100644 index 0000000..185fd36 --- /dev/null +++ b/util/timeformat.py @@ -0,0 +1,14 @@ +def timeformat(seconds): + days = seconds / 86400 + seconds -= 86400 * days + hours = seconds / 3600 + seconds -= 3600 * hours + minutes = seconds / 60 + seconds -= 60 * minutes + if days != 0: + return "%sd %sh %sm %ss" % (days, hours, minutes, seconds) + elif hours == 0 and minutes != 0: + return "%sm %ss" % (minutes, seconds) + elif hours == 0 and minutes == 0: + return "%ss" % seconds + return "%sh %sm %ss" % (hours, minutes, seconds) diff --git a/util/timesince.py b/util/timesince.py new file mode 100644 index 0000000..56ec8b0 --- /dev/null +++ b/util/timesince.py @@ -0,0 +1,105 @@ +# Copyright (c) Django Software Foundation and individual contributors. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of Django nor the names of its contributors may be used +# to endorse or promote products derived from this software without +# specific prior written permission. +# +#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND +#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +#DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +#ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import datetime + + +def timesince(d, now=None): + """ + Takes two datetime objects and returns the time between d and now + as a nicely formatted string, e.g. "10 minutes". If d occurs after now, + then "0 minutes" is returned. + + Units used are years, months, weeks, days, hours, and minutes. + Seconds and microseconds are ignored. Up to two adjacent units will be + displayed. For example, "2 weeks, 3 days" and "1 year, 3 months" are + possible outputs, but "2 weeks, 3 hours" and "1 year, 5 days" are not. + + Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since + """ + chunks = ( + (60 * 60 * 24 * 365, ('year', 'years')), + (60 * 60 * 24 * 30, ('month', 'months')), + (60 * 60 * 24 * 7, ('week', 'weeks')), + (60 * 60 * 24, ('day', 'days')), + (60 * 60, ('hour', 'hours')), + (60, ('minute', 'minutes')) + ) + + # Convert int or float (unix epoch) to datetime.datetime for comparison + if isinstance(d, int) or isinstance(d, float): + d = datetime.datetime.fromtimestamp(d) + + if isinstance(now, int) or isinstance(now, float): + now = datetime.datetime.fromtimestamp(now) + + # Convert datetime.date to datetime.datetime for comparison. + if not isinstance(d, datetime.datetime): + d = datetime.datetime(d.year, d.month, d.day) + if now and not isinstance(now, datetime.datetime): + now = datetime.datetime(now.year, now.month, now.day) + + if not now: + now = datetime.datetime.now() + + # ignore microsecond part of 'd' since we removed it from 'now' + delta = now - (d - datetime.timedelta(0, 0, d.microsecond)) + since = delta.days * 24 * 60 * 60 + delta.seconds + if since <= 0: + # d is in the future compared to now, stop processing. + return u'0 ' + 'minutes' + for i, (seconds, name) in enumerate(chunks): + count = since // seconds + if count != 0: + break + + if count == 1: + s = '%(number)d %(type)s' % {'number': count, 'type': name[0]} + else: + s = '%(number)d %(type)s' % {'number': count, 'type': name[1]} + + if i + 1 < len(chunks): + # Now get the second item + seconds2, name2 = chunks[i + 1] + count2 = (since - (seconds * count)) // seconds2 + if count2 != 0: + if count2 == 1: + s += ', %d %s' % (count2, name2[0]) + else: + s += ', %d %s' % (count2, name2[1]) + return s + + +def timeuntil(d, now=None): + """ + Like timesince, but returns a string measuring the time until + the given time. + """ + if not now: + now = datetime.datetime.now() + return timesince(now, d) diff --git a/util/urlnorm.py b/util/urlnorm.py new file mode 100644 index 0000000..b3c351b --- /dev/null +++ b/util/urlnorm.py @@ -0,0 +1,139 @@ +""" +URI Normalization function: + * Always provide the URI scheme in lowercase characters. + * Always provide the host, if any, in lowercase characters. + * Only perform percent-encoding where it is essential. + * Always use uppercase A-through-F characters when percent-encoding. + * Prevent dot-segments appearing in non-relative URI paths. + * For schemes that define a default authority, use an empty authority if the + default is desired. + * For schemes that define an empty path to be equivalent to a path of "/", + use "/". + * For schemes that define a port, use an empty port if the default is desired + * All portions of the URI must be utf-8 encoded NFC from Unicode strings + +implements: + http://gbiv.com/protocols/uri/rev-2002/rfc2396bis.html#canonical-form + http://www.intertwingly.net/wiki/pie/PaceCanonicalIds + +inspired by: + Tony J. Ibbs, http://starship.python.net/crew/tibs/python/tji_url.py + Mark Nottingham, http://www.mnot.net/python/urlnorm.py +""" + +__license__ = "Python" + +import re +import unicodedata +import urlparse +from urllib import quote, unquote + +default_port = { + 'http': 80, +} + + +class Normalizer(object): + def __init__(self, regex, normalize_func): + self.regex = regex + self.normalize = normalize_func + + +normalizers = (Normalizer(re.compile( + r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P[0-9A-Za-z]{10})'), + lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))), + Normalizer(re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'), + lambda m: r'http://img.waffleimages.com/%s' % m.group(1)), + Normalizer(re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'), + lambda m: r'http://youtube.com/watch?v=%s' % m.group(1)), +) + + +def normalize(url, assume_scheme=False): + """Normalize a URL.""" + + scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip()) + userinfo, host, port = re.search('([^@]*@)?([^:]*):?(.*)', auth).groups() + + # Always provide the URI scheme in lowercase characters. + scheme = scheme.lower() + + # Always provide the host, if any, in lowercase characters. + host = host.lower() + if host and host[-1] == '.': + host = host[:-1] + if host and host.startswith("www."): + if not scheme: + scheme = "http" + host = host[4:] + elif path and path.startswith("www."): + if not scheme: + scheme = "http" + path = path[4:] + + if assume_scheme and not scheme: + scheme = assume_scheme.lower() + + # Only perform percent-encoding where it is essential. + # Always use uppercase A-through-F characters when percent-encoding. + # All portions of the URI must be utf-8 encoded NFC from Unicode strings + def clean(string): + string = unicode(unquote(string), 'utf-8', 'replace') + return unicodedata.normalize('NFC', string).encode('utf-8') + + path = quote(clean(path), "~:/?#[]@!$&'()*+,;=") + fragment = quote(clean(fragment), "~") + + # note care must be taken to only encode & and = characters as values + query = "&".join(["=".join([quote(clean(t), "~:/?#[]@!$'()*+,;=") + for t in q.split("=", 1)]) for q in query.split("&")]) + + # Prevent dot-segments appearing in non-relative URI paths. + if scheme in ["", "http", "https", "ftp", "file"]: + output = [] + for input in path.split('/'): + if input == "": + if not output: + output.append(input) + elif input == ".": + pass + elif input == "..": + if len(output) > 1: + output.pop() + else: + output.append(input) + if input in ["", ".", ".."]: + output.append("") + path = '/'.join(output) + + # For schemes that define a default authority, use an empty authority if + # the default is desired. + if userinfo in ["@", ":@"]: + userinfo = "" + + # For schemes that define an empty path to be equivalent to a path of "/", + # use "/". + if path == "" and scheme in ["http", "https", "ftp", "file"]: + path = "/" + + # For schemes that define a port, use an empty port if the default is + # desired + if port and scheme in default_port.keys(): + if port.isdigit(): + port = str(int(port)) + if int(port) == default_port[scheme]: + port = '' + + # Put it all back together again + auth = (userinfo or "") + host + if port: + auth += ":" + port + if url.endswith("#") and query == "" and fragment == "": + path += "#" + normal_url = urlparse.urlunsplit((scheme, auth, path, query, + fragment)).replace("http:///", "http://") + for norm in normalizers: + m = norm.regex.match(normal_url) + if m: + return norm.normalize(m) + return normal_url diff --git a/util/web.py b/util/web.py new file mode 100644 index 0000000..1180bca --- /dev/null +++ b/util/web.py @@ -0,0 +1,54 @@ +""" web.py - handy functions for web services """ + +import http +import urlnorm +import json +import urllib +import yql + +short_url = "http://is.gd/create.php" +paste_url = "http://hastebin.com" +yql_env = "http://datatables.org/alltables.env" + +YQL = yql.Public() + + +class ShortenError(Exception): + def __init__(self, code, text): + self.code = code + self.text = text + + def __str__(self): + return self.text + + +def isgd(url): + """ shortens a URL with the is.gd API """ + url = urlnorm.normalize(url.encode('utf-8'), assume_scheme='http') + params = urllib.urlencode({'format': 'json', 'url': url}) + request = http.get_json("http://is.gd/create.php?%s" % params) + + if "errorcode" in request: + raise ShortenError(request["errorcode"], request["errormessage"]) + else: + return request["shorturl"] + + +def try_isgd(url): + try: + out = isgd(url) + except (ShortenError, http.HTTPError): + out = url + return out + + +def haste(text, ext='txt'): + """ pastes text to a hastebin server """ + page = http.get(paste_url + "/documents", post_data=text) + data = json.loads(page) + return ("%s/%s.%s" % (paste_url, data['key'], ext)) + + +def query(query, params={}): + """ runs a YQL query and returns the results """ + return YQL.execute(query, params, env=yql_env)