diff --git a/plugins/util/__init__.py b/plugins/util/__init__.py deleted file mode 100755 index e69de29..0000000 diff --git a/plugins/util/color.py b/plugins/util/color.py deleted file mode 100644 index d5501df..0000000 --- a/plugins/util/color.py +++ /dev/null @@ -1,35 +0,0 @@ -# Colors. Plugin by blha303, color/control id info from http://stackoverflow.com/a/13382032 - -colors = {'white': '0', 'black': '1', 'darkblue': '2', 'darkgreen': '3', - 'red': '4', 'darkred': '5', 'darkviolet': '6', 'orange': '7', - 'yellow': '8', 'lightgreen': '9', 'cyan': '10', 'lightcyan': '11', - 'blue': '12', 'violet': '13', 'darkgray': '14', 'lightgray': '15'} - -control = {'bold': '\x02', 'color': '\x03', 'italic': '\x09', - 'strikethrough': '\x13', 'reset': '\x0f', 'underline': '\x15', - 'underline2': '\x1f', 'reverse': '\x16'} - - -def color(color): - return control['color'] + colors[color] - -def bold(): - return control['bold'] - -def italic(): - return control['italic'] - -def strike(): - return control['strikethrough'] - -def reset(): - return control['reset'] - -def underline(other=False): - if other: - return control['underline2'] - else: - return control['underline'] - -def reverse(): - return control['reverse'] diff --git a/plugins/util/execute.py b/plugins/util/execute.py deleted file mode 100644 index 9aecd7e..0000000 --- a/plugins/util/execute.py +++ /dev/null @@ -1,30 +0,0 @@ -import http, web - - -def eval_py(code, paste_multiline=True): - attempts = 0 - - while True: - try: - output = http.get("http://eval.appspot.com/eval", statement=code).rstrip('\n') - # sometimes the API returns a blank string on first attempt, lets try again - # and make sure it is actually supposed to be a blank string. ._. - if output == "": - output = http.get("http://eval.appspot.com/eval", statement=code).rstrip('\n') - break - except http.HTTPError: - if attempts > 2: - return "Failed to execute code." - else: - attempts += 1 - continue - - if "Traceback (most recent call last):" in output: - status = "Python error: " - else: - status = "Code executed sucessfully: " - - if "\n" in output and paste_multiline: - return status + web.haste(output) - else: - return output diff --git a/plugins/util/formatting.py b/plugins/util/formatting.py deleted file mode 100644 index 442adf5..0000000 --- a/plugins/util/formatting.py +++ /dev/null @@ -1,34 +0,0 @@ -def raw(format_string): - """Replace based irc formatting""" - stuff = {} - stuff['col'] = {'[white]':'\x030', - '[black]':'\x031', - '[dblue]':'\x032', - '[dgreen]':'\x033', - '[dred]':'\x034', - '[brown]':'\x035', - '[purple]':'\x036', - '[gold]':'\x037', - '[yellow]':'\x038', - '[green]':'\x039', - '[cyan]':'\x0310', - '[lblue]':'\x0311', - '[blue]':'\x0312', - '[pink]':'\x0313', - '[gray]':'\x0314', - '[lgray]':'\x0315', - '[err]':'\x034\x02' - '[/err]':'\x030\x02'} - stuff['style'] = {'[b]':'\x02', - '[clear]':'\x0f'} - stuff['sym'] = {'[point]':'\x07'} - stuff['text'] = {'[url]':'http://'} - final = {} - for x in stuff: - final.update(stuff[x]) - for x in final: - format_string = format_string.replace(x,final[x]) - return format_string -def err(format_string): - """Format the string with standard error styling""" - return "\x034\x02{}\x0f".format(format_string) \ No newline at end of file diff --git a/plugins/util/hook.py b/plugins/util/hook.py deleted file mode 100755 index 5e191d3..0000000 --- a/plugins/util/hook.py +++ /dev/null @@ -1,101 +0,0 @@ -import inspect -import re - - -def _hook_add(func, add, name=''): - if not hasattr(func, '_hook'): - func._hook = [] - func._hook.append(add) - - if not hasattr(func, '_filename'): - func._filename = func.func_code.co_filename - - if not hasattr(func, '_args'): - argspec = inspect.getargspec(func) - if name: - n_args = len(argspec.args) - if argspec.defaults: - n_args -= len(argspec.defaults) - if argspec.keywords: - n_args -= 1 - if argspec.varargs: - n_args -= 1 - if n_args != 1: - err = '%ss must take 1 non-keyword argument (%s)' % (name, - func.__name__) - raise ValueError(err) - - args = [] - if argspec.defaults: - end = bool(argspec.keywords) + bool(argspec.varargs) - args.extend(argspec.args[-len(argspec.defaults): - end if end else None]) - if argspec.keywords: - args.append(0) # means kwargs present - func._args = args - - if not hasattr(func, '_thread'): # does function run in its own thread? - func._thread = False - - -def sieve(func): - if func.func_code.co_argcount != 5: - raise ValueError( - 'sieves must take 5 arguments: (bot, input, func, type, args)') - _hook_add(func, ['sieve', (func,)]) - return func - - -def command(arg=None, **kwargs): - args = {} - - def command_wrapper(func): - args.setdefault('name', func.func_name) - _hook_add(func, ['command', (func, args)], 'command') - return func - - if kwargs or not inspect.isfunction(arg): - if arg is not None: - args['name'] = arg - args.update(kwargs) - return command_wrapper - else: - return command_wrapper(arg) - - -def event(arg=None, **kwargs): - args = kwargs - - def event_wrapper(func): - args['name'] = func.func_name - args.setdefault('events', ['*']) - _hook_add(func, ['event', (func, args)], 'event') - return func - - if inspect.isfunction(arg): - return event_wrapper(arg, kwargs) - else: - if arg is not None: - args['events'] = arg.split() - return event_wrapper - - -def singlethread(func): - func._thread = True - return func - - -def regex(regex, flags=0, **kwargs): - args = kwargs - - def regex_wrapper(func): - args['name'] = func.func_name - args['regex'] = regex - args['re'] = re.compile(regex, flags) - _hook_add(func, ['regex', (func, args)], 'regex') - return func - - if inspect.isfunction(regex): - raise ValueError("regex decorators require a regex to match against") - else: - return regex_wrapper diff --git a/plugins/util/http.py b/plugins/util/http.py deleted file mode 100755 index 4409211..0000000 --- a/plugins/util/http.py +++ /dev/null @@ -1,115 +0,0 @@ -# convenience wrapper for urllib2 & friends - -import cookielib -import json -import urllib -import urllib2 -import urlparse - -from urllib import quote, quote_plus as _quote_plus - -from lxml import etree, html -from bs4 import BeautifulSoup - -# used in plugins that import this -from urllib2 import URLError, HTTPError - -ua_cloudbot = 'Cloudbot/DEV http://github.com/CloudDev/CloudBot' - -ua_firefox = 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:17.0) Gecko/17.0' \ - ' Firefox/17.0' -ua_old_firefox = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; ' \ - 'rv:1.8.1.6) Gecko/20070725 Firefox/2.0.0.6' -ua_internetexplorer = 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)' -ua_chrome = 'Mozilla/5.0 (X11; Linux i686) AppleWebKit/537.4 (KHTML, ' \ - 'like Gecko) Chrome/22.0.1229.79 Safari/537.4' - -jar = cookielib.CookieJar() - - -def get(*args, **kwargs): - return open(*args, **kwargs).read() - - -def get_url(*args, **kwargs): - return open(*args, **kwargs).geturl() - - -def get_html(*args, **kwargs): - return html.fromstring(get(*args, **kwargs)) - - -def get_soup(*args, **kwargs): - return BeautifulSoup(get(*args, **kwargs), 'lxml') - - -def get_xml(*args, **kwargs): - return etree.fromstring(get(*args, **kwargs)) - - -def get_json(*args, **kwargs): - return json.loads(get(*args, **kwargs)) - - -def open(url, query_params=None, user_agent=None, post_data=None, - referer=None, get_method=None, cookies=False, timeout=None, **kwargs): - if query_params is None: - query_params = {} - - if user_agent is None: - user_agent = ua_cloudbot - - query_params.update(kwargs) - - url = prepare_url(url, query_params) - - request = urllib2.Request(url, post_data) - - if get_method is not None: - request.get_method = lambda: get_method - - request.add_header('User-Agent', user_agent) - - if referer is not None: - request.add_header('Referer', referer) - - if cookies: - opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(jar)) - else: - opener = urllib2.build_opener() - - if timeout: - return opener.open(request, timeout=timeout) - else: - return opener.open(request) - - -def prepare_url(url, queries): - if queries: - scheme, netloc, path, query, fragment = urlparse.urlsplit(url) - - query = dict(urlparse.parse_qsl(query)) - query.update(queries) - query = urllib.urlencode(dict((to_utf8(key), to_utf8(value)) - for key, value in query.iteritems())) - - url = urlparse.urlunsplit((scheme, netloc, path, query, fragment)) - - return url - - -def to_utf8(s): - if isinstance(s, unicode): - return s.encode('utf8', 'ignore') - else: - return str(s) - - -def quote_plus(s): - return _quote_plus(to_utf8(s)) - - -def unescape(s): - if not s.strip(): - return s - return html.fromstring(s).text_content() diff --git a/plugins/util/text.py b/plugins/util/text.py deleted file mode 100755 index 9bc40f4..0000000 --- a/plugins/util/text.py +++ /dev/null @@ -1,230 +0,0 @@ -# -*- coding: utf-8 -*- -""" formatting.py - handy functions for formatting text - this file contains code from the following URL: - -""" - -import re - -from HTMLParser import HTMLParser -import htmlentitydefs - - -class HTMLTextExtractor(HTMLParser): - def __init__(self): - HTMLParser.__init__(self) - self.result = [] - - def handle_data(self, d): - self.result.append(d) - - def handle_charref(self, number): - codepoint = int(number[1:], 16) if number[0] in (u'x', u'X') else int(number) - self.result.append(unichr(codepoint)) - - def handle_entityref(self, name): - codepoint = htmlentitydefs.name2codepoint[name] - self.result.append(unichr(codepoint)) - - def get_text(self): - return u''.join(self.result) - - -def strip_html(html): - s = HTMLTextExtractor() - s.feed(html) - return s.get_text() - - -def munge(text, munge_count=0): - """munges up text.""" - reps = 0 - for n in xrange(len(text)): - rep = character_replacements.get(text[n]) - if rep: - text = text[:n] + rep.decode('utf8') + text[n + 1:] - reps += 1 - if reps == munge_count: - break - return text - - -character_replacements = { - 'a': 'ä', - 'b': 'Б', - 'c': 'ċ', - 'd': 'đ', - 'e': 'ë', - 'f': 'ƒ', - 'g': 'ġ', - 'h': 'ħ', - 'i': 'í', - 'j': 'ĵ', - 'k': 'ķ', - 'l': 'ĺ', - 'm': 'ṁ', - 'n': 'ñ', - 'o': 'ö', - 'p': 'ρ', - 'q': 'ʠ', - 'r': 'ŗ', - 's': 'š', - 't': 'ţ', - 'u': 'ü', - 'v': '', - 'w': 'ω', - 'x': 'χ', - 'y': 'ÿ', - 'z': 'ź', - 'A': 'Å', - 'B': 'Β', - 'C': 'Ç', - 'D': 'Ď', - 'E': 'Ē', - 'F': 'Ḟ', - 'G': 'Ġ', - 'H': 'Ħ', - 'I': 'Í', - 'J': 'Ĵ', - 'K': 'Ķ', - 'L': 'Ĺ', - 'M': 'Μ', - 'N': 'Ν', - 'O': 'Ö', - 'P': 'Р', - 'Q': 'Q', - 'R': 'Ŗ', - 'S': 'Š', - 'T': 'Ţ', - 'U': 'Ů', - 'V': 'Ṿ', - 'W': 'Ŵ', - 'X': 'Χ', - 'Y': 'Ỳ', - 'Z': 'Ż'} - - -def capitalize_first(line): - """ - capitalises the first letter of words - (keeps other letters intact) - """ - return ' '.join([s[0].upper() + s[1:] for s in line.split(' ')]) - - -def multiword_replace(text, wordDic): - """ - take a text and replace words that match a key in a dictionary with - the associated value, return the changed text - """ - rc = re.compile('|'.join(map(re.escape, wordDic))) - - def translate(match): - return wordDic[match.group(0)] - return rc.sub(translate, text) - - -def truncate_words(content, length=10, suffix='...'): - """Truncates a string after a certain number of words.""" - nmsg = content.split(" ") - out = None - x = 0 - for i in nmsg: - if x <= length: - if out: - out = out + " " + nmsg[x] - else: - out = nmsg[x] - x += 1 - if x <= length: - return out - else: - return out + suffix - - -# from -def truncate_str(content, length=100, suffix='...'): - """Truncates a string after a certain number of chars.""" - if len(content) <= length: - return content - else: - return content[:length].rsplit(' ', 1)[0] + suffix - - -# ALL CODE BELOW THIS LINE IS COVERED BY THE FOLLOWING AGREEMENT: - -# Copyright (c) Django Software Foundation and individual contributors. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of Django nor the names of its contributors may be used -# to endorse or promote products derived from this software without -# specific prior written permission. -# -#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND -#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -#DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -#ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# Expression to match some_token and some_token="with spaces" (and similarly -# for single-quoted strings). - -split_re = re.compile(r"""((?:[^\s'"]*(?:(?:"(?:[^"\\]|\\.)*" | '(?:[""" \ - r"""^'\\]|\\.)*')[^\s'"]*)+) | \S+)""", re.VERBOSE) - - -def smart_split(text): - r""" - Generator that splits a string by spaces, leaving quoted phrases together. - Supports both single and double quotes, and supports escaping quotes with - backslashes. In the output, strings will keep their initial and trailing - quote marks and escaped quotes will remain escaped (the results can then - be further processed with unescape_string_literal()). - - >>> list(smart_split(r'This is "a person\'s" test.')) - [u'This', u'is', u'"a person\\\'s"', u'test.'] - >>> list(smart_split(r"Another 'person\'s' test.")) - [u'Another', u"'person\\'s'", u'test.'] - >>> list(smart_split(r'A "\"funky\" style" test.')) - [u'A', u'"\\"funky\\" style"', u'test.'] - """ - for bit in split_re.finditer(text): - yield bit.group(0) - - -def get_text_list(list_, last_word='or'): - """ - >>> get_text_list(['a', 'b', 'c', 'd']) - u'a, b, c or d' - >>> get_text_list(['a', 'b', 'c'], 'and') - u'a, b and c' - >>> get_text_list(['a', 'b'], 'and') - u'a and b' - >>> get_text_list(['a']) - u'a' - >>> get_text_list([]) - u'' - """ - if len(list_) == 0: - return '' - if len(list_) == 1: - return list_[0] - return '%s %s %s' % ( - # Translators: This string is used as a separator between list elements - ', '.join([i for i in list_][:-1]), - last_word, list_[-1]) diff --git a/plugins/util/textgen.py b/plugins/util/textgen.py deleted file mode 100644 index 29d74f0..0000000 --- a/plugins/util/textgen.py +++ /dev/null @@ -1,51 +0,0 @@ -import re -import random - -TEMPLATE_RE = re.compile(r"\{(.+?)\}") - - -class TextGenerator(object): - def __init__(self, templates, parts, default_templates=None, variables=None): - self.templates = templates - self.default_templates = default_templates - self.parts = parts - self.variables = variables - - def generate_string(self, template=None): - """ - Generates one string using the specified templates. - If no templates are specified, use a random template from the default_templates list. - """ - # this is bad - if self.default_templates: - text = self.templates[template or random.choice(self.default_templates)] - else: - text = random.choice(self.templates) - - # replace static variables in the template with provided values - if self.variables: - for key, value in self.variables.items(): - text = text.replace("{%s}" % key, value) - - # get a list of all text parts we need - required_parts = TEMPLATE_RE.findall(text) - - for required_part in required_parts: - ppart = self.parts[required_part] - # check if the part is a single string or a list - if not isinstance(ppart, basestring): - part = random.choice(self.parts[required_part]) - else: - part = self.parts[required_part] - text = text.replace("{%s}" % required_part, part) - - return text - - def generate_strings(self, amount, template=None): - strings = [] - for i in xrange(amount): - strings.append(self.generate_string()) - return strings - - def get_template(self, template): - return self.templates[template] \ No newline at end of file diff --git a/plugins/util/timeformat.py b/plugins/util/timeformat.py deleted file mode 100644 index 185fd36..0000000 --- a/plugins/util/timeformat.py +++ /dev/null @@ -1,14 +0,0 @@ -def timeformat(seconds): - days = seconds / 86400 - seconds -= 86400 * days - hours = seconds / 3600 - seconds -= 3600 * hours - minutes = seconds / 60 - seconds -= 60 * minutes - if days != 0: - return "%sd %sh %sm %ss" % (days, hours, minutes, seconds) - elif hours == 0 and minutes != 0: - return "%sm %ss" % (minutes, seconds) - elif hours == 0 and minutes == 0: - return "%ss" % seconds - return "%sh %sm %ss" % (hours, minutes, seconds) diff --git a/plugins/util/timesince.py b/plugins/util/timesince.py deleted file mode 100755 index 56ec8b0..0000000 --- a/plugins/util/timesince.py +++ /dev/null @@ -1,105 +0,0 @@ -# Copyright (c) Django Software Foundation and individual contributors. -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, -# this list of conditions and the following disclaimer. -# -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# -# 3. Neither the name of Django nor the names of its contributors may be used -# to endorse or promote products derived from this software without -# specific prior written permission. -# -#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"AND -#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -#DISCLAIMED.IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -#ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -#(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -#LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON -#ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -#(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -#SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import datetime - - -def timesince(d, now=None): - """ - Takes two datetime objects and returns the time between d and now - as a nicely formatted string, e.g. "10 minutes". If d occurs after now, - then "0 minutes" is returned. - - Units used are years, months, weeks, days, hours, and minutes. - Seconds and microseconds are ignored. Up to two adjacent units will be - displayed. For example, "2 weeks, 3 days" and "1 year, 3 months" are - possible outputs, but "2 weeks, 3 hours" and "1 year, 5 days" are not. - - Adapted from http://blog.natbat.co.uk/archive/2003/Jun/14/time_since - """ - chunks = ( - (60 * 60 * 24 * 365, ('year', 'years')), - (60 * 60 * 24 * 30, ('month', 'months')), - (60 * 60 * 24 * 7, ('week', 'weeks')), - (60 * 60 * 24, ('day', 'days')), - (60 * 60, ('hour', 'hours')), - (60, ('minute', 'minutes')) - ) - - # Convert int or float (unix epoch) to datetime.datetime for comparison - if isinstance(d, int) or isinstance(d, float): - d = datetime.datetime.fromtimestamp(d) - - if isinstance(now, int) or isinstance(now, float): - now = datetime.datetime.fromtimestamp(now) - - # Convert datetime.date to datetime.datetime for comparison. - if not isinstance(d, datetime.datetime): - d = datetime.datetime(d.year, d.month, d.day) - if now and not isinstance(now, datetime.datetime): - now = datetime.datetime(now.year, now.month, now.day) - - if not now: - now = datetime.datetime.now() - - # ignore microsecond part of 'd' since we removed it from 'now' - delta = now - (d - datetime.timedelta(0, 0, d.microsecond)) - since = delta.days * 24 * 60 * 60 + delta.seconds - if since <= 0: - # d is in the future compared to now, stop processing. - return u'0 ' + 'minutes' - for i, (seconds, name) in enumerate(chunks): - count = since // seconds - if count != 0: - break - - if count == 1: - s = '%(number)d %(type)s' % {'number': count, 'type': name[0]} - else: - s = '%(number)d %(type)s' % {'number': count, 'type': name[1]} - - if i + 1 < len(chunks): - # Now get the second item - seconds2, name2 = chunks[i + 1] - count2 = (since - (seconds * count)) // seconds2 - if count2 != 0: - if count2 == 1: - s += ', %d %s' % (count2, name2[0]) - else: - s += ', %d %s' % (count2, name2[1]) - return s - - -def timeuntil(d, now=None): - """ - Like timesince, but returns a string measuring the time until - the given time. - """ - if not now: - now = datetime.datetime.now() - return timesince(now, d) diff --git a/plugins/util/urlnorm.py b/plugins/util/urlnorm.py deleted file mode 100755 index b3c351b..0000000 --- a/plugins/util/urlnorm.py +++ /dev/null @@ -1,139 +0,0 @@ -""" -URI Normalization function: - * Always provide the URI scheme in lowercase characters. - * Always provide the host, if any, in lowercase characters. - * Only perform percent-encoding where it is essential. - * Always use uppercase A-through-F characters when percent-encoding. - * Prevent dot-segments appearing in non-relative URI paths. - * For schemes that define a default authority, use an empty authority if the - default is desired. - * For schemes that define an empty path to be equivalent to a path of "/", - use "/". - * For schemes that define a port, use an empty port if the default is desired - * All portions of the URI must be utf-8 encoded NFC from Unicode strings - -implements: - http://gbiv.com/protocols/uri/rev-2002/rfc2396bis.html#canonical-form - http://www.intertwingly.net/wiki/pie/PaceCanonicalIds - -inspired by: - Tony J. Ibbs, http://starship.python.net/crew/tibs/python/tji_url.py - Mark Nottingham, http://www.mnot.net/python/urlnorm.py -""" - -__license__ = "Python" - -import re -import unicodedata -import urlparse -from urllib import quote, unquote - -default_port = { - 'http': 80, -} - - -class Normalizer(object): - def __init__(self, regex, normalize_func): - self.regex = regex - self.normalize = normalize_func - - -normalizers = (Normalizer(re.compile( - r'(?:https?://)?(?:[a-zA-Z0-9\-]+\.)?(?:amazon|amzn){1}\.(?P[a-zA-Z\.]{2,})\/(gp/(?:product|offer-listing|customer-media/product-gallery)/|exec/obidos/tg/detail/-/|o/ASIN/|dp/|(?:[A-Za-z0-9\-]+)/dp/)?(?P[0-9A-Za-z]{10})'), - lambda m: r'http://amazon.%s/dp/%s' % (m.group('tld'), m.group('ASIN'))), - Normalizer(re.compile(r'.*waffleimages\.com.*/([0-9a-fA-F]{40})'), - lambda m: r'http://img.waffleimages.com/%s' % m.group(1)), - Normalizer(re.compile(r'(?:youtube.*?(?:v=|/v/)|youtu\.be/|yooouuutuuube.*?id=)([-_a-zA-Z0-9]+)'), - lambda m: r'http://youtube.com/watch?v=%s' % m.group(1)), -) - - -def normalize(url, assume_scheme=False): - """Normalize a URL.""" - - scheme, auth, path, query, fragment = urlparse.urlsplit(url.strip()) - userinfo, host, port = re.search('([^@]*@)?([^:]*):?(.*)', auth).groups() - - # Always provide the URI scheme in lowercase characters. - scheme = scheme.lower() - - # Always provide the host, if any, in lowercase characters. - host = host.lower() - if host and host[-1] == '.': - host = host[:-1] - if host and host.startswith("www."): - if not scheme: - scheme = "http" - host = host[4:] - elif path and path.startswith("www."): - if not scheme: - scheme = "http" - path = path[4:] - - if assume_scheme and not scheme: - scheme = assume_scheme.lower() - - # Only perform percent-encoding where it is essential. - # Always use uppercase A-through-F characters when percent-encoding. - # All portions of the URI must be utf-8 encoded NFC from Unicode strings - def clean(string): - string = unicode(unquote(string), 'utf-8', 'replace') - return unicodedata.normalize('NFC', string).encode('utf-8') - - path = quote(clean(path), "~:/?#[]@!$&'()*+,;=") - fragment = quote(clean(fragment), "~") - - # note care must be taken to only encode & and = characters as values - query = "&".join(["=".join([quote(clean(t), "~:/?#[]@!$'()*+,;=") - for t in q.split("=", 1)]) for q in query.split("&")]) - - # Prevent dot-segments appearing in non-relative URI paths. - if scheme in ["", "http", "https", "ftp", "file"]: - output = [] - for input in path.split('/'): - if input == "": - if not output: - output.append(input) - elif input == ".": - pass - elif input == "..": - if len(output) > 1: - output.pop() - else: - output.append(input) - if input in ["", ".", ".."]: - output.append("") - path = '/'.join(output) - - # For schemes that define a default authority, use an empty authority if - # the default is desired. - if userinfo in ["@", ":@"]: - userinfo = "" - - # For schemes that define an empty path to be equivalent to a path of "/", - # use "/". - if path == "" and scheme in ["http", "https", "ftp", "file"]: - path = "/" - - # For schemes that define a port, use an empty port if the default is - # desired - if port and scheme in default_port.keys(): - if port.isdigit(): - port = str(int(port)) - if int(port) == default_port[scheme]: - port = '' - - # Put it all back together again - auth = (userinfo or "") + host - if port: - auth += ":" + port - if url.endswith("#") and query == "" and fragment == "": - path += "#" - normal_url = urlparse.urlunsplit((scheme, auth, path, query, - fragment)).replace("http:///", "http://") - for norm in normalizers: - m = norm.regex.match(normal_url) - if m: - return norm.normalize(m) - return normal_url diff --git a/plugins/util/web.py b/plugins/util/web.py deleted file mode 100755 index 1180bca..0000000 --- a/plugins/util/web.py +++ /dev/null @@ -1,54 +0,0 @@ -""" web.py - handy functions for web services """ - -import http -import urlnorm -import json -import urllib -import yql - -short_url = "http://is.gd/create.php" -paste_url = "http://hastebin.com" -yql_env = "http://datatables.org/alltables.env" - -YQL = yql.Public() - - -class ShortenError(Exception): - def __init__(self, code, text): - self.code = code - self.text = text - - def __str__(self): - return self.text - - -def isgd(url): - """ shortens a URL with the is.gd API """ - url = urlnorm.normalize(url.encode('utf-8'), assume_scheme='http') - params = urllib.urlencode({'format': 'json', 'url': url}) - request = http.get_json("http://is.gd/create.php?%s" % params) - - if "errorcode" in request: - raise ShortenError(request["errorcode"], request["errormessage"]) - else: - return request["shorturl"] - - -def try_isgd(url): - try: - out = isgd(url) - except (ShortenError, http.HTTPError): - out = url - return out - - -def haste(text, ext='txt'): - """ pastes text to a hastebin server """ - page = http.get(paste_url + "/documents", post_data=text) - data = json.loads(page) - return ("%s/%s.%s" % (paste_url, data['key'], ext)) - - -def query(query, params={}): - """ runs a YQL query and returns the results """ - return YQL.execute(query, params, env=yql_env)