diff --git a/plugins/google.py b/plugins/google.py index 591474c..6423951 100755 --- a/plugins/google.py +++ b/plugins/google.py @@ -1,6 +1,7 @@ import random -from util import hook -from util import http +from util import hook, http + +from util.formatting import truncate_words def api_get(kind, query): @@ -22,7 +23,7 @@ def googleimage(inp): parsed['responseStatus'], '')) if not parsed['responseData']['results']: return 'no images found' - return random.choice(parsed['responseData']['results'][:10])\ + return random.choice(parsed['responseData']['results'][:10]) \ ['unescapedUrl'] @@ -53,7 +54,6 @@ def google(inp): out = ' '.join(out.split()) - if len(out) > 300: - out = out[:out.rfind(' ')] + '...' + out = truncate_words(out, 300) return out diff --git a/plugins/util/formatting.py b/plugins/util/formatting.py index 4d35890..e43a2e0 100755 --- a/plugins/util/formatting.py +++ b/plugins/util/formatting.py @@ -1,8 +1,48 @@ -""" formatting.py - handy functions for formatting text """ +""" formatting.py - handy functions for formatting text + this file contains code from the following URL: + +""" +import re def capitalize_first(line): - """ capitalises the first letter of words - (keeps other letters intact) + """ + capitalises the first letter of words + (keeps other letters intact) """ return ' '.join([s[0].upper() + s[1:] for s in line.split(' ')]) + + +def truncate_words(s, num): + "Truncates a string after a certain number of words." + length = int(num) + words = s.split() + if len(words) > length: + words = words[:length] + if not words[-1].endswith('...'): + words.append('...') + return ' '.join(words) + +# Expression to match some_token and some_token="with spaces" (and similarly +# for single-quoted strings). +split_re = re.compile(r"""((?:[^\s'"]*(?:(?:"(?:[^"\\]|\\.)*" | '(?:[""" \ + r"""^'\\]|\\.)*')[^\s'"]*)+) | \S+)""", re.VERBOSE) + + +def smart_split(text): + r""" + Generator that splits a string by spaces, leaving quoted phrases together. + Supports both single and double quotes, and supports escaping quotes with + backslashes. In the output, strings will keep their initial and trailing + quote marks and escaped quotes will remain escaped (the results can then + be further processed with unescape_string_literal()). + + >>> list(smart_split(r'This is "a person\'s" test.')) + [u'This', u'is', u'"a person\\\'s"', u'test.'] + >>> list(smart_split(r"Another 'person\'s' test.")) + [u'Another', u"'person\\'s'", u'test.'] + >>> list(smart_split(r'A "\"funky\" style" test.')) + [u'A', u'"\\"funky\\" style"', u'test.'] + """ + for bit in split_re.finditer(text): + yield bit.group(0) diff --git a/plugins/wikipedia.py b/plugins/wikipedia.py index 36e1102..991acd4 100755 --- a/plugins/wikipedia.py +++ b/plugins/wikipedia.py @@ -4,6 +4,7 @@ Scaevolus 2009''' import re from util import hook, http +from util.formatting import truncate_words api_prefix = "http://en.wikipedia.org/w/api.php" @@ -43,8 +44,7 @@ def wiki(inp): desc = title + desc desc = re.sub('\s+', ' ', desc).strip() # remove excess spaces - - if len(desc) > 300: - desc = desc[:300] + '...' + + desc = truncate_words(desc, 300) return '%s -- %s' % (desc, http.quote(url, ':/')) diff --git a/plugins/wolframalpha.py b/plugins/wolframalpha.py index 68b071a..7445545 100755 --- a/plugins/wolframalpha.py +++ b/plugins/wolframalpha.py @@ -5,6 +5,7 @@ from util import hook, http from urllib2 import HTTPError from util.web import bitly, ShortenError +from util.formatting import truncate_words @hook.command('wa') @@ -15,14 +16,14 @@ def wolframalpha(inp, bot=None): api_key = bot.config.get("api_keys", {}).get("wolframalpha", None) bitly_user = bot.config.get("api_keys", {}).get("bitly_user", None) bitly_key = bot.config.get("api_keys", {}).get("bitly_api", None) - + if not api_key: return "error: missing api key" url = 'http://api.wolframalpha.com/v2/query?format=plaintext' result = http.get_xml(url, input=inp, appid=api_key) - + # get the URL for a user to view this query in a browser query_url = "http://www.wolframalpha.com/input/?i=" + \ urllib.quote(inp.encode('utf-8')) @@ -46,7 +47,7 @@ def wolframalpha(inp, bot=None): if results: pod_texts.append(title + ': ' + ','.join(results)) - ret = ' | '.join(pod_texts) + ret = ' - '.join(pod_texts) if not pod_texts: return 'No results.' @@ -58,10 +59,8 @@ def wolframalpha(inp, bot=None): ret = re.sub(r'\\:([0-9a-z]{4})', unicode_sub, ret) - if len(ret) > 410: - ret = ret[:ret.rfind(' ', 0, 410)] - ret = re.sub(r'\W+$', '', ret) + '...' - + ret = truncate_words(ret, 410) + if not ret: return 'No results.'