Extended formatting library

2012-04-22 04:26:24 +12:00 · 2012-04-22 04:26:24 +12:00 · c0ebc097b1
commit c0ebc097b1
parent ddee3b6ab1
4 changed files with 57 additions and 18 deletions
--- a/plugins/google.py
+++ b/plugins/google.py
@ -1,6 +1,7 @@
 import random
-from util import hook
-from util import http
+from util import hook, http
+
+from util.formatting import truncate_words


 def api_get(kind, query):
@ -53,7 +54,6 @@ def google(inp):

    out = ' '.join(out.split())

-    if len(out) > 300:
-        out = out[:out.rfind(' ')] + '...'
+    out = truncate_words(out, 300)

    return out
--- a/plugins/util/formatting.py
+++ b/plugins/util/formatting.py
@ -1,8 +1,48 @@
-""" formatting.py - handy functions for formatting text """
+""" formatting.py - handy functions for formatting text
+    this file contains code from the following URL:
+    <http://code.djangoproject.com/svn/django/trunk/django/utils/text.py>
+"""
+import re


 def capitalize_first(line):
-    """ capitalises the first letter of words
+    """
+    capitalises the first letter of words
    (keeps other letters intact)
    """
    return ' '.join([s[0].upper() + s[1:] for s in line.split(' ')])
+
+
+def truncate_words(s, num):
+    """
+    Truncates a string after a certain number of words.
+
+    s   -- the text to shorten; it is split on runs of whitespace, so the
+           result always has single spaces between words
+    num -- maximum number of words to keep (a word count, NOT a character
+           count); anything int() accepts
+
+    Returns the kept words joined with single spaces. When words were
+    dropped, '...' is appended as a final token unless the last kept word
+    already ends with '...'.
+    """
+    # a zero/negative limit keeps no words; clamping stops words[:length]
+    # from slicing relative to the end of the list
+    length = max(int(num), 0)
+    words = s.split()
+    if len(words) > length:
+        words = words[:length]
+        # `not words` guards the index below when the limit is zero
+        if not words or not words[-1].endswith('...'):
+            words.append('...')
+    return ' '.join(words)
+
+# Expression to match some_token and some_token="with spaces" (and similarly
+# for single-quoted strings). Inside a quoted section a backslash escapes the
+# following character, so an escaped quote does not end the section; text that
+# doesn't fit that shape falls through to the plain \S+ alternative.
+# The pattern is written as two adjacent raw-string literals (presumably to
+# fit the line length), and re.VERBOSE is what makes the spaces around each
+# `|` insignificant.
+split_re = re.compile(r"""((?:[^\s'"]*(?:(?:"(?:[^"\\]|\\.)*" | '(?:[""" \
+                      r"""^'\\]|\\.)*')[^\s'"]*)+) | \S+)""", re.VERBOSE)
+
+
+def smart_split(text):
+    r"""
+    Generator that splits a string on whitespace, leaving quoted phrases
+    together. Supports both single and double quotes, and supports escaping
+    quotes with backslashes. In the output, strings keep their initial and
+    trailing quote marks and escaped quotes remain escaped (this module does
+    not provide an unescaping helper, so callers strip them as needed).
+
+    Each piece is the matched slice of the input, so pieces have the same
+    string type as `text`.
+
+    >>> list(smart_split(r'This is "a person\'s" test.'))
+    ['This', 'is', '"a person\\\'s"', 'test.']
+    >>> list(smart_split(r"Another 'person\'s' test."))
+    ['Another', "'person\\'s'", 'test.']
+    >>> list(smart_split(r'A "\"funky\" style" test.'))
+    ['A', '"\\"funky\\" style"', 'test.']
+    """
+    for match in split_re.finditer(text):
+        yield match.group(0)
--- a/plugins/wikipedia.py
+++ b/plugins/wikipedia.py
@ -4,6 +4,7 @@ Scaevolus 2009'''
 import re

 from util import hook, http
+from util.formatting import truncate_words


 api_prefix = "http://en.wikipedia.org/w/api.php"
@ -44,7 +45,6 @@ def wiki(inp):

    desc = re.sub('\s+', ' ', desc).strip()  # remove excess spaces
    
-    if len(desc) > 300:
-        desc = desc[:300] + '...'
+    desc = truncate_words(desc, 300)

    return '%s -- %s' % (desc, http.quote(url, ':/'))
--- a/plugins/wolframalpha.py
+++ b/plugins/wolframalpha.py
@ -5,6 +5,7 @@ from util import hook, http

 from urllib2 import HTTPError
 from util.web import bitly, ShortenError
+from util.formatting import truncate_words


@hook.command('wa')
@ -46,7 +47,7 @@ def wolframalpha(inp, bot=None):
        if results:
            pod_texts.append(title + ': ' + ','.join(results))

-    ret = ' | '.join(pod_texts)
+    ret = ' - '.join(pod_texts)

    if not pod_texts:
        return 'No results.'
@ -58,9 +59,7 @@ def wolframalpha(inp, bot=None):

    ret = re.sub(r'\\:([0-9a-z]{4})', unicode_sub, ret)

-    if len(ret) > 410:
-        ret = ret[:ret.rfind(' ', 0, 410)]
-        ret = re.sub(r'\W+$', '', ret) + '...'
+    ret = truncate_words(ret, 410)
    
    if not ret:
        return 'No results.'