2012-04-21 18:26:24 +02:00
|
|
|
""" formatting.py - handy functions for formatting text

This file contains code from the following URL:

<http://code.djangoproject.com/svn/django/trunk/django/utils/text.py>
"""
|
|
|
|
import re
|
2012-04-20 16:35:35 +02:00
|
|
|
|
2012-04-21 06:03:08 +02:00
|
|
|
|
2012-04-20 16:35:35 +02:00
|
|
|
def capitalize_first(line):
    """
    Capitalise the first letter of every space-separated word in ``line``.

    Only the first character of each word is upper-cased; every other
    character is left intact (so "iPhone" becomes "IPhone", not "Iphone").
    Words are delimited by single space characters, which means leading,
    trailing and repeated spaces are preserved exactly as given.

    Returns the resulting string; an empty ``line`` yields an empty string.
    """
    # Slice (word[:1]) rather than index (word[0]): splitting on ' ' produces
    # empty chunks for empty input and for leading/trailing/consecutive
    # spaces, and indexing those would raise IndexError.
    return ' '.join(word[:1].upper() + word[1:] for word in line.split(' '))
|
2012-04-21 18:26:24 +02:00
|
|
|
|
|
|
|
|
|
|
|
def truncate_words(s, num):
    """
    Truncate a string after a certain number of words.

    ``s`` is split on runs of whitespace and at most ``num`` words are kept,
    re-joined with single spaces. ``num`` may be anything ``int()`` accepts
    (e.g. the string "3"); ``int()`` raises for values it cannot convert.

    When words were dropped, a '...' marker is appended as a final word,
    unless the last kept word already ends with '...'. A ``num`` of zero or
    less keeps no words, so any non-empty input collapses to just '...'.
    """
    # Clamp negatives to zero: a negative slice bound would otherwise drop
    # words from the end instead of limiting the count from the start.
    limit = max(int(num), 0)
    words = s.split()
    if len(words) <= limit:
        # Nothing to cut; whitespace is still normalised by split/join.
        return ' '.join(words)

    kept = words[:limit]
    # ``kept`` is empty when limit == 0, so check it before looking at the
    # last word (indexing an empty list would raise IndexError).
    if not kept or not kept[-1].endswith('...'):
        kept.append('...')
    return ' '.join(kept)
|
|
|
|
|
|
|
|
# Matches one whitespace-delimited token at a time. A token may embed
# double- or single-quoted sections (with backslash-escaped characters inside
# the quotes), so both some_token and some_token="with spaces" come out as a
# single match. The pattern is compiled with re.VERBOSE, hence the literal
# spaces around the "|" alternations are not part of the expression.
split_re = re.compile(
    r"""((?:[^\s'"]*(?:(?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')"""
    r"""[^\s'"]*)+) | \S+)""",
    re.VERBOSE,
)
|
|
|
|
|
|
|
|
|
|
|
|
def smart_split(text):
    r"""
    Lazily split ``text`` on whitespace while keeping quoted phrases whole.

    Both single- and double-quoted phrases are recognised, and a quote
    character can be escaped with a backslash inside a phrase. Each piece is
    yielded exactly as it appears in ``text``: surrounding quote marks are
    kept and escaped quotes stay escaped, so any unescaping is left to the
    caller.

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    # Every match of the module-level split_re is one complete token.
    for match in split_re.finditer(text):
        yield match.group()
|