"""
formatting.py - handy functions for formatting text

this file contains code from the following URL:
(NOTE(review): the source URL is missing from this header - please restore it.)
"""

import re


def capitalize_first(line):
    """
    Capitalise the first letter of every space-separated word.

    All other letters are left intact (unlike ``str.title()`` or
    ``str.capitalize()``, nothing is lower-cased).  The string is split on
    single space characters, so runs of spaces, as well as leading and
    trailing spaces, are preserved exactly in the result.

    :param line: the text to process.
    :returns: ``line`` with the first character of each word upper-cased.
    """
    # Splitting on ' ' yields empty pieces for consecutive/leading/trailing
    # spaces (and for an empty string).  Slicing with [:1] instead of
    # indexing with [0] keeps those empty pieces from raising IndexError.
    return ' '.join([s[:1].upper() + s[1:] for s in line.split(' ')])


def truncate_words(s, num):
    """
    Truncate a string after a certain number of words.

    Words are whitespace-separated.  When the text holds more than ``num``
    words, only the first ``num`` are kept and ``'...'`` is appended as a
    separate word, unless the last kept word already ends with ``'...'``.
    The returned words are always joined by single spaces.

    :param s: the text to truncate.
    :param num: maximum number of words to keep; anything accepted by
        ``int()``.  Negative values follow Python slice semantics (words are
        dropped from the end of the text).
    :returns: the possibly truncated text.
    :raises ValueError: if ``num`` cannot be converted by ``int()``.
    """
    length = int(num)
    words = s.split()
    if len(words) > length:
        words = words[:length]
        # The slice may be empty (e.g. num == 0); in that case there is no
        # last word to inspect, so just emit the ellipsis on its own.
        if not words or not words[-1].endswith('...'):
            words.append('...')
    return ' '.join(words)


# Expression to match some_token and some_token="with spaces" (and similarly
# for single-quoted strings).
split_re = re.compile(
    r"""((?:[^\s'"]*(?:(?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')[^\s'"]*)+) | \S+)""",
    re.VERBOSE,
)


def smart_split(text):
    r"""
    Generator that splits a string by spaces, leaving quoted phrases together.

    Supports both single and double quotes, and supports escaping quotes with
    backslashes. In the output, strings will keep their initial and trailing
    quote marks and escaped quotes will remain escaped (the results can then
    be further processed with unescape_string_literal()).

    >>> list(smart_split(r'This is "a person\'s" test.'))
    ['This', 'is', '"a person\\\'s"', 'test.']
    >>> list(smart_split(r"Another 'person\'s' test."))
    ['Another', "'person\\'s'", 'test.']
    >>> list(smart_split(r'A "\"funky\" style" test.'))
    ['A', '"\\"funky\\" style"', 'test.']
    """
    # Each regex match is exactly one output token; whitespace between
    # tokens is never matched and is therefore dropped.
    for bit in split_re.finditer(text):
        yield bit.group(0)