#!/usr/bin/env python3
# Copyright (c) 2008-11 Qtrac Ltd. All rights reserved.
# This program or module is free software: you can redistribute it and/or
# modify it under the terms of the GNU General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version. It is provided for educational
# purposes and is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
"""
This module provides a few string manipulation functions.
>>> is_balanced("(Python (is (not (lisp))))")
True
>>> shorten("The Crossing", 10)
'The Cro...'
>>> simplify(" some text with spurious whitespace ")
'some text with spurious whitespace'
"""
import string
def is_balanced(text, brackets="()[]{}<>"):
"""Returns True if all the brackets in the text are balanced
For each pair of brackets, the left and right bracket characters
must be different.
>>> is_balanced("no brackets at all")
True
>>> is_balanced("bold")
True
>>> is_balanced("[(some {thing}) goes]")
True
>>> is_balanced("[not (where {it}) is}]")
False
>>> is_balanced("(not ((like) (anything))")
False
"""
counts = {}
left_for_right = {}
for left, right in zip(brackets[::2], brackets[1::2]):
assert left != right, "the bracket characters must differ"
counts[left] = 0
left_for_right[right] = left
for c in text:
if c in counts:
counts[c] += 1
elif c in left_for_right:
left = left_for_right[c]
if counts[left] == 0:
return False
counts[left] -= 1
return not any(counts.values())
def shorten(text, length=25, indicator="..."):
"""Returns text or a truncated copy with the indicator added
text is any string; length is the maximum length of the returned
string (including any indicator); indicator is the string added at
the end to indicate that the text has been shortened
>>> shorten("Second Variety")
'Second Variety'
>>> shorten("Voices from the Street", 17)
'Voices from th...'
>>> shorten("Radio Free Albemuth", 10, "*")
'Radio Fre*'
"""
if len(text) > length:
text = text[:length - len(indicator)] + indicator
return text
def simplify(text, whitespace=string.whitespace, delete=""):
r"""Returns the text with multiple spaces reduced to single spaces
The whitespace parameter is a string of characters, each of which
is considered to be a space.
If delete is not empty it should be a string, in which case any
characters in the delete string are excluded from the resultant
string.
>>> simplify(" this and\n that\t too")
'this and that too'
>>> simplify(" Washington D.C.\n")
'Washington D.C.'
>>> simplify(" Washington D.C.\n", delete=",;:.")
'Washington DC'
>>> simplify(" disemvoweled ", delete="aeiou")
'dsmvwld'
"""
result = []
word = ""
for char in text:
if char in delete:
continue
elif char in whitespace:
if word:
result.append(word)
word = ""
else:
word += char
if word:
result.append(word)
return " ".join(result)
def insert_at(string, position, insert):
"""Returns a copy of string with insert inserted at the position
>>> string = "ABCDE"
>>> result = []
>>> for i in range(-2, len(string) + 2):
... result.append(insert_at(string, i, "-"))
>>> result[:5]
['ABC-DE', 'ABCD-E', '-ABCDE', 'A-BCDE', 'AB-CDE']
>>> result[5:]
['ABC-DE', 'ABCD-E', 'ABCDE-', 'ABCDE-']
"""
return string[:position] + insert + string[position:]
def dummy_insert_at(string, position, insert):
"""Returns a copy of string with insert inserted at the position
>>> string = "ABCDE"
>>> result = []
>>> for i in range(-2, len(string) + 2):
... result.append(insert_at(string, i, "-"))
>>> result[:5]
['ABC-DE', 'ABCD-E', '-ABCDE', 'A-BCDE', 'AB-CDE']
>>> result[5:]
['ABC-DE', 'ABCD-E', 'ABCDE-', 'ABCDE-']
"""
return string
if __name__ == "__main__":
import doctest
doctest.testmod()