"""Text wrapping and filling.
"""

# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>

import re

__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
_whitespace = '\t\n\x0b\x0c\r '


class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 0 .. 'tabsize' spaces, depending on its position
        in its line.  If false, each tab is treated as a single character.
      tabsize (default: 8)
        Expand tabs in input text to 0 .. 'tabsize' spaces, unless
        'expand_tabs' is false.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      max_lines (default: None)
        Truncate wrapped lines.
      placeholder (default: ' [...]')
        Append to the last line of truncated text.
    """

    # Translation table mapping every recognized whitespace character
    # to a plain space (used by _munge_whitespace()).
    unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' '))

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    word_punct = r'[\w!"\'&.,?]'
    letter = r'[^\d\W]'
    whitespace = r'[%s]' % re.escape(_whitespace)
    nowhitespace = '[^' + whitespace[1:]
    wordsep_re = re.compile(r'''
        ( # any whitespace
          %(ws)s+
        | # em-dash between words
          (?<=%(wp)s) -{2,} (?=\w)
        | # word, possibly hyphenated
          %(nws)s+? (?:
            # hyphenated word
              -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
              (?= %(lt)s -? %(lt)s)
            | # end of word
              (?=%(ws)s|\Z)
            | # em-dash
              (?<=%(wp)s) (?=-{2,}\w)
            )
        )''' % {'wp': word_punct, 'lt': letter,
                'ws': whitespace, 'nws': nowhitespace},
        re.VERBOSE)
    del word_punct, letter, nowhitespace

    # This less funky little regex just splits on recognized spaces. E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
    del whitespace

    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True,
                 tabsize=8,
                 *,
                 max_lines=None,
                 placeholder=' [...]'):
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
        self.tabsize = tabsize
        self.max_lines = max_lines
        self.placeholder = placeholder

    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs(self.tabsize)
        if self.replace_whitespace:
            text = text.translate(self.unicode_whitespace_trans)
        return text

    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is True, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        if self.break_on_hyphens is True:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        # re.split() with a capturing group yields empty strings between
        # adjacent matches; they carry no text, so discard them.
        chunks = [c for c in chunks if c]
        return chunks

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks)-1:
            if chunks[i+1] == " " and patsearch(chunks[i]):
                # Widen the single separating space to two spaces.
                chunks[i+1] = "  "
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  Both 'reversed_chunks' (a stack
        whose last item is the next chunk) and 'cur_line' are modified
        in place.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            end = space_left
            chunk = reversed_chunks[-1]
            if self.break_on_hyphens and len(chunk) > space_left:
                # break after last hyphen, but only if there are
                # non-hyphens before it
                hyphen = chunk.rfind('-', 0, space_left)
                if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
                    end = hyphen + 1
            cur_line.append(chunk[:end])
            reversed_chunks[-1] = chunk[end:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        Raises ValueError if 'self.width' is not positive, or if
        'max_lines' is set and the placeholder cannot fit in a line.
        Note that 'chunks' is reversed and consumed in place.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + ''.join(cur_line))
                else:
                    # This is the last permitted line and text remains:
                    # drop trailing chunks until the placeholder fits.
                    while cur_line:
                        if (cur_line[-1].strip() and
                            cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing of this line survived; try to hang the
                        # placeholder on the previous line instead.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split_chunks(self, text):
        """Normalize whitespace in 'text', then split it into chunks."""
        text = self._munge_whitespace(text)
        return self._split(text)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        chunks = self._split_chunks(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))


# -- Convenience interface ---------------------------------------------

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)


def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)


def shorten(text, width, **kwargs):
    """Collapse and truncate the given text to fit in the given width.

    The text first has its whitespace collapsed.  If it then fits in
    the *width*, it is returned as is.  Otherwise, as many words
    as possible are joined and then the placeholder is appended::

        >>> textwrap.shorten("Hello  world!", width=12)
        'Hello world!'
        >>> textwrap.shorten("Hello  world!", width=11)
        'Hello [...]'
    """
    w = TextWrapper(width=width, max_lines=1, **kwargs)
    return w.fill(' '.join(text.strip().split()))


# -- Loosely related functionality -------------------------------------

_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)


def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.

    Entirely blank lines are normalized to a newline character.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Find the largest common whitespace between current line and previous
        # winner.
        else:
            for i, (x, y) in enumerate(zip(margin, indent)):
                if x != y:
                    margin = margin[:i]
                    break

    # 'margin' holds only spaces and tabs (see _leading_whitespace_re), so
    # it is safe to splice into a pattern without escaping.
    if margin:
        text = re.sub(r'(?m)^' + margin, '', text)
    return text


def indent(text, prefix, predicate=None):
    """Adds 'prefix' to the beginning of selected lines in 'text'.

    If 'predicate' is provided, 'prefix' will only be added to the lines
    where 'predicate(line)' is True. If 'predicate' is not provided,
    it will default to adding 'prefix' to all non-empty lines that do not
    consist solely of whitespace characters.
    """
    if predicate is None:
        def predicate(line):
            return line.strip()

    def prefixed_lines():
        # splitlines(True) keeps the line endings, so joining the pieces
        # reproduces the original text apart from the added prefixes.
        for line in text.splitlines(True):
            yield (prefix + line if predicate(line) else line)
    return ''.join(prefixed_lines())


if __name__ == "__main__":
    #print dedent("\tfoo\n\tbar")
    #print dedent("  \thello there\n  \t  how are you?")
    print(dedent("Hello there.\n This is indented."))