17db96d56Sopenharmony_ci"""Text wrapping and filling.
27db96d56Sopenharmony_ci"""
37db96d56Sopenharmony_ci
47db96d56Sopenharmony_ci# Copyright (C) 1999-2001 Gregory P. Ward.
57db96d56Sopenharmony_ci# Copyright (C) 2002, 2003 Python Software Foundation.
67db96d56Sopenharmony_ci# Written by Greg Ward <gward@python.net>
77db96d56Sopenharmony_ci
87db96d56Sopenharmony_ciimport re
97db96d56Sopenharmony_ci
# Public names exported by a star-import of this module.
__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
# TextWrapper builds both its whitespace translation table and its
# chunk-splitting regular expressions from this string.
_whitespace = '\t\n\x0b\x0c\r '
167db96d56Sopenharmony_ci
class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 0 .. 'tabsize' spaces, depending on its position
        in its line.  If false, each tab is treated as a single character.
      tabsize (default: 8)
        Expand tabs in input text to 0 .. 'tabsize' spaces, unless
        'expand_tabs' is false.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words.  If true, wrapping will occur
        preferably on whitespace and right after hyphens that are part
        of compound words.  Any truthy value enables this.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      max_lines (default: None)
        Truncate wrapped lines.
      placeholder (default: ' [...]')
        Append to the last line of truncated text.
    """

    # Translation table mapping every recognized whitespace character
    # to a plain space (used by _munge_whitespace()).
    unicode_whitespace_trans = dict.fromkeys(map(ord, _whitespace), ord(' '))

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    word_punct = r'[\w!"\'&.,?]'
    letter = r'[^\d\W]'
    whitespace = r'[%s]' % re.escape(_whitespace)
    nowhitespace = '[^' + whitespace[1:]
    wordsep_re = re.compile(r'''
        ( # any whitespace
          %(ws)s+
        | # em-dash between words
          (?<=%(wp)s) -{2,} (?=\w)
        | # word, possibly hyphenated
          %(nws)s+? (?:
            # hyphenated word
              -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
              (?= %(lt)s -? %(lt)s)
            | # end of word
              (?=%(ws)s|\Z)
            | # em-dash
              (?<=%(wp)s) (?=-{2,}\w)
            )
        )''' % {'wp': word_punct, 'lt': letter,
                'ws': whitespace, 'nws': nowhitespace},
        re.VERBOSE)
    # The pattern fragments are build-time helpers only; remove them so
    # they do not linger as class attributes.
    del word_punct, letter, nowhitespace

    # This less funky little regex just splits on recognized spaces. E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
    del whitespace

    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True,
                 tabsize=8,
                 *,
                 max_lines=None,
                 placeholder=' [...]'):
        """Store the wrapping options as instance attributes.

        No validation happens here; 'width', 'max_lines' and
        'placeholder' are checked when _wrap_chunks() runs.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
        self.tabsize = tabsize
        self.max_lines = max_lines
        self.placeholder = placeholder


    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".

        Each step is skipped when the matching option ('expand_tabs',
        'replace_whitespace') is false.
        """
        if self.expand_tabs:
            text = text.expandtabs(self.tabsize)
        if self.replace_whitespace:
            text = text.translate(self.unicode_whitespace_trans)
        return text


    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is true, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        # Test truthiness (not identity with True) so that this agrees
        # with _handle_long_word(), which also treats any truthy value
        # of break_on_hyphens as "enabled".
        if self.break_on_hyphens:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        # re.split() with a capturing group yields empty strings between
        # adjacent separators; they carry no text, so discard them.
        return [c for c in chunks if c]

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.

        'chunks' is modified in place; nothing is returned.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks)-1:
            if chunks[i+1] == " " and patsearch(chunks[i]):
                chunks[i+1] = "  "
                # Skip the space chunk that was just widened.
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.

        'reversed_chunks' is the stack of remaining chunks, with the
        offending chunk at the end.  'cur_line' holds the chunks already
        placed on the current line, 'cur_len' is their total length and
        'width' is the room available on this line after the indent.
        Both lists are modified in place; nothing is returned.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            end = space_left
            chunk = reversed_chunks[-1]
            if self.break_on_hyphens and len(chunk) > space_left:
                # break after last hyphen, but only if there are
                # non-hyphens before it
                hyphen = chunk.rfind('-', 0, space_left)
                if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
                    end = hyphen + 1
            cur_line.append(chunk[:end])
            reversed_chunks[-1] = chunk[end:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        The 'chunks' list is reversed in place and consumed as the
        lines are built.

        Raises ValueError if 'self.width' is not positive, or if
        'max_lines' is set and the placeholder cannot fit on a line
        together with the applicable indent.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            # The placeholder lands on the last permitted line, which is
            # the first line only when max_lines is 1 (or less).
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                # _handle_long_word() may have appended to cur_line.
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                # Emit the line unchanged when there is no line limit, when
                # more lines are still allowed, or when all remaining text
                # (ignoring one droppable whitespace chunk) already fits.
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + ''.join(cur_line))
                else:
                    # This is the last permitted line and text remains:
                    # drop trailing chunks until the placeholder fits
                    # after a non-whitespace chunk.
                    while cur_line:
                        if (cur_line[-1].strip() and
                            cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing on this line can precede the placeholder;
                        # try to attach it to the previous line, otherwise
                        # give it a line of its own.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split_chunks(self, text):
        """_split_chunks(text : string) -> [string]

        Normalize whitespace in 'text' with _munge_whitespace() and
        split the result into chunks with _split().
        """
        text = self._munge_whitespace(text)
        return self._split(text)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        chunks = self._split_chunks(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))
3697db96d56Sopenharmony_ci
3707db96d56Sopenharmony_ci
3717db96d56Sopenharmony_ci# -- Convenience interface ---------------------------------------------
3727db96d56Sopenharmony_ci
def wrap(text, width=70, **kwargs):
    """Return the paragraph in 'text' as a list of wrapped lines.

    The text is reformatted so that every line is at most 'width'
    columns wide.  By default, tabs in 'text' are expanded with
    string.expandtabs(), and all other whitespace characters (including
    newline) are converted to space.  Any extra keyword arguments are
    passed straight to the TextWrapper constructor; see that class for
    the options that customize wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).wrap(text)
3857db96d56Sopenharmony_ci
def fill(text, width=70, **kwargs):
    """Return the paragraph in 'text' as one wrapped, newline-joined string.

    The text is reformatted so that every line is at most 'width'
    columns wide.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  Any extra keyword
    arguments are passed straight to the TextWrapper constructor; see
    that class for the options that customize wrapping behaviour.
    """
    return TextWrapper(width=width, **kwargs).fill(text)
3977db96d56Sopenharmony_ci
def shorten(text, width, **kwargs):
    """Collapse and truncate the given text to fit in the given width.

    Runs of whitespace in the text are first collapsed to single spaces
    (and leading/trailing whitespace removed).  If the result fits in
    the *width*, it is returned as is.  Otherwise, as many words as
    possible are kept and the placeholder is appended::

        >>> textwrap.shorten("Hello  world!", width=12)
        'Hello world!'
        >>> textwrap.shorten("Hello  world!", width=11)
        'Hello [...]'
    """
    collapsed = ' '.join(text.strip().split())
    one_line_wrapper = TextWrapper(width=width, max_lines=1, **kwargs)
    return one_line_wrapper.fill(collapsed)
4127db96d56Sopenharmony_ci
4137db96d56Sopenharmony_ci
4147db96d56Sopenharmony_ci# -- Loosely related functionality -------------------------------------
4157db96d56Sopenharmony_ci
# Matches a line made up solely of spaces and/or tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Captures the leading spaces/tabs of every line that has some other
# character on it.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.

    Lines consisting only of spaces and tabs are emptied first, so they
    do not take part in computing the common margin.
    """
    text = _whitespace_only_re.sub('', text)

    # Narrow 'margin' down to the longest run of spaces and tabs that
    # starts every non-blank line.
    margin = None
    for lead in _leading_whitespace_re.findall(text):
        if margin is None:
            # First non-blank line: its indent is the initial candidate.
            margin = lead
        elif lead.startswith(margin):
            # This line is indented at least as deeply: margin stands.
            continue
        elif margin.startswith(lead):
            # This line is shallower but consistent: it becomes the margin.
            margin = lead
        else:
            # The two indents diverge: keep only their shared prefix.
            cut = next((pos for pos, (a, b) in enumerate(zip(margin, lead))
                        if a != b), len(margin))
            margin = margin[:cut]

    if not margin:
        return text
    return re.sub(r'(?m)^' + margin, '', text)
4687db96d56Sopenharmony_ci
4697db96d56Sopenharmony_ci
def indent(text, prefix, predicate=None):
    """Add 'prefix' to the beginning of selected lines in 'text'.

    A line is selected when 'predicate(line)' is true.  When no
    'predicate' is given, the lines selected are those that are not
    empty and do not consist solely of whitespace characters.  Line
    endings are kept as they appear in 'text'.
    """
    if predicate is None:
        # Default rule: a line qualifies if anything is left after
        # stripping whitespace.
        should_prefix = lambda line: line.strip()
    else:
        should_prefix = predicate

    pieces = [
        prefix + line if should_prefix(line) else line
        for line in text.splitlines(True)
    ]
    return ''.join(pieces)
4867db96d56Sopenharmony_ci
4877db96d56Sopenharmony_ci
if __name__ == "__main__":
    # Manual smoke test for dedent().  Other inputs worth trying:
    #   dedent("\tfoo\n\tbar")
    #   dedent("  \thello there\n  \t  how are you?")
    sample = "Hello there.\n  This is indented."
    print(dedent(sample))
492