1a5f9918aSopenharmony_ci
2a5f9918aSopenharmony_ci# Scanner produces tokens of the following types:
3a5f9918aSopenharmony_ci# STREAM-START
4a5f9918aSopenharmony_ci# STREAM-END
5a5f9918aSopenharmony_ci# DIRECTIVE(name, value)
6a5f9918aSopenharmony_ci# DOCUMENT-START
7a5f9918aSopenharmony_ci# DOCUMENT-END
8a5f9918aSopenharmony_ci# BLOCK-SEQUENCE-START
9a5f9918aSopenharmony_ci# BLOCK-MAPPING-START
10a5f9918aSopenharmony_ci# BLOCK-END
11a5f9918aSopenharmony_ci# FLOW-SEQUENCE-START
12a5f9918aSopenharmony_ci# FLOW-MAPPING-START
13a5f9918aSopenharmony_ci# FLOW-SEQUENCE-END
14a5f9918aSopenharmony_ci# FLOW-MAPPING-END
15a5f9918aSopenharmony_ci# BLOCK-ENTRY
16a5f9918aSopenharmony_ci# FLOW-ENTRY
17a5f9918aSopenharmony_ci# KEY
18a5f9918aSopenharmony_ci# VALUE
19a5f9918aSopenharmony_ci# ALIAS(value)
20a5f9918aSopenharmony_ci# ANCHOR(value)
21a5f9918aSopenharmony_ci# TAG(value)
22a5f9918aSopenharmony_ci# SCALAR(value, plain, style)
23a5f9918aSopenharmony_ci#
24a5f9918aSopenharmony_ci# Read comments in the Scanner code for more details.
25a5f9918aSopenharmony_ci#
26a5f9918aSopenharmony_ci
27a5f9918aSopenharmony_ci__all__ = ['Scanner', 'ScannerError']
28a5f9918aSopenharmony_ci
29a5f9918aSopenharmony_cifrom .error import MarkedYAMLError
30a5f9918aSopenharmony_cifrom .tokens import *
31a5f9918aSopenharmony_ci
32a5f9918aSopenharmony_ciclass ScannerError(MarkedYAMLError):
33a5f9918aSopenharmony_ci    pass
34a5f9918aSopenharmony_ci
35a5f9918aSopenharmony_ciclass SimpleKey:
36a5f9918aSopenharmony_ci    # See below simple keys treatment.
37a5f9918aSopenharmony_ci
38a5f9918aSopenharmony_ci    def __init__(self, token_number, required, index, line, column, mark):
39a5f9918aSopenharmony_ci        self.token_number = token_number
40a5f9918aSopenharmony_ci        self.required = required
41a5f9918aSopenharmony_ci        self.index = index
42a5f9918aSopenharmony_ci        self.line = line
43a5f9918aSopenharmony_ci        self.column = column
44a5f9918aSopenharmony_ci        self.mark = mark
45a5f9918aSopenharmony_ci
46a5f9918aSopenharmony_ciclass Scanner:
47a5f9918aSopenharmony_ci
48a5f9918aSopenharmony_ci    def __init__(self):
49a5f9918aSopenharmony_ci        """Initialize the scanner."""
50a5f9918aSopenharmony_ci        # It is assumed that Scanner and Reader will have a common descendant.
51a5f9918aSopenharmony_ci        # Reader do the dirty work of checking for BOM and converting the
52a5f9918aSopenharmony_ci        # input data to Unicode. It also adds NUL to the end.
53a5f9918aSopenharmony_ci        #
54a5f9918aSopenharmony_ci        # Reader supports the following methods
55a5f9918aSopenharmony_ci        #   self.peek(i=0)       # peek the next i-th character
56a5f9918aSopenharmony_ci        #   self.prefix(l=1)     # peek the next l characters
57a5f9918aSopenharmony_ci        #   self.forward(l=1)    # read the next l characters and move the pointer.
58a5f9918aSopenharmony_ci
59a5f9918aSopenharmony_ci        # Had we reached the end of the stream?
60a5f9918aSopenharmony_ci        self.done = False
61a5f9918aSopenharmony_ci
62a5f9918aSopenharmony_ci        # The number of unclosed '{' and '['. `flow_level == 0` means block
63a5f9918aSopenharmony_ci        # context.
64a5f9918aSopenharmony_ci        self.flow_level = 0
65a5f9918aSopenharmony_ci
66a5f9918aSopenharmony_ci        # List of processed tokens that are not yet emitted.
67a5f9918aSopenharmony_ci        self.tokens = []
68a5f9918aSopenharmony_ci
69a5f9918aSopenharmony_ci        # Add the STREAM-START token.
70a5f9918aSopenharmony_ci        self.fetch_stream_start()
71a5f9918aSopenharmony_ci
72a5f9918aSopenharmony_ci        # Number of tokens that were emitted through the `get_token` method.
73a5f9918aSopenharmony_ci        self.tokens_taken = 0
74a5f9918aSopenharmony_ci
75a5f9918aSopenharmony_ci        # The current indentation level.
76a5f9918aSopenharmony_ci        self.indent = -1
77a5f9918aSopenharmony_ci
78a5f9918aSopenharmony_ci        # Past indentation levels.
79a5f9918aSopenharmony_ci        self.indents = []
80a5f9918aSopenharmony_ci
81a5f9918aSopenharmony_ci        # Variables related to simple keys treatment.
82a5f9918aSopenharmony_ci
83a5f9918aSopenharmony_ci        # A simple key is a key that is not denoted by the '?' indicator.
84a5f9918aSopenharmony_ci        # Example of simple keys:
85a5f9918aSopenharmony_ci        #   ---
86a5f9918aSopenharmony_ci        #   block simple key: value
87a5f9918aSopenharmony_ci        #   ? not a simple key:
88a5f9918aSopenharmony_ci        #   : { flow simple key: value }
89a5f9918aSopenharmony_ci        # We emit the KEY token before all keys, so when we find a potential
90a5f9918aSopenharmony_ci        # simple key, we try to locate the corresponding ':' indicator.
91a5f9918aSopenharmony_ci        # Simple keys should be limited to a single line and 1024 characters.
92a5f9918aSopenharmony_ci
93a5f9918aSopenharmony_ci        # Can a simple key start at the current position? A simple key may
94a5f9918aSopenharmony_ci        # start:
95a5f9918aSopenharmony_ci        # - at the beginning of the line, not counting indentation spaces
96a5f9918aSopenharmony_ci        #       (in block context),
97a5f9918aSopenharmony_ci        # - after '{', '[', ',' (in the flow context),
98a5f9918aSopenharmony_ci        # - after '?', ':', '-' (in the block context).
99a5f9918aSopenharmony_ci        # In the block context, this flag also signifies if a block collection
100a5f9918aSopenharmony_ci        # may start at the current position.
101a5f9918aSopenharmony_ci        self.allow_simple_key = True
102a5f9918aSopenharmony_ci
103a5f9918aSopenharmony_ci        # Keep track of possible simple keys. This is a dictionary. The key
104a5f9918aSopenharmony_ci        # is `flow_level`; there can be no more that one possible simple key
105a5f9918aSopenharmony_ci        # for each level. The value is a SimpleKey record:
106a5f9918aSopenharmony_ci        #   (token_number, required, index, line, column, mark)
107a5f9918aSopenharmony_ci        # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow),
108a5f9918aSopenharmony_ci        # '[', or '{' tokens.
109a5f9918aSopenharmony_ci        self.possible_simple_keys = {}
110a5f9918aSopenharmony_ci
111a5f9918aSopenharmony_ci    # Public methods.
112a5f9918aSopenharmony_ci
113a5f9918aSopenharmony_ci    def check_token(self, *choices):
114a5f9918aSopenharmony_ci        # Check if the next token is one of the given types.
115a5f9918aSopenharmony_ci        while self.need_more_tokens():
116a5f9918aSopenharmony_ci            self.fetch_more_tokens()
117a5f9918aSopenharmony_ci        if self.tokens:
118a5f9918aSopenharmony_ci            if not choices:
119a5f9918aSopenharmony_ci                return True
120a5f9918aSopenharmony_ci            for choice in choices:
121a5f9918aSopenharmony_ci                if isinstance(self.tokens[0], choice):
122a5f9918aSopenharmony_ci                    return True
123a5f9918aSopenharmony_ci        return False
124a5f9918aSopenharmony_ci
125a5f9918aSopenharmony_ci    def peek_token(self):
126a5f9918aSopenharmony_ci        # Return the next token, but do not delete if from the queue.
127a5f9918aSopenharmony_ci        # Return None if no more tokens.
128a5f9918aSopenharmony_ci        while self.need_more_tokens():
129a5f9918aSopenharmony_ci            self.fetch_more_tokens()
130a5f9918aSopenharmony_ci        if self.tokens:
131a5f9918aSopenharmony_ci            return self.tokens[0]
132a5f9918aSopenharmony_ci        else:
133a5f9918aSopenharmony_ci            return None
134a5f9918aSopenharmony_ci
135a5f9918aSopenharmony_ci    def get_token(self):
136a5f9918aSopenharmony_ci        # Return the next token.
137a5f9918aSopenharmony_ci        while self.need_more_tokens():
138a5f9918aSopenharmony_ci            self.fetch_more_tokens()
139a5f9918aSopenharmony_ci        if self.tokens:
140a5f9918aSopenharmony_ci            self.tokens_taken += 1
141a5f9918aSopenharmony_ci            return self.tokens.pop(0)
142a5f9918aSopenharmony_ci
143a5f9918aSopenharmony_ci    # Private methods.
144a5f9918aSopenharmony_ci
145a5f9918aSopenharmony_ci    def need_more_tokens(self):
146a5f9918aSopenharmony_ci        if self.done:
147a5f9918aSopenharmony_ci            return False
148a5f9918aSopenharmony_ci        if not self.tokens:
149a5f9918aSopenharmony_ci            return True
150a5f9918aSopenharmony_ci        # The current token may be a potential simple key, so we
151a5f9918aSopenharmony_ci        # need to look further.
152a5f9918aSopenharmony_ci        self.stale_possible_simple_keys()
153a5f9918aSopenharmony_ci        if self.next_possible_simple_key() == self.tokens_taken:
154a5f9918aSopenharmony_ci            return True
155a5f9918aSopenharmony_ci
156a5f9918aSopenharmony_ci    def fetch_more_tokens(self):
157a5f9918aSopenharmony_ci
158a5f9918aSopenharmony_ci        # Eat whitespaces and comments until we reach the next token.
159a5f9918aSopenharmony_ci        self.scan_to_next_token()
160a5f9918aSopenharmony_ci
161a5f9918aSopenharmony_ci        # Remove obsolete possible simple keys.
162a5f9918aSopenharmony_ci        self.stale_possible_simple_keys()
163a5f9918aSopenharmony_ci
164a5f9918aSopenharmony_ci        # Compare the current indentation and column. It may add some tokens
165a5f9918aSopenharmony_ci        # and decrease the current indentation level.
166a5f9918aSopenharmony_ci        self.unwind_indent(self.column)
167a5f9918aSopenharmony_ci
168a5f9918aSopenharmony_ci        # Peek the next character.
169a5f9918aSopenharmony_ci        ch = self.peek()
170a5f9918aSopenharmony_ci
171a5f9918aSopenharmony_ci        # Is it the end of stream?
172a5f9918aSopenharmony_ci        if ch == '\0':
173a5f9918aSopenharmony_ci            return self.fetch_stream_end()
174a5f9918aSopenharmony_ci
175a5f9918aSopenharmony_ci        # Is it a directive?
176a5f9918aSopenharmony_ci        if ch == '%' and self.check_directive():
177a5f9918aSopenharmony_ci            return self.fetch_directive()
178a5f9918aSopenharmony_ci
179a5f9918aSopenharmony_ci        # Is it the document start?
180a5f9918aSopenharmony_ci        if ch == '-' and self.check_document_start():
181a5f9918aSopenharmony_ci            return self.fetch_document_start()
182a5f9918aSopenharmony_ci
183a5f9918aSopenharmony_ci        # Is it the document end?
184a5f9918aSopenharmony_ci        if ch == '.' and self.check_document_end():
185a5f9918aSopenharmony_ci            return self.fetch_document_end()
186a5f9918aSopenharmony_ci
187a5f9918aSopenharmony_ci        # TODO: support for BOM within a stream.
188a5f9918aSopenharmony_ci        #if ch == '\uFEFF':
189a5f9918aSopenharmony_ci        #    return self.fetch_bom()    <-- issue BOMToken
190a5f9918aSopenharmony_ci
191a5f9918aSopenharmony_ci        # Note: the order of the following checks is NOT significant.
192a5f9918aSopenharmony_ci
193a5f9918aSopenharmony_ci        # Is it the flow sequence start indicator?
194a5f9918aSopenharmony_ci        if ch == '[':
195a5f9918aSopenharmony_ci            return self.fetch_flow_sequence_start()
196a5f9918aSopenharmony_ci
197a5f9918aSopenharmony_ci        # Is it the flow mapping start indicator?
198a5f9918aSopenharmony_ci        if ch == '{':
199a5f9918aSopenharmony_ci            return self.fetch_flow_mapping_start()
200a5f9918aSopenharmony_ci
201a5f9918aSopenharmony_ci        # Is it the flow sequence end indicator?
202a5f9918aSopenharmony_ci        if ch == ']':
203a5f9918aSopenharmony_ci            return self.fetch_flow_sequence_end()
204a5f9918aSopenharmony_ci
205a5f9918aSopenharmony_ci        # Is it the flow mapping end indicator?
206a5f9918aSopenharmony_ci        if ch == '}':
207a5f9918aSopenharmony_ci            return self.fetch_flow_mapping_end()
208a5f9918aSopenharmony_ci
209a5f9918aSopenharmony_ci        # Is it the flow entry indicator?
210a5f9918aSopenharmony_ci        if ch == ',':
211a5f9918aSopenharmony_ci            return self.fetch_flow_entry()
212a5f9918aSopenharmony_ci
213a5f9918aSopenharmony_ci        # Is it the block entry indicator?
214a5f9918aSopenharmony_ci        if ch == '-' and self.check_block_entry():
215a5f9918aSopenharmony_ci            return self.fetch_block_entry()
216a5f9918aSopenharmony_ci
217a5f9918aSopenharmony_ci        # Is it the key indicator?
218a5f9918aSopenharmony_ci        if ch == '?' and self.check_key():
219a5f9918aSopenharmony_ci            return self.fetch_key()
220a5f9918aSopenharmony_ci
221a5f9918aSopenharmony_ci        # Is it the value indicator?
222a5f9918aSopenharmony_ci        if ch == ':' and self.check_value():
223a5f9918aSopenharmony_ci            return self.fetch_value()
224a5f9918aSopenharmony_ci
225a5f9918aSopenharmony_ci        # Is it an alias?
226a5f9918aSopenharmony_ci        if ch == '*':
227a5f9918aSopenharmony_ci            return self.fetch_alias()
228a5f9918aSopenharmony_ci
229a5f9918aSopenharmony_ci        # Is it an anchor?
230a5f9918aSopenharmony_ci        if ch == '&':
231a5f9918aSopenharmony_ci            return self.fetch_anchor()
232a5f9918aSopenharmony_ci
233a5f9918aSopenharmony_ci        # Is it a tag?
234a5f9918aSopenharmony_ci        if ch == '!':
235a5f9918aSopenharmony_ci            return self.fetch_tag()
236a5f9918aSopenharmony_ci
237a5f9918aSopenharmony_ci        # Is it a literal scalar?
238a5f9918aSopenharmony_ci        if ch == '|' and not self.flow_level:
239a5f9918aSopenharmony_ci            return self.fetch_literal()
240a5f9918aSopenharmony_ci
241a5f9918aSopenharmony_ci        # Is it a folded scalar?
242a5f9918aSopenharmony_ci        if ch == '>' and not self.flow_level:
243a5f9918aSopenharmony_ci            return self.fetch_folded()
244a5f9918aSopenharmony_ci
245a5f9918aSopenharmony_ci        # Is it a single quoted scalar?
246a5f9918aSopenharmony_ci        if ch == '\'':
247a5f9918aSopenharmony_ci            return self.fetch_single()
248a5f9918aSopenharmony_ci
249a5f9918aSopenharmony_ci        # Is it a double quoted scalar?
250a5f9918aSopenharmony_ci        if ch == '\"':
251a5f9918aSopenharmony_ci            return self.fetch_double()
252a5f9918aSopenharmony_ci
253a5f9918aSopenharmony_ci        # It must be a plain scalar then.
254a5f9918aSopenharmony_ci        if self.check_plain():
255a5f9918aSopenharmony_ci            return self.fetch_plain()
256a5f9918aSopenharmony_ci
257a5f9918aSopenharmony_ci        # No? It's an error. Let's produce a nice error message.
258a5f9918aSopenharmony_ci        raise ScannerError("while scanning for the next token", None,
259a5f9918aSopenharmony_ci                "found character %r that cannot start any token" % ch,
260a5f9918aSopenharmony_ci                self.get_mark())
261a5f9918aSopenharmony_ci
262a5f9918aSopenharmony_ci    # Simple keys treatment.
263a5f9918aSopenharmony_ci
264a5f9918aSopenharmony_ci    def next_possible_simple_key(self):
265a5f9918aSopenharmony_ci        # Return the number of the nearest possible simple key. Actually we
266a5f9918aSopenharmony_ci        # don't need to loop through the whole dictionary. We may replace it
267a5f9918aSopenharmony_ci        # with the following code:
268a5f9918aSopenharmony_ci        #   if not self.possible_simple_keys:
269a5f9918aSopenharmony_ci        #       return None
270a5f9918aSopenharmony_ci        #   return self.possible_simple_keys[
271a5f9918aSopenharmony_ci        #           min(self.possible_simple_keys.keys())].token_number
272a5f9918aSopenharmony_ci        min_token_number = None
273a5f9918aSopenharmony_ci        for level in self.possible_simple_keys:
274a5f9918aSopenharmony_ci            key = self.possible_simple_keys[level]
275a5f9918aSopenharmony_ci            if min_token_number is None or key.token_number < min_token_number:
276a5f9918aSopenharmony_ci                min_token_number = key.token_number
277a5f9918aSopenharmony_ci        return min_token_number
278a5f9918aSopenharmony_ci
279a5f9918aSopenharmony_ci    def stale_possible_simple_keys(self):
280a5f9918aSopenharmony_ci        # Remove entries that are no longer possible simple keys. According to
281a5f9918aSopenharmony_ci        # the YAML specification, simple keys
282a5f9918aSopenharmony_ci        # - should be limited to a single line,
283a5f9918aSopenharmony_ci        # - should be no longer than 1024 characters.
284a5f9918aSopenharmony_ci        # Disabling this procedure will allow simple keys of any length and
285a5f9918aSopenharmony_ci        # height (may cause problems if indentation is broken though).
286a5f9918aSopenharmony_ci        for level in list(self.possible_simple_keys):
287a5f9918aSopenharmony_ci            key = self.possible_simple_keys[level]
288a5f9918aSopenharmony_ci            if key.line != self.line  \
289a5f9918aSopenharmony_ci                    or self.index-key.index > 1024:
290a5f9918aSopenharmony_ci                if key.required:
291a5f9918aSopenharmony_ci                    raise ScannerError("while scanning a simple key", key.mark,
292a5f9918aSopenharmony_ci                            "could not find expected ':'", self.get_mark())
293a5f9918aSopenharmony_ci                del self.possible_simple_keys[level]
294a5f9918aSopenharmony_ci
295a5f9918aSopenharmony_ci    def save_possible_simple_key(self):
296a5f9918aSopenharmony_ci        # The next token may start a simple key. We check if it's possible
297a5f9918aSopenharmony_ci        # and save its position. This function is called for
298a5f9918aSopenharmony_ci        #   ALIAS, ANCHOR, TAG, SCALAR(flow), '[', and '{'.
299a5f9918aSopenharmony_ci
300a5f9918aSopenharmony_ci        # Check if a simple key is required at the current position.
301a5f9918aSopenharmony_ci        required = not self.flow_level and self.indent == self.column
302a5f9918aSopenharmony_ci
303a5f9918aSopenharmony_ci        # The next token might be a simple key. Let's save it's number and
304a5f9918aSopenharmony_ci        # position.
305a5f9918aSopenharmony_ci        if self.allow_simple_key:
306a5f9918aSopenharmony_ci            self.remove_possible_simple_key()
307a5f9918aSopenharmony_ci            token_number = self.tokens_taken+len(self.tokens)
308a5f9918aSopenharmony_ci            key = SimpleKey(token_number, required,
309a5f9918aSopenharmony_ci                    self.index, self.line, self.column, self.get_mark())
310a5f9918aSopenharmony_ci            self.possible_simple_keys[self.flow_level] = key
311a5f9918aSopenharmony_ci
312a5f9918aSopenharmony_ci    def remove_possible_simple_key(self):
313a5f9918aSopenharmony_ci        # Remove the saved possible key position at the current flow level.
314a5f9918aSopenharmony_ci        if self.flow_level in self.possible_simple_keys:
315a5f9918aSopenharmony_ci            key = self.possible_simple_keys[self.flow_level]
316a5f9918aSopenharmony_ci
317a5f9918aSopenharmony_ci            if key.required:
318a5f9918aSopenharmony_ci                raise ScannerError("while scanning a simple key", key.mark,
319a5f9918aSopenharmony_ci                        "could not find expected ':'", self.get_mark())
320a5f9918aSopenharmony_ci
321a5f9918aSopenharmony_ci            del self.possible_simple_keys[self.flow_level]
322a5f9918aSopenharmony_ci
323a5f9918aSopenharmony_ci    # Indentation functions.
324a5f9918aSopenharmony_ci
325a5f9918aSopenharmony_ci    def unwind_indent(self, column):
326a5f9918aSopenharmony_ci
327a5f9918aSopenharmony_ci        ## In flow context, tokens should respect indentation.
328a5f9918aSopenharmony_ci        ## Actually the condition should be `self.indent >= column` according to
329a5f9918aSopenharmony_ci        ## the spec. But this condition will prohibit intuitively correct
330a5f9918aSopenharmony_ci        ## constructions such as
331a5f9918aSopenharmony_ci        ## key : {
332a5f9918aSopenharmony_ci        ## }
333a5f9918aSopenharmony_ci        #if self.flow_level and self.indent > column:
334a5f9918aSopenharmony_ci        #    raise ScannerError(None, None,
335a5f9918aSopenharmony_ci        #            "invalid indentation or unclosed '[' or '{'",
336a5f9918aSopenharmony_ci        #            self.get_mark())
337a5f9918aSopenharmony_ci
338a5f9918aSopenharmony_ci        # In the flow context, indentation is ignored. We make the scanner less
339a5f9918aSopenharmony_ci        # restrictive then specification requires.
340a5f9918aSopenharmony_ci        if self.flow_level:
341a5f9918aSopenharmony_ci            return
342a5f9918aSopenharmony_ci
343a5f9918aSopenharmony_ci        # In block context, we may need to issue the BLOCK-END tokens.
344a5f9918aSopenharmony_ci        while self.indent > column:
345a5f9918aSopenharmony_ci            mark = self.get_mark()
346a5f9918aSopenharmony_ci            self.indent = self.indents.pop()
347a5f9918aSopenharmony_ci            self.tokens.append(BlockEndToken(mark, mark))
348a5f9918aSopenharmony_ci
349a5f9918aSopenharmony_ci    def add_indent(self, column):
350a5f9918aSopenharmony_ci        # Check if we need to increase indentation.
351a5f9918aSopenharmony_ci        if self.indent < column:
352a5f9918aSopenharmony_ci            self.indents.append(self.indent)
353a5f9918aSopenharmony_ci            self.indent = column
354a5f9918aSopenharmony_ci            return True
355a5f9918aSopenharmony_ci        return False
356a5f9918aSopenharmony_ci
357a5f9918aSopenharmony_ci    # Fetchers.
358a5f9918aSopenharmony_ci
359a5f9918aSopenharmony_ci    def fetch_stream_start(self):
360a5f9918aSopenharmony_ci        # We always add STREAM-START as the first token and STREAM-END as the
361a5f9918aSopenharmony_ci        # last token.
362a5f9918aSopenharmony_ci
363a5f9918aSopenharmony_ci        # Read the token.
364a5f9918aSopenharmony_ci        mark = self.get_mark()
365a5f9918aSopenharmony_ci
366a5f9918aSopenharmony_ci        # Add STREAM-START.
367a5f9918aSopenharmony_ci        self.tokens.append(StreamStartToken(mark, mark,
368a5f9918aSopenharmony_ci            encoding=self.encoding))
369a5f9918aSopenharmony_ci
370a5f9918aSopenharmony_ci
371a5f9918aSopenharmony_ci    def fetch_stream_end(self):
372a5f9918aSopenharmony_ci
373a5f9918aSopenharmony_ci        # Set the current indentation to -1.
374a5f9918aSopenharmony_ci        self.unwind_indent(-1)
375a5f9918aSopenharmony_ci
376a5f9918aSopenharmony_ci        # Reset simple keys.
377a5f9918aSopenharmony_ci        self.remove_possible_simple_key()
378a5f9918aSopenharmony_ci        self.allow_simple_key = False
379a5f9918aSopenharmony_ci        self.possible_simple_keys = {}
380a5f9918aSopenharmony_ci
381a5f9918aSopenharmony_ci        # Read the token.
382a5f9918aSopenharmony_ci        mark = self.get_mark()
383a5f9918aSopenharmony_ci
384a5f9918aSopenharmony_ci        # Add STREAM-END.
385a5f9918aSopenharmony_ci        self.tokens.append(StreamEndToken(mark, mark))
386a5f9918aSopenharmony_ci
387a5f9918aSopenharmony_ci        # The steam is finished.
388a5f9918aSopenharmony_ci        self.done = True
389a5f9918aSopenharmony_ci
390a5f9918aSopenharmony_ci    def fetch_directive(self):
391a5f9918aSopenharmony_ci
392a5f9918aSopenharmony_ci        # Set the current indentation to -1.
393a5f9918aSopenharmony_ci        self.unwind_indent(-1)
394a5f9918aSopenharmony_ci
395a5f9918aSopenharmony_ci        # Reset simple keys.
396a5f9918aSopenharmony_ci        self.remove_possible_simple_key()
397a5f9918aSopenharmony_ci        self.allow_simple_key = False
398a5f9918aSopenharmony_ci
399a5f9918aSopenharmony_ci        # Scan and add DIRECTIVE.
400a5f9918aSopenharmony_ci        self.tokens.append(self.scan_directive())
401a5f9918aSopenharmony_ci
402a5f9918aSopenharmony_ci    def fetch_document_start(self):
403a5f9918aSopenharmony_ci        self.fetch_document_indicator(DocumentStartToken)
404a5f9918aSopenharmony_ci
405a5f9918aSopenharmony_ci    def fetch_document_end(self):
406a5f9918aSopenharmony_ci        self.fetch_document_indicator(DocumentEndToken)
407a5f9918aSopenharmony_ci
408a5f9918aSopenharmony_ci    def fetch_document_indicator(self, TokenClass):
409a5f9918aSopenharmony_ci
410a5f9918aSopenharmony_ci        # Set the current indentation to -1.
411a5f9918aSopenharmony_ci        self.unwind_indent(-1)
412a5f9918aSopenharmony_ci
413a5f9918aSopenharmony_ci        # Reset simple keys. Note that there could not be a block collection
414a5f9918aSopenharmony_ci        # after '---'.
415a5f9918aSopenharmony_ci        self.remove_possible_simple_key()
416a5f9918aSopenharmony_ci        self.allow_simple_key = False
417a5f9918aSopenharmony_ci
418a5f9918aSopenharmony_ci        # Add DOCUMENT-START or DOCUMENT-END.
419a5f9918aSopenharmony_ci        start_mark = self.get_mark()
420a5f9918aSopenharmony_ci        self.forward(3)
421a5f9918aSopenharmony_ci        end_mark = self.get_mark()
422a5f9918aSopenharmony_ci        self.tokens.append(TokenClass(start_mark, end_mark))
423a5f9918aSopenharmony_ci
424a5f9918aSopenharmony_ci    def fetch_flow_sequence_start(self):
425a5f9918aSopenharmony_ci        self.fetch_flow_collection_start(FlowSequenceStartToken)
426a5f9918aSopenharmony_ci
427a5f9918aSopenharmony_ci    def fetch_flow_mapping_start(self):
428a5f9918aSopenharmony_ci        self.fetch_flow_collection_start(FlowMappingStartToken)
429a5f9918aSopenharmony_ci
430a5f9918aSopenharmony_ci    def fetch_flow_collection_start(self, TokenClass):
431a5f9918aSopenharmony_ci
432a5f9918aSopenharmony_ci        # '[' and '{' may start a simple key.
433a5f9918aSopenharmony_ci        self.save_possible_simple_key()
434a5f9918aSopenharmony_ci
435a5f9918aSopenharmony_ci        # Increase the flow level.
436a5f9918aSopenharmony_ci        self.flow_level += 1
437a5f9918aSopenharmony_ci
438a5f9918aSopenharmony_ci        # Simple keys are allowed after '[' and '{'.
439a5f9918aSopenharmony_ci        self.allow_simple_key = True
440a5f9918aSopenharmony_ci
441a5f9918aSopenharmony_ci        # Add FLOW-SEQUENCE-START or FLOW-MAPPING-START.
442a5f9918aSopenharmony_ci        start_mark = self.get_mark()
443a5f9918aSopenharmony_ci        self.forward()
444a5f9918aSopenharmony_ci        end_mark = self.get_mark()
445a5f9918aSopenharmony_ci        self.tokens.append(TokenClass(start_mark, end_mark))
446a5f9918aSopenharmony_ci
447a5f9918aSopenharmony_ci    def fetch_flow_sequence_end(self):
448a5f9918aSopenharmony_ci        self.fetch_flow_collection_end(FlowSequenceEndToken)
449a5f9918aSopenharmony_ci
450a5f9918aSopenharmony_ci    def fetch_flow_mapping_end(self):
451a5f9918aSopenharmony_ci        self.fetch_flow_collection_end(FlowMappingEndToken)
452a5f9918aSopenharmony_ci
453a5f9918aSopenharmony_ci    def fetch_flow_collection_end(self, TokenClass):
454a5f9918aSopenharmony_ci
455a5f9918aSopenharmony_ci        # Reset possible simple key on the current level.
456a5f9918aSopenharmony_ci        self.remove_possible_simple_key()
457a5f9918aSopenharmony_ci
458a5f9918aSopenharmony_ci        # Decrease the flow level.
459a5f9918aSopenharmony_ci        self.flow_level -= 1
460a5f9918aSopenharmony_ci
461a5f9918aSopenharmony_ci        # No simple keys after ']' or '}'.
462a5f9918aSopenharmony_ci        self.allow_simple_key = False
463a5f9918aSopenharmony_ci
464a5f9918aSopenharmony_ci        # Add FLOW-SEQUENCE-END or FLOW-MAPPING-END.
465a5f9918aSopenharmony_ci        start_mark = self.get_mark()
466a5f9918aSopenharmony_ci        self.forward()
467a5f9918aSopenharmony_ci        end_mark = self.get_mark()
468a5f9918aSopenharmony_ci        self.tokens.append(TokenClass(start_mark, end_mark))
469a5f9918aSopenharmony_ci
470a5f9918aSopenharmony_ci    def fetch_flow_entry(self):
471a5f9918aSopenharmony_ci
472a5f9918aSopenharmony_ci        # Simple keys are allowed after ','.
473a5f9918aSopenharmony_ci        self.allow_simple_key = True
474a5f9918aSopenharmony_ci
475a5f9918aSopenharmony_ci        # Reset possible simple key on the current level.
476a5f9918aSopenharmony_ci        self.remove_possible_simple_key()
477a5f9918aSopenharmony_ci
478a5f9918aSopenharmony_ci        # Add FLOW-ENTRY.
479a5f9918aSopenharmony_ci        start_mark = self.get_mark()
480a5f9918aSopenharmony_ci        self.forward()
481a5f9918aSopenharmony_ci        end_mark = self.get_mark()
482a5f9918aSopenharmony_ci        self.tokens.append(FlowEntryToken(start_mark, end_mark))
483a5f9918aSopenharmony_ci
484a5f9918aSopenharmony_ci    def fetch_block_entry(self):
485a5f9918aSopenharmony_ci
486a5f9918aSopenharmony_ci        # Block context needs additional checks.
487a5f9918aSopenharmony_ci        if not self.flow_level:
488a5f9918aSopenharmony_ci
489a5f9918aSopenharmony_ci            # Are we allowed to start a new entry?
490a5f9918aSopenharmony_ci            if not self.allow_simple_key:
491a5f9918aSopenharmony_ci                raise ScannerError(None, None,
492a5f9918aSopenharmony_ci                        "sequence entries are not allowed here",
493a5f9918aSopenharmony_ci                        self.get_mark())
494a5f9918aSopenharmony_ci
495a5f9918aSopenharmony_ci            # We may need to add BLOCK-SEQUENCE-START.
496a5f9918aSopenharmony_ci            if self.add_indent(self.column):
497a5f9918aSopenharmony_ci                mark = self.get_mark()
498a5f9918aSopenharmony_ci                self.tokens.append(BlockSequenceStartToken(mark, mark))
499a5f9918aSopenharmony_ci
500a5f9918aSopenharmony_ci        # It's an error for the block entry to occur in the flow context,
501a5f9918aSopenharmony_ci        # but we let the parser detect this.
502a5f9918aSopenharmony_ci        else:
503a5f9918aSopenharmony_ci            pass
504a5f9918aSopenharmony_ci
505a5f9918aSopenharmony_ci        # Simple keys are allowed after '-'.
506a5f9918aSopenharmony_ci        self.allow_simple_key = True
507a5f9918aSopenharmony_ci
508a5f9918aSopenharmony_ci        # Reset possible simple key on the current level.
509a5f9918aSopenharmony_ci        self.remove_possible_simple_key()
510a5f9918aSopenharmony_ci
511a5f9918aSopenharmony_ci        # Add BLOCK-ENTRY.
512a5f9918aSopenharmony_ci        start_mark = self.get_mark()
513a5f9918aSopenharmony_ci        self.forward()
514a5f9918aSopenharmony_ci        end_mark = self.get_mark()
515a5f9918aSopenharmony_ci        self.tokens.append(BlockEntryToken(start_mark, end_mark))
516a5f9918aSopenharmony_ci
517a5f9918aSopenharmony_ci    def fetch_key(self):
518a5f9918aSopenharmony_ci
519a5f9918aSopenharmony_ci        # Block context needs additional checks.
520a5f9918aSopenharmony_ci        if not self.flow_level:
521a5f9918aSopenharmony_ci
522a5f9918aSopenharmony_ci            # Are we allowed to start a key (not necessary a simple)?
523a5f9918aSopenharmony_ci            if not self.allow_simple_key:
524a5f9918aSopenharmony_ci                raise ScannerError(None, None,
525a5f9918aSopenharmony_ci                        "mapping keys are not allowed here",
526a5f9918aSopenharmony_ci                        self.get_mark())
527a5f9918aSopenharmony_ci
528a5f9918aSopenharmony_ci            # We may need to add BLOCK-MAPPING-START.
529a5f9918aSopenharmony_ci            if self.add_indent(self.column):
530a5f9918aSopenharmony_ci                mark = self.get_mark()
531a5f9918aSopenharmony_ci                self.tokens.append(BlockMappingStartToken(mark, mark))
532a5f9918aSopenharmony_ci
533a5f9918aSopenharmony_ci        # Simple keys are allowed after '?' in the block context.
534a5f9918aSopenharmony_ci        self.allow_simple_key = not self.flow_level
535a5f9918aSopenharmony_ci
536a5f9918aSopenharmony_ci        # Reset possible simple key on the current level.
537a5f9918aSopenharmony_ci        self.remove_possible_simple_key()
538a5f9918aSopenharmony_ci
539a5f9918aSopenharmony_ci        # Add KEY.
540a5f9918aSopenharmony_ci        start_mark = self.get_mark()
541a5f9918aSopenharmony_ci        self.forward()
542a5f9918aSopenharmony_ci        end_mark = self.get_mark()
543a5f9918aSopenharmony_ci        self.tokens.append(KeyToken(start_mark, end_mark))
544a5f9918aSopenharmony_ci
545a5f9918aSopenharmony_ci    def fetch_value(self):
546a5f9918aSopenharmony_ci
547a5f9918aSopenharmony_ci        # Do we determine a simple key?
548a5f9918aSopenharmony_ci        if self.flow_level in self.possible_simple_keys:
549a5f9918aSopenharmony_ci
550a5f9918aSopenharmony_ci            # Add KEY.
551a5f9918aSopenharmony_ci            key = self.possible_simple_keys[self.flow_level]
552a5f9918aSopenharmony_ci            del self.possible_simple_keys[self.flow_level]
553a5f9918aSopenharmony_ci            self.tokens.insert(key.token_number-self.tokens_taken,
554a5f9918aSopenharmony_ci                    KeyToken(key.mark, key.mark))
555a5f9918aSopenharmony_ci
556a5f9918aSopenharmony_ci            # If this key starts a new block mapping, we need to add
557a5f9918aSopenharmony_ci            # BLOCK-MAPPING-START.
558a5f9918aSopenharmony_ci            if not self.flow_level:
559a5f9918aSopenharmony_ci                if self.add_indent(key.column):
560a5f9918aSopenharmony_ci                    self.tokens.insert(key.token_number-self.tokens_taken,
561a5f9918aSopenharmony_ci                            BlockMappingStartToken(key.mark, key.mark))
562a5f9918aSopenharmony_ci
563a5f9918aSopenharmony_ci            # There cannot be two simple keys one after another.
564a5f9918aSopenharmony_ci            self.allow_simple_key = False
565a5f9918aSopenharmony_ci
566a5f9918aSopenharmony_ci        # It must be a part of a complex key.
567a5f9918aSopenharmony_ci        else:
568a5f9918aSopenharmony_ci
569a5f9918aSopenharmony_ci            # Block context needs additional checks.
570a5f9918aSopenharmony_ci            # (Do we really need them? They will be caught by the parser
571a5f9918aSopenharmony_ci            # anyway.)
572a5f9918aSopenharmony_ci            if not self.flow_level:
573a5f9918aSopenharmony_ci
574a5f9918aSopenharmony_ci                # We are allowed to start a complex value if and only if
575a5f9918aSopenharmony_ci                # we can start a simple key.
576a5f9918aSopenharmony_ci                if not self.allow_simple_key:
577a5f9918aSopenharmony_ci                    raise ScannerError(None, None,
578a5f9918aSopenharmony_ci                            "mapping values are not allowed here",
579a5f9918aSopenharmony_ci                            self.get_mark())
580a5f9918aSopenharmony_ci
581a5f9918aSopenharmony_ci            # If this value starts a new block mapping, we need to add
582a5f9918aSopenharmony_ci            # BLOCK-MAPPING-START.  It will be detected as an error later by
583a5f9918aSopenharmony_ci            # the parser.
584a5f9918aSopenharmony_ci            if not self.flow_level:
585a5f9918aSopenharmony_ci                if self.add_indent(self.column):
586a5f9918aSopenharmony_ci                    mark = self.get_mark()
587a5f9918aSopenharmony_ci                    self.tokens.append(BlockMappingStartToken(mark, mark))
588a5f9918aSopenharmony_ci
589a5f9918aSopenharmony_ci            # Simple keys are allowed after ':' in the block context.
590a5f9918aSopenharmony_ci            self.allow_simple_key = not self.flow_level
591a5f9918aSopenharmony_ci
592a5f9918aSopenharmony_ci            # Reset possible simple key on the current level.
593a5f9918aSopenharmony_ci            self.remove_possible_simple_key()
594a5f9918aSopenharmony_ci
595a5f9918aSopenharmony_ci        # Add VALUE.
596a5f9918aSopenharmony_ci        start_mark = self.get_mark()
597a5f9918aSopenharmony_ci        self.forward()
598a5f9918aSopenharmony_ci        end_mark = self.get_mark()
599a5f9918aSopenharmony_ci        self.tokens.append(ValueToken(start_mark, end_mark))
600a5f9918aSopenharmony_ci
601a5f9918aSopenharmony_ci    def fetch_alias(self):
602a5f9918aSopenharmony_ci
603a5f9918aSopenharmony_ci        # ALIAS could be a simple key.
604a5f9918aSopenharmony_ci        self.save_possible_simple_key()
605a5f9918aSopenharmony_ci
606a5f9918aSopenharmony_ci        # No simple keys after ALIAS.
607a5f9918aSopenharmony_ci        self.allow_simple_key = False
608a5f9918aSopenharmony_ci
609a5f9918aSopenharmony_ci        # Scan and add ALIAS.
610a5f9918aSopenharmony_ci        self.tokens.append(self.scan_anchor(AliasToken))
611a5f9918aSopenharmony_ci
612a5f9918aSopenharmony_ci    def fetch_anchor(self):
613a5f9918aSopenharmony_ci
614a5f9918aSopenharmony_ci        # ANCHOR could start a simple key.
615a5f9918aSopenharmony_ci        self.save_possible_simple_key()
616a5f9918aSopenharmony_ci
617a5f9918aSopenharmony_ci        # No simple keys after ANCHOR.
618a5f9918aSopenharmony_ci        self.allow_simple_key = False
619a5f9918aSopenharmony_ci
620a5f9918aSopenharmony_ci        # Scan and add ANCHOR.
621a5f9918aSopenharmony_ci        self.tokens.append(self.scan_anchor(AnchorToken))
622a5f9918aSopenharmony_ci
623a5f9918aSopenharmony_ci    def fetch_tag(self):
624a5f9918aSopenharmony_ci
625a5f9918aSopenharmony_ci        # TAG could start a simple key.
626a5f9918aSopenharmony_ci        self.save_possible_simple_key()
627a5f9918aSopenharmony_ci
628a5f9918aSopenharmony_ci        # No simple keys after TAG.
629a5f9918aSopenharmony_ci        self.allow_simple_key = False
630a5f9918aSopenharmony_ci
631a5f9918aSopenharmony_ci        # Scan and add TAG.
632a5f9918aSopenharmony_ci        self.tokens.append(self.scan_tag())
633a5f9918aSopenharmony_ci
634a5f9918aSopenharmony_ci    def fetch_literal(self):
635a5f9918aSopenharmony_ci        self.fetch_block_scalar(style='|')
636a5f9918aSopenharmony_ci
637a5f9918aSopenharmony_ci    def fetch_folded(self):
638a5f9918aSopenharmony_ci        self.fetch_block_scalar(style='>')
639a5f9918aSopenharmony_ci
640a5f9918aSopenharmony_ci    def fetch_block_scalar(self, style):
641a5f9918aSopenharmony_ci
642a5f9918aSopenharmony_ci        # A simple key may follow a block scalar.
643a5f9918aSopenharmony_ci        self.allow_simple_key = True
644a5f9918aSopenharmony_ci
645a5f9918aSopenharmony_ci        # Reset possible simple key on the current level.
646a5f9918aSopenharmony_ci        self.remove_possible_simple_key()
647a5f9918aSopenharmony_ci
648a5f9918aSopenharmony_ci        # Scan and add SCALAR.
649a5f9918aSopenharmony_ci        self.tokens.append(self.scan_block_scalar(style))
650a5f9918aSopenharmony_ci
651a5f9918aSopenharmony_ci    def fetch_single(self):
652a5f9918aSopenharmony_ci        self.fetch_flow_scalar(style='\'')
653a5f9918aSopenharmony_ci
654a5f9918aSopenharmony_ci    def fetch_double(self):
655a5f9918aSopenharmony_ci        self.fetch_flow_scalar(style='"')
656a5f9918aSopenharmony_ci
657a5f9918aSopenharmony_ci    def fetch_flow_scalar(self, style):
658a5f9918aSopenharmony_ci
659a5f9918aSopenharmony_ci        # A flow scalar could be a simple key.
660a5f9918aSopenharmony_ci        self.save_possible_simple_key()
661a5f9918aSopenharmony_ci
662a5f9918aSopenharmony_ci        # No simple keys after flow scalars.
663a5f9918aSopenharmony_ci        self.allow_simple_key = False
664a5f9918aSopenharmony_ci
665a5f9918aSopenharmony_ci        # Scan and add SCALAR.
666a5f9918aSopenharmony_ci        self.tokens.append(self.scan_flow_scalar(style))
667a5f9918aSopenharmony_ci
668a5f9918aSopenharmony_ci    def fetch_plain(self):
669a5f9918aSopenharmony_ci
670a5f9918aSopenharmony_ci        # A plain scalar could be a simple key.
671a5f9918aSopenharmony_ci        self.save_possible_simple_key()
672a5f9918aSopenharmony_ci
673a5f9918aSopenharmony_ci        # No simple keys after plain scalars. But note that `scan_plain` will
674a5f9918aSopenharmony_ci        # change this flag if the scan is finished at the beginning of the
675a5f9918aSopenharmony_ci        # line.
676a5f9918aSopenharmony_ci        self.allow_simple_key = False
677a5f9918aSopenharmony_ci
678a5f9918aSopenharmony_ci        # Scan and add SCALAR. May change `allow_simple_key`.
679a5f9918aSopenharmony_ci        self.tokens.append(self.scan_plain())
680a5f9918aSopenharmony_ci
681a5f9918aSopenharmony_ci    # Checkers.
682a5f9918aSopenharmony_ci
683a5f9918aSopenharmony_ci    def check_directive(self):
684a5f9918aSopenharmony_ci
685a5f9918aSopenharmony_ci        # DIRECTIVE:        ^ '%' ...
686a5f9918aSopenharmony_ci        # The '%' indicator is already checked.
687a5f9918aSopenharmony_ci        if self.column == 0:
688a5f9918aSopenharmony_ci            return True
689a5f9918aSopenharmony_ci
690a5f9918aSopenharmony_ci    def check_document_start(self):
691a5f9918aSopenharmony_ci
692a5f9918aSopenharmony_ci        # DOCUMENT-START:   ^ '---' (' '|'\n')
693a5f9918aSopenharmony_ci        if self.column == 0:
694a5f9918aSopenharmony_ci            if self.prefix(3) == '---'  \
695a5f9918aSopenharmony_ci                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
696a5f9918aSopenharmony_ci                return True
697a5f9918aSopenharmony_ci
698a5f9918aSopenharmony_ci    def check_document_end(self):
699a5f9918aSopenharmony_ci
700a5f9918aSopenharmony_ci        # DOCUMENT-END:     ^ '...' (' '|'\n')
701a5f9918aSopenharmony_ci        if self.column == 0:
702a5f9918aSopenharmony_ci            if self.prefix(3) == '...'  \
703a5f9918aSopenharmony_ci                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
704a5f9918aSopenharmony_ci                return True
705a5f9918aSopenharmony_ci
706a5f9918aSopenharmony_ci    def check_block_entry(self):
707a5f9918aSopenharmony_ci
708a5f9918aSopenharmony_ci        # BLOCK-ENTRY:      '-' (' '|'\n')
709a5f9918aSopenharmony_ci        return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
710a5f9918aSopenharmony_ci
711a5f9918aSopenharmony_ci    def check_key(self):
712a5f9918aSopenharmony_ci
713a5f9918aSopenharmony_ci        # KEY(flow context):    '?'
714a5f9918aSopenharmony_ci        if self.flow_level:
715a5f9918aSopenharmony_ci            return True
716a5f9918aSopenharmony_ci
717a5f9918aSopenharmony_ci        # KEY(block context):   '?' (' '|'\n')
718a5f9918aSopenharmony_ci        else:
719a5f9918aSopenharmony_ci            return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
720a5f9918aSopenharmony_ci
721a5f9918aSopenharmony_ci    def check_value(self):
722a5f9918aSopenharmony_ci
723a5f9918aSopenharmony_ci        # VALUE(flow context):  ':'
724a5f9918aSopenharmony_ci        if self.flow_level:
725a5f9918aSopenharmony_ci            return True
726a5f9918aSopenharmony_ci
727a5f9918aSopenharmony_ci        # VALUE(block context): ':' (' '|'\n')
728a5f9918aSopenharmony_ci        else:
729a5f9918aSopenharmony_ci            return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
730a5f9918aSopenharmony_ci
731a5f9918aSopenharmony_ci    def check_plain(self):
732a5f9918aSopenharmony_ci
733a5f9918aSopenharmony_ci        # A plain scalar may start with any non-space character except:
734a5f9918aSopenharmony_ci        #   '-', '?', ':', ',', '[', ']', '{', '}',
735a5f9918aSopenharmony_ci        #   '#', '&', '*', '!', '|', '>', '\'', '\"',
736a5f9918aSopenharmony_ci        #   '%', '@', '`'.
737a5f9918aSopenharmony_ci        #
738a5f9918aSopenharmony_ci        # It may also start with
739a5f9918aSopenharmony_ci        #   '-', '?', ':'
740a5f9918aSopenharmony_ci        # if it is followed by a non-space character.
741a5f9918aSopenharmony_ci        #
742a5f9918aSopenharmony_ci        # Note that we limit the last rule to the block context (except the
743a5f9918aSopenharmony_ci        # '-' character) because we want the flow context to be space
744a5f9918aSopenharmony_ci        # independent.
745a5f9918aSopenharmony_ci        ch = self.peek()
746a5f9918aSopenharmony_ci        return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`'  \
747a5f9918aSopenharmony_ci                or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029'
748a5f9918aSopenharmony_ci                        and (ch == '-' or (not self.flow_level and ch in '?:')))
749a5f9918aSopenharmony_ci
750a5f9918aSopenharmony_ci    # Scanners.
751a5f9918aSopenharmony_ci
752a5f9918aSopenharmony_ci    def scan_to_next_token(self):
753a5f9918aSopenharmony_ci        # We ignore spaces, line breaks and comments.
754a5f9918aSopenharmony_ci        # If we find a line break in the block context, we set the flag
755a5f9918aSopenharmony_ci        # `allow_simple_key` on.
756a5f9918aSopenharmony_ci        # The byte order mark is stripped if it's the first character in the
757a5f9918aSopenharmony_ci        # stream. We do not yet support BOM inside the stream as the
758a5f9918aSopenharmony_ci        # specification requires. Any such mark will be considered as a part
759a5f9918aSopenharmony_ci        # of the document.
760a5f9918aSopenharmony_ci        #
761a5f9918aSopenharmony_ci        # TODO: We need to make tab handling rules more sane. A good rule is
762a5f9918aSopenharmony_ci        #   Tabs cannot precede tokens
763a5f9918aSopenharmony_ci        #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
764a5f9918aSopenharmony_ci        #   KEY(block), VALUE(block), BLOCK-ENTRY
765a5f9918aSopenharmony_ci        # So the checking code is
766a5f9918aSopenharmony_ci        #   if <TAB>:
767a5f9918aSopenharmony_ci        #       self.allow_simple_keys = False
768a5f9918aSopenharmony_ci        # We also need to add the check for `allow_simple_keys == True` to
769a5f9918aSopenharmony_ci        # `unwind_indent` before issuing BLOCK-END.
770a5f9918aSopenharmony_ci        # Scanners for block, flow, and plain scalars need to be modified.
771a5f9918aSopenharmony_ci
772a5f9918aSopenharmony_ci        if self.index == 0 and self.peek() == '\uFEFF':
773a5f9918aSopenharmony_ci            self.forward()
774a5f9918aSopenharmony_ci        found = False
775a5f9918aSopenharmony_ci        while not found:
776a5f9918aSopenharmony_ci            while self.peek() == ' ':
777a5f9918aSopenharmony_ci                self.forward()
778a5f9918aSopenharmony_ci            if self.peek() == '#':
779a5f9918aSopenharmony_ci                while self.peek() not in '\0\r\n\x85\u2028\u2029':
780a5f9918aSopenharmony_ci                    self.forward()
781a5f9918aSopenharmony_ci            if self.scan_line_break():
782a5f9918aSopenharmony_ci                if not self.flow_level:
783a5f9918aSopenharmony_ci                    self.allow_simple_key = True
784a5f9918aSopenharmony_ci            else:
785a5f9918aSopenharmony_ci                found = True
786a5f9918aSopenharmony_ci
787a5f9918aSopenharmony_ci    def scan_directive(self):
788a5f9918aSopenharmony_ci        # See the specification for details.
789a5f9918aSopenharmony_ci        start_mark = self.get_mark()
790a5f9918aSopenharmony_ci        self.forward()
791a5f9918aSopenharmony_ci        name = self.scan_directive_name(start_mark)
792a5f9918aSopenharmony_ci        value = None
793a5f9918aSopenharmony_ci        if name == 'YAML':
794a5f9918aSopenharmony_ci            value = self.scan_yaml_directive_value(start_mark)
795a5f9918aSopenharmony_ci            end_mark = self.get_mark()
796a5f9918aSopenharmony_ci        elif name == 'TAG':
797a5f9918aSopenharmony_ci            value = self.scan_tag_directive_value(start_mark)
798a5f9918aSopenharmony_ci            end_mark = self.get_mark()
799a5f9918aSopenharmony_ci        else:
800a5f9918aSopenharmony_ci            end_mark = self.get_mark()
801a5f9918aSopenharmony_ci            while self.peek() not in '\0\r\n\x85\u2028\u2029':
802a5f9918aSopenharmony_ci                self.forward()
803a5f9918aSopenharmony_ci        self.scan_directive_ignored_line(start_mark)
804a5f9918aSopenharmony_ci        return DirectiveToken(name, value, start_mark, end_mark)
805a5f9918aSopenharmony_ci
806a5f9918aSopenharmony_ci    def scan_directive_name(self, start_mark):
807a5f9918aSopenharmony_ci        # See the specification for details.
808a5f9918aSopenharmony_ci        length = 0
809a5f9918aSopenharmony_ci        ch = self.peek(length)
810a5f9918aSopenharmony_ci        while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'  \
811a5f9918aSopenharmony_ci                or ch in '-_':
812a5f9918aSopenharmony_ci            length += 1
813a5f9918aSopenharmony_ci            ch = self.peek(length)
814a5f9918aSopenharmony_ci        if not length:
815a5f9918aSopenharmony_ci            raise ScannerError("while scanning a directive", start_mark,
816a5f9918aSopenharmony_ci                    "expected alphabetic or numeric character, but found %r"
817a5f9918aSopenharmony_ci                    % ch, self.get_mark())
818a5f9918aSopenharmony_ci        value = self.prefix(length)
819a5f9918aSopenharmony_ci        self.forward(length)
820a5f9918aSopenharmony_ci        ch = self.peek()
821a5f9918aSopenharmony_ci        if ch not in '\0 \r\n\x85\u2028\u2029':
822a5f9918aSopenharmony_ci            raise ScannerError("while scanning a directive", start_mark,
823a5f9918aSopenharmony_ci                    "expected alphabetic or numeric character, but found %r"
824a5f9918aSopenharmony_ci                    % ch, self.get_mark())
825a5f9918aSopenharmony_ci        return value
826a5f9918aSopenharmony_ci
827a5f9918aSopenharmony_ci    def scan_yaml_directive_value(self, start_mark):
828a5f9918aSopenharmony_ci        # See the specification for details.
829a5f9918aSopenharmony_ci        while self.peek() == ' ':
830a5f9918aSopenharmony_ci            self.forward()
831a5f9918aSopenharmony_ci        major = self.scan_yaml_directive_number(start_mark)
832a5f9918aSopenharmony_ci        if self.peek() != '.':
833a5f9918aSopenharmony_ci            raise ScannerError("while scanning a directive", start_mark,
834a5f9918aSopenharmony_ci                    "expected a digit or '.', but found %r" % self.peek(),
835a5f9918aSopenharmony_ci                    self.get_mark())
836a5f9918aSopenharmony_ci        self.forward()
837a5f9918aSopenharmony_ci        minor = self.scan_yaml_directive_number(start_mark)
838a5f9918aSopenharmony_ci        if self.peek() not in '\0 \r\n\x85\u2028\u2029':
839a5f9918aSopenharmony_ci            raise ScannerError("while scanning a directive", start_mark,
840a5f9918aSopenharmony_ci                    "expected a digit or ' ', but found %r" % self.peek(),
841a5f9918aSopenharmony_ci                    self.get_mark())
842a5f9918aSopenharmony_ci        return (major, minor)
843a5f9918aSopenharmony_ci
844a5f9918aSopenharmony_ci    def scan_yaml_directive_number(self, start_mark):
845a5f9918aSopenharmony_ci        # See the specification for details.
846a5f9918aSopenharmony_ci        ch = self.peek()
847a5f9918aSopenharmony_ci        if not ('0' <= ch <= '9'):
848a5f9918aSopenharmony_ci            raise ScannerError("while scanning a directive", start_mark,
849a5f9918aSopenharmony_ci                    "expected a digit, but found %r" % ch, self.get_mark())
850a5f9918aSopenharmony_ci        length = 0
851a5f9918aSopenharmony_ci        while '0' <= self.peek(length) <= '9':
852a5f9918aSopenharmony_ci            length += 1
853a5f9918aSopenharmony_ci        value = int(self.prefix(length))
854a5f9918aSopenharmony_ci        self.forward(length)
855a5f9918aSopenharmony_ci        return value
856a5f9918aSopenharmony_ci
857a5f9918aSopenharmony_ci    def scan_tag_directive_value(self, start_mark):
858a5f9918aSopenharmony_ci        # See the specification for details.
859a5f9918aSopenharmony_ci        while self.peek() == ' ':
860a5f9918aSopenharmony_ci            self.forward()
861a5f9918aSopenharmony_ci        handle = self.scan_tag_directive_handle(start_mark)
862a5f9918aSopenharmony_ci        while self.peek() == ' ':
863a5f9918aSopenharmony_ci            self.forward()
864a5f9918aSopenharmony_ci        prefix = self.scan_tag_directive_prefix(start_mark)
865a5f9918aSopenharmony_ci        return (handle, prefix)
866a5f9918aSopenharmony_ci
867a5f9918aSopenharmony_ci    def scan_tag_directive_handle(self, start_mark):
868a5f9918aSopenharmony_ci        # See the specification for details.
869a5f9918aSopenharmony_ci        value = self.scan_tag_handle('directive', start_mark)
870a5f9918aSopenharmony_ci        ch = self.peek()
871a5f9918aSopenharmony_ci        if ch != ' ':
872a5f9918aSopenharmony_ci            raise ScannerError("while scanning a directive", start_mark,
873a5f9918aSopenharmony_ci                    "expected ' ', but found %r" % ch, self.get_mark())
874a5f9918aSopenharmony_ci        return value
875a5f9918aSopenharmony_ci
876a5f9918aSopenharmony_ci    def scan_tag_directive_prefix(self, start_mark):
877a5f9918aSopenharmony_ci        # See the specification for details.
878a5f9918aSopenharmony_ci        value = self.scan_tag_uri('directive', start_mark)
879a5f9918aSopenharmony_ci        ch = self.peek()
880a5f9918aSopenharmony_ci        if ch not in '\0 \r\n\x85\u2028\u2029':
881a5f9918aSopenharmony_ci            raise ScannerError("while scanning a directive", start_mark,
882a5f9918aSopenharmony_ci                    "expected ' ', but found %r" % ch, self.get_mark())
883a5f9918aSopenharmony_ci        return value
884a5f9918aSopenharmony_ci
885a5f9918aSopenharmony_ci    def scan_directive_ignored_line(self, start_mark):
886a5f9918aSopenharmony_ci        # See the specification for details.
887a5f9918aSopenharmony_ci        while self.peek() == ' ':
888a5f9918aSopenharmony_ci            self.forward()
889a5f9918aSopenharmony_ci        if self.peek() == '#':
890a5f9918aSopenharmony_ci            while self.peek() not in '\0\r\n\x85\u2028\u2029':
891a5f9918aSopenharmony_ci                self.forward()
892a5f9918aSopenharmony_ci        ch = self.peek()
893a5f9918aSopenharmony_ci        if ch not in '\0\r\n\x85\u2028\u2029':
894a5f9918aSopenharmony_ci            raise ScannerError("while scanning a directive", start_mark,
895a5f9918aSopenharmony_ci                    "expected a comment or a line break, but found %r"
896a5f9918aSopenharmony_ci                        % ch, self.get_mark())
897a5f9918aSopenharmony_ci        self.scan_line_break()
898a5f9918aSopenharmony_ci
899a5f9918aSopenharmony_ci    def scan_anchor(self, TokenClass):
900a5f9918aSopenharmony_ci        # The specification does not restrict characters for anchors and
901a5f9918aSopenharmony_ci        # aliases. This may lead to problems, for instance, the document:
902a5f9918aSopenharmony_ci        #   [ *alias, value ]
903a5f9918aSopenharmony_ci        # can be interpreted in two ways, as
904a5f9918aSopenharmony_ci        #   [ "value" ]
905a5f9918aSopenharmony_ci        # and
906a5f9918aSopenharmony_ci        #   [ *alias , "value" ]
907a5f9918aSopenharmony_ci        # Therefore we restrict aliases to numbers and ASCII letters.
908a5f9918aSopenharmony_ci        start_mark = self.get_mark()
909a5f9918aSopenharmony_ci        indicator = self.peek()
910a5f9918aSopenharmony_ci        if indicator == '*':
911a5f9918aSopenharmony_ci            name = 'alias'
912a5f9918aSopenharmony_ci        else:
913a5f9918aSopenharmony_ci            name = 'anchor'
914a5f9918aSopenharmony_ci        self.forward()
915a5f9918aSopenharmony_ci        length = 0
916a5f9918aSopenharmony_ci        ch = self.peek(length)
917a5f9918aSopenharmony_ci        while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'  \
918a5f9918aSopenharmony_ci                or ch in '-_':
919a5f9918aSopenharmony_ci            length += 1
920a5f9918aSopenharmony_ci            ch = self.peek(length)
921a5f9918aSopenharmony_ci        if not length:
922a5f9918aSopenharmony_ci            raise ScannerError("while scanning an %s" % name, start_mark,
923a5f9918aSopenharmony_ci                    "expected alphabetic or numeric character, but found %r"
924a5f9918aSopenharmony_ci                    % ch, self.get_mark())
925a5f9918aSopenharmony_ci        value = self.prefix(length)
926a5f9918aSopenharmony_ci        self.forward(length)
927a5f9918aSopenharmony_ci        ch = self.peek()
928a5f9918aSopenharmony_ci        if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
929a5f9918aSopenharmony_ci            raise ScannerError("while scanning an %s" % name, start_mark,
930a5f9918aSopenharmony_ci                    "expected alphabetic or numeric character, but found %r"
931a5f9918aSopenharmony_ci                    % ch, self.get_mark())
932a5f9918aSopenharmony_ci        end_mark = self.get_mark()
933a5f9918aSopenharmony_ci        return TokenClass(value, start_mark, end_mark)
934a5f9918aSopenharmony_ci
935a5f9918aSopenharmony_ci    def scan_tag(self):
936a5f9918aSopenharmony_ci        # See the specification for details.
937a5f9918aSopenharmony_ci        start_mark = self.get_mark()
938a5f9918aSopenharmony_ci        ch = self.peek(1)
939a5f9918aSopenharmony_ci        if ch == '<':
940a5f9918aSopenharmony_ci            handle = None
941a5f9918aSopenharmony_ci            self.forward(2)
942a5f9918aSopenharmony_ci            suffix = self.scan_tag_uri('tag', start_mark)
943a5f9918aSopenharmony_ci            if self.peek() != '>':
944a5f9918aSopenharmony_ci                raise ScannerError("while parsing a tag", start_mark,
945a5f9918aSopenharmony_ci                        "expected '>', but found %r" % self.peek(),
946a5f9918aSopenharmony_ci                        self.get_mark())
947a5f9918aSopenharmony_ci            self.forward()
948a5f9918aSopenharmony_ci        elif ch in '\0 \t\r\n\x85\u2028\u2029':
949a5f9918aSopenharmony_ci            handle = None
950a5f9918aSopenharmony_ci            suffix = '!'
951a5f9918aSopenharmony_ci            self.forward()
952a5f9918aSopenharmony_ci        else:
953a5f9918aSopenharmony_ci            length = 1
954a5f9918aSopenharmony_ci            use_handle = False
955a5f9918aSopenharmony_ci            while ch not in '\0 \r\n\x85\u2028\u2029':
956a5f9918aSopenharmony_ci                if ch == '!':
957a5f9918aSopenharmony_ci                    use_handle = True
958a5f9918aSopenharmony_ci                    break
959a5f9918aSopenharmony_ci                length += 1
960a5f9918aSopenharmony_ci                ch = self.peek(length)
961a5f9918aSopenharmony_ci            handle = '!'
962a5f9918aSopenharmony_ci            if use_handle:
963a5f9918aSopenharmony_ci                handle = self.scan_tag_handle('tag', start_mark)
964a5f9918aSopenharmony_ci            else:
965a5f9918aSopenharmony_ci                handle = '!'
966a5f9918aSopenharmony_ci                self.forward()
967a5f9918aSopenharmony_ci            suffix = self.scan_tag_uri('tag', start_mark)
968a5f9918aSopenharmony_ci        ch = self.peek()
969a5f9918aSopenharmony_ci        if ch not in '\0 \r\n\x85\u2028\u2029':
970a5f9918aSopenharmony_ci            raise ScannerError("while scanning a tag", start_mark,
971a5f9918aSopenharmony_ci                    "expected ' ', but found %r" % ch, self.get_mark())
972a5f9918aSopenharmony_ci        value = (handle, suffix)
973a5f9918aSopenharmony_ci        end_mark = self.get_mark()
974a5f9918aSopenharmony_ci        return TagToken(value, start_mark, end_mark)
975a5f9918aSopenharmony_ci
976a5f9918aSopenharmony_ci    def scan_block_scalar(self, style):
977a5f9918aSopenharmony_ci        # See the specification for details.
978a5f9918aSopenharmony_ci
979a5f9918aSopenharmony_ci        if style == '>':
980a5f9918aSopenharmony_ci            folded = True
981a5f9918aSopenharmony_ci        else:
982a5f9918aSopenharmony_ci            folded = False
983a5f9918aSopenharmony_ci
984a5f9918aSopenharmony_ci        chunks = []
985a5f9918aSopenharmony_ci        start_mark = self.get_mark()
986a5f9918aSopenharmony_ci
987a5f9918aSopenharmony_ci        # Scan the header.
988a5f9918aSopenharmony_ci        self.forward()
989a5f9918aSopenharmony_ci        chomping, increment = self.scan_block_scalar_indicators(start_mark)
990a5f9918aSopenharmony_ci        self.scan_block_scalar_ignored_line(start_mark)
991a5f9918aSopenharmony_ci
992a5f9918aSopenharmony_ci        # Determine the indentation level and go to the first non-empty line.
993a5f9918aSopenharmony_ci        min_indent = self.indent+1
994a5f9918aSopenharmony_ci        if min_indent < 1:
995a5f9918aSopenharmony_ci            min_indent = 1
996a5f9918aSopenharmony_ci        if increment is None:
997a5f9918aSopenharmony_ci            breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
998a5f9918aSopenharmony_ci            indent = max(min_indent, max_indent)
999a5f9918aSopenharmony_ci        else:
1000a5f9918aSopenharmony_ci            indent = min_indent+increment-1
1001a5f9918aSopenharmony_ci            breaks, end_mark = self.scan_block_scalar_breaks(indent)
1002a5f9918aSopenharmony_ci        line_break = ''
1003a5f9918aSopenharmony_ci
1004a5f9918aSopenharmony_ci        # Scan the inner part of the block scalar.
1005a5f9918aSopenharmony_ci        while self.column == indent and self.peek() != '\0':
1006a5f9918aSopenharmony_ci            chunks.extend(breaks)
1007a5f9918aSopenharmony_ci            leading_non_space = self.peek() not in ' \t'
1008a5f9918aSopenharmony_ci            length = 0
1009a5f9918aSopenharmony_ci            while self.peek(length) not in '\0\r\n\x85\u2028\u2029':
1010a5f9918aSopenharmony_ci                length += 1
1011a5f9918aSopenharmony_ci            chunks.append(self.prefix(length))
1012a5f9918aSopenharmony_ci            self.forward(length)
1013a5f9918aSopenharmony_ci            line_break = self.scan_line_break()
1014a5f9918aSopenharmony_ci            breaks, end_mark = self.scan_block_scalar_breaks(indent)
1015a5f9918aSopenharmony_ci            if self.column == indent and self.peek() != '\0':
1016a5f9918aSopenharmony_ci
1017a5f9918aSopenharmony_ci                # Unfortunately, folding rules are ambiguous.
1018a5f9918aSopenharmony_ci                #
1019a5f9918aSopenharmony_ci                # This is the folding according to the specification:
1020a5f9918aSopenharmony_ci
1021a5f9918aSopenharmony_ci                if folded and line_break == '\n'    \
1022a5f9918aSopenharmony_ci                        and leading_non_space and self.peek() not in ' \t':
1023a5f9918aSopenharmony_ci                    if not breaks:
1024a5f9918aSopenharmony_ci                        chunks.append(' ')
1025a5f9918aSopenharmony_ci                else:
1026a5f9918aSopenharmony_ci                    chunks.append(line_break)
1027a5f9918aSopenharmony_ci
1028a5f9918aSopenharmony_ci                # This is Clark Evans's interpretation (also in the spec
1029a5f9918aSopenharmony_ci                # examples):
1030a5f9918aSopenharmony_ci                #
1031a5f9918aSopenharmony_ci                #if folded and line_break == '\n':
1032a5f9918aSopenharmony_ci                #    if not breaks:
1033a5f9918aSopenharmony_ci                #        if self.peek() not in ' \t':
1034a5f9918aSopenharmony_ci                #            chunks.append(' ')
1035a5f9918aSopenharmony_ci                #        else:
1036a5f9918aSopenharmony_ci                #            chunks.append(line_break)
1037a5f9918aSopenharmony_ci                #else:
1038a5f9918aSopenharmony_ci                #    chunks.append(line_break)
1039a5f9918aSopenharmony_ci            else:
1040a5f9918aSopenharmony_ci                break
1041a5f9918aSopenharmony_ci
1042a5f9918aSopenharmony_ci        # Chomp the tail.
1043a5f9918aSopenharmony_ci        if chomping is not False:
1044a5f9918aSopenharmony_ci            chunks.append(line_break)
1045a5f9918aSopenharmony_ci        if chomping is True:
1046a5f9918aSopenharmony_ci            chunks.extend(breaks)
1047a5f9918aSopenharmony_ci
1048a5f9918aSopenharmony_ci        # We are done.
1049a5f9918aSopenharmony_ci        return ScalarToken(''.join(chunks), False, start_mark, end_mark,
1050a5f9918aSopenharmony_ci                style)
1051a5f9918aSopenharmony_ci
1052a5f9918aSopenharmony_ci    def scan_block_scalar_indicators(self, start_mark):
1053a5f9918aSopenharmony_ci        # See the specification for details.
1054a5f9918aSopenharmony_ci        chomping = None
1055a5f9918aSopenharmony_ci        increment = None
1056a5f9918aSopenharmony_ci        ch = self.peek()
1057a5f9918aSopenharmony_ci        if ch in '+-':
1058a5f9918aSopenharmony_ci            if ch == '+':
1059a5f9918aSopenharmony_ci                chomping = True
1060a5f9918aSopenharmony_ci            else:
1061a5f9918aSopenharmony_ci                chomping = False
1062a5f9918aSopenharmony_ci            self.forward()
1063a5f9918aSopenharmony_ci            ch = self.peek()
1064a5f9918aSopenharmony_ci            if ch in '0123456789':
1065a5f9918aSopenharmony_ci                increment = int(ch)
1066a5f9918aSopenharmony_ci                if increment == 0:
1067a5f9918aSopenharmony_ci                    raise ScannerError("while scanning a block scalar", start_mark,
1068a5f9918aSopenharmony_ci                            "expected indentation indicator in the range 1-9, but found 0",
1069a5f9918aSopenharmony_ci                            self.get_mark())
1070a5f9918aSopenharmony_ci                self.forward()
1071a5f9918aSopenharmony_ci        elif ch in '0123456789':
1072a5f9918aSopenharmony_ci            increment = int(ch)
1073a5f9918aSopenharmony_ci            if increment == 0:
1074a5f9918aSopenharmony_ci                raise ScannerError("while scanning a block scalar", start_mark,
1075a5f9918aSopenharmony_ci                        "expected indentation indicator in the range 1-9, but found 0",
1076a5f9918aSopenharmony_ci                        self.get_mark())
1077a5f9918aSopenharmony_ci            self.forward()
1078a5f9918aSopenharmony_ci            ch = self.peek()
1079a5f9918aSopenharmony_ci            if ch in '+-':
1080a5f9918aSopenharmony_ci                if ch == '+':
1081a5f9918aSopenharmony_ci                    chomping = True
1082a5f9918aSopenharmony_ci                else:
1083a5f9918aSopenharmony_ci                    chomping = False
1084a5f9918aSopenharmony_ci                self.forward()
1085a5f9918aSopenharmony_ci        ch = self.peek()
1086a5f9918aSopenharmony_ci        if ch not in '\0 \r\n\x85\u2028\u2029':
1087a5f9918aSopenharmony_ci            raise ScannerError("while scanning a block scalar", start_mark,
1088a5f9918aSopenharmony_ci                    "expected chomping or indentation indicators, but found %r"
1089a5f9918aSopenharmony_ci                    % ch, self.get_mark())
1090a5f9918aSopenharmony_ci        return chomping, increment
1091a5f9918aSopenharmony_ci
1092a5f9918aSopenharmony_ci    def scan_block_scalar_ignored_line(self, start_mark):
1093a5f9918aSopenharmony_ci        # See the specification for details.
1094a5f9918aSopenharmony_ci        while self.peek() == ' ':
1095a5f9918aSopenharmony_ci            self.forward()
1096a5f9918aSopenharmony_ci        if self.peek() == '#':
1097a5f9918aSopenharmony_ci            while self.peek() not in '\0\r\n\x85\u2028\u2029':
1098a5f9918aSopenharmony_ci                self.forward()
1099a5f9918aSopenharmony_ci        ch = self.peek()
1100a5f9918aSopenharmony_ci        if ch not in '\0\r\n\x85\u2028\u2029':
1101a5f9918aSopenharmony_ci            raise ScannerError("while scanning a block scalar", start_mark,
1102a5f9918aSopenharmony_ci                    "expected a comment or a line break, but found %r" % ch,
1103a5f9918aSopenharmony_ci                    self.get_mark())
1104a5f9918aSopenharmony_ci        self.scan_line_break()
1105a5f9918aSopenharmony_ci
1106a5f9918aSopenharmony_ci    def scan_block_scalar_indentation(self):
1107a5f9918aSopenharmony_ci        # See the specification for details.
1108a5f9918aSopenharmony_ci        chunks = []
1109a5f9918aSopenharmony_ci        max_indent = 0
1110a5f9918aSopenharmony_ci        end_mark = self.get_mark()
1111a5f9918aSopenharmony_ci        while self.peek() in ' \r\n\x85\u2028\u2029':
1112a5f9918aSopenharmony_ci            if self.peek() != ' ':
1113a5f9918aSopenharmony_ci                chunks.append(self.scan_line_break())
1114a5f9918aSopenharmony_ci                end_mark = self.get_mark()
1115a5f9918aSopenharmony_ci            else:
1116a5f9918aSopenharmony_ci                self.forward()
1117a5f9918aSopenharmony_ci                if self.column > max_indent:
1118a5f9918aSopenharmony_ci                    max_indent = self.column
1119a5f9918aSopenharmony_ci        return chunks, max_indent, end_mark
1120a5f9918aSopenharmony_ci
1121a5f9918aSopenharmony_ci    def scan_block_scalar_breaks(self, indent):
1122a5f9918aSopenharmony_ci        # See the specification for details.
1123a5f9918aSopenharmony_ci        chunks = []
1124a5f9918aSopenharmony_ci        end_mark = self.get_mark()
1125a5f9918aSopenharmony_ci        while self.column < indent and self.peek() == ' ':
1126a5f9918aSopenharmony_ci            self.forward()
1127a5f9918aSopenharmony_ci        while self.peek() in '\r\n\x85\u2028\u2029':
1128a5f9918aSopenharmony_ci            chunks.append(self.scan_line_break())
1129a5f9918aSopenharmony_ci            end_mark = self.get_mark()
1130a5f9918aSopenharmony_ci            while self.column < indent and self.peek() == ' ':
1131a5f9918aSopenharmony_ci                self.forward()
1132a5f9918aSopenharmony_ci        return chunks, end_mark
1133a5f9918aSopenharmony_ci
1134a5f9918aSopenharmony_ci    def scan_flow_scalar(self, style):
1135a5f9918aSopenharmony_ci        # See the specification for details.
1136a5f9918aSopenharmony_ci        # Note that we loose indentation rules for quoted scalars. Quoted
1137a5f9918aSopenharmony_ci        # scalars don't need to adhere indentation because " and ' clearly
1138a5f9918aSopenharmony_ci        # mark the beginning and the end of them. Therefore we are less
1139a5f9918aSopenharmony_ci        # restrictive then the specification requires. We only need to check
1140a5f9918aSopenharmony_ci        # that document separators are not included in scalars.
1141a5f9918aSopenharmony_ci        if style == '"':
1142a5f9918aSopenharmony_ci            double = True
1143a5f9918aSopenharmony_ci        else:
1144a5f9918aSopenharmony_ci            double = False
1145a5f9918aSopenharmony_ci        chunks = []
1146a5f9918aSopenharmony_ci        start_mark = self.get_mark()
1147a5f9918aSopenharmony_ci        quote = self.peek()
1148a5f9918aSopenharmony_ci        self.forward()
1149a5f9918aSopenharmony_ci        chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
1150a5f9918aSopenharmony_ci        while self.peek() != quote:
1151a5f9918aSopenharmony_ci            chunks.extend(self.scan_flow_scalar_spaces(double, start_mark))
1152a5f9918aSopenharmony_ci            chunks.extend(self.scan_flow_scalar_non_spaces(double, start_mark))
1153a5f9918aSopenharmony_ci        self.forward()
1154a5f9918aSopenharmony_ci        end_mark = self.get_mark()
1155a5f9918aSopenharmony_ci        return ScalarToken(''.join(chunks), False, start_mark, end_mark,
1156a5f9918aSopenharmony_ci                style)
1157a5f9918aSopenharmony_ci
1158a5f9918aSopenharmony_ci    ESCAPE_REPLACEMENTS = {
1159a5f9918aSopenharmony_ci        '0':    '\0',
1160a5f9918aSopenharmony_ci        'a':    '\x07',
1161a5f9918aSopenharmony_ci        'b':    '\x08',
1162a5f9918aSopenharmony_ci        't':    '\x09',
1163a5f9918aSopenharmony_ci        '\t':   '\x09',
1164a5f9918aSopenharmony_ci        'n':    '\x0A',
1165a5f9918aSopenharmony_ci        'v':    '\x0B',
1166a5f9918aSopenharmony_ci        'f':    '\x0C',
1167a5f9918aSopenharmony_ci        'r':    '\x0D',
1168a5f9918aSopenharmony_ci        'e':    '\x1B',
1169a5f9918aSopenharmony_ci        ' ':    '\x20',
1170a5f9918aSopenharmony_ci        '\"':   '\"',
1171a5f9918aSopenharmony_ci        '\\':   '\\',
1172a5f9918aSopenharmony_ci        '/':    '/',
1173a5f9918aSopenharmony_ci        'N':    '\x85',
1174a5f9918aSopenharmony_ci        '_':    '\xA0',
1175a5f9918aSopenharmony_ci        'L':    '\u2028',
1176a5f9918aSopenharmony_ci        'P':    '\u2029',
1177a5f9918aSopenharmony_ci    }
1178a5f9918aSopenharmony_ci
1179a5f9918aSopenharmony_ci    ESCAPE_CODES = {
1180a5f9918aSopenharmony_ci        'x':    2,
1181a5f9918aSopenharmony_ci        'u':    4,
1182a5f9918aSopenharmony_ci        'U':    8,
1183a5f9918aSopenharmony_ci    }
1184a5f9918aSopenharmony_ci
1185a5f9918aSopenharmony_ci    def scan_flow_scalar_non_spaces(self, double, start_mark):
1186a5f9918aSopenharmony_ci        # See the specification for details.
1187a5f9918aSopenharmony_ci        chunks = []
1188a5f9918aSopenharmony_ci        while True:
1189a5f9918aSopenharmony_ci            length = 0
1190a5f9918aSopenharmony_ci            while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029':
1191a5f9918aSopenharmony_ci                length += 1
1192a5f9918aSopenharmony_ci            if length:
1193a5f9918aSopenharmony_ci                chunks.append(self.prefix(length))
1194a5f9918aSopenharmony_ci                self.forward(length)
1195a5f9918aSopenharmony_ci            ch = self.peek()
1196a5f9918aSopenharmony_ci            if not double and ch == '\'' and self.peek(1) == '\'':
1197a5f9918aSopenharmony_ci                chunks.append('\'')
1198a5f9918aSopenharmony_ci                self.forward(2)
1199a5f9918aSopenharmony_ci            elif (double and ch == '\'') or (not double and ch in '\"\\'):
1200a5f9918aSopenharmony_ci                chunks.append(ch)
1201a5f9918aSopenharmony_ci                self.forward()
1202a5f9918aSopenharmony_ci            elif double and ch == '\\':
1203a5f9918aSopenharmony_ci                self.forward()
1204a5f9918aSopenharmony_ci                ch = self.peek()
1205a5f9918aSopenharmony_ci                if ch in self.ESCAPE_REPLACEMENTS:
1206a5f9918aSopenharmony_ci                    chunks.append(self.ESCAPE_REPLACEMENTS[ch])
1207a5f9918aSopenharmony_ci                    self.forward()
1208a5f9918aSopenharmony_ci                elif ch in self.ESCAPE_CODES:
1209a5f9918aSopenharmony_ci                    length = self.ESCAPE_CODES[ch]
1210a5f9918aSopenharmony_ci                    self.forward()
1211a5f9918aSopenharmony_ci                    for k in range(length):
1212a5f9918aSopenharmony_ci                        if self.peek(k) not in '0123456789ABCDEFabcdef':
1213a5f9918aSopenharmony_ci                            raise ScannerError("while scanning a double-quoted scalar", start_mark,
1214a5f9918aSopenharmony_ci                                    "expected escape sequence of %d hexadecimal numbers, but found %r" %
1215a5f9918aSopenharmony_ci                                        (length, self.peek(k)), self.get_mark())
1216a5f9918aSopenharmony_ci                    code = int(self.prefix(length), 16)
1217a5f9918aSopenharmony_ci                    chunks.append(chr(code))
1218a5f9918aSopenharmony_ci                    self.forward(length)
1219a5f9918aSopenharmony_ci                elif ch in '\r\n\x85\u2028\u2029':
1220a5f9918aSopenharmony_ci                    self.scan_line_break()
1221a5f9918aSopenharmony_ci                    chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
1222a5f9918aSopenharmony_ci                else:
1223a5f9918aSopenharmony_ci                    raise ScannerError("while scanning a double-quoted scalar", start_mark,
1224a5f9918aSopenharmony_ci                            "found unknown escape character %r" % ch, self.get_mark())
1225a5f9918aSopenharmony_ci            else:
1226a5f9918aSopenharmony_ci                return chunks
1227a5f9918aSopenharmony_ci
1228a5f9918aSopenharmony_ci    def scan_flow_scalar_spaces(self, double, start_mark):
1229a5f9918aSopenharmony_ci        # See the specification for details.
1230a5f9918aSopenharmony_ci        chunks = []
1231a5f9918aSopenharmony_ci        length = 0
1232a5f9918aSopenharmony_ci        while self.peek(length) in ' \t':
1233a5f9918aSopenharmony_ci            length += 1
1234a5f9918aSopenharmony_ci        whitespaces = self.prefix(length)
1235a5f9918aSopenharmony_ci        self.forward(length)
1236a5f9918aSopenharmony_ci        ch = self.peek()
1237a5f9918aSopenharmony_ci        if ch == '\0':
1238a5f9918aSopenharmony_ci            raise ScannerError("while scanning a quoted scalar", start_mark,
1239a5f9918aSopenharmony_ci                    "found unexpected end of stream", self.get_mark())
1240a5f9918aSopenharmony_ci        elif ch in '\r\n\x85\u2028\u2029':
1241a5f9918aSopenharmony_ci            line_break = self.scan_line_break()
1242a5f9918aSopenharmony_ci            breaks = self.scan_flow_scalar_breaks(double, start_mark)
1243a5f9918aSopenharmony_ci            if line_break != '\n':
1244a5f9918aSopenharmony_ci                chunks.append(line_break)
1245a5f9918aSopenharmony_ci            elif not breaks:
1246a5f9918aSopenharmony_ci                chunks.append(' ')
1247a5f9918aSopenharmony_ci            chunks.extend(breaks)
1248a5f9918aSopenharmony_ci        else:
1249a5f9918aSopenharmony_ci            chunks.append(whitespaces)
1250a5f9918aSopenharmony_ci        return chunks
1251a5f9918aSopenharmony_ci
1252a5f9918aSopenharmony_ci    def scan_flow_scalar_breaks(self, double, start_mark):
1253a5f9918aSopenharmony_ci        # See the specification for details.
1254a5f9918aSopenharmony_ci        chunks = []
1255a5f9918aSopenharmony_ci        while True:
1256a5f9918aSopenharmony_ci            # Instead of checking indentation, we check for document
1257a5f9918aSopenharmony_ci            # separators.
1258a5f9918aSopenharmony_ci            prefix = self.prefix(3)
1259a5f9918aSopenharmony_ci            if (prefix == '---' or prefix == '...')   \
1260a5f9918aSopenharmony_ci                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
1261a5f9918aSopenharmony_ci                raise ScannerError("while scanning a quoted scalar", start_mark,
1262a5f9918aSopenharmony_ci                        "found unexpected document separator", self.get_mark())
1263a5f9918aSopenharmony_ci            while self.peek() in ' \t':
1264a5f9918aSopenharmony_ci                self.forward()
1265a5f9918aSopenharmony_ci            if self.peek() in '\r\n\x85\u2028\u2029':
1266a5f9918aSopenharmony_ci                chunks.append(self.scan_line_break())
1267a5f9918aSopenharmony_ci            else:
1268a5f9918aSopenharmony_ci                return chunks
1269a5f9918aSopenharmony_ci
1270a5f9918aSopenharmony_ci    def scan_plain(self):
1271a5f9918aSopenharmony_ci        # See the specification for details.
1272a5f9918aSopenharmony_ci        # We add an additional restriction for the flow context:
1273a5f9918aSopenharmony_ci        #   plain scalars in the flow context cannot contain ',' or '?'.
1274a5f9918aSopenharmony_ci        # We also keep track of the `allow_simple_key` flag here.
1275a5f9918aSopenharmony_ci        # Indentation rules are loosed for the flow context.
1276a5f9918aSopenharmony_ci        chunks = []
1277a5f9918aSopenharmony_ci        start_mark = self.get_mark()
1278a5f9918aSopenharmony_ci        end_mark = start_mark
1279a5f9918aSopenharmony_ci        indent = self.indent+1
1280a5f9918aSopenharmony_ci        # We allow zero indentation for scalars, but then we need to check for
1281a5f9918aSopenharmony_ci        # document separators at the beginning of the line.
1282a5f9918aSopenharmony_ci        #if indent == 0:
1283a5f9918aSopenharmony_ci        #    indent = 1
1284a5f9918aSopenharmony_ci        spaces = []
1285a5f9918aSopenharmony_ci        while True:
1286a5f9918aSopenharmony_ci            length = 0
1287a5f9918aSopenharmony_ci            if self.peek() == '#':
1288a5f9918aSopenharmony_ci                break
1289a5f9918aSopenharmony_ci            while True:
1290a5f9918aSopenharmony_ci                ch = self.peek(length)
1291a5f9918aSopenharmony_ci                if ch in '\0 \t\r\n\x85\u2028\u2029'    \
1292a5f9918aSopenharmony_ci                        or (ch == ':' and
1293a5f9918aSopenharmony_ci                                self.peek(length+1) in '\0 \t\r\n\x85\u2028\u2029'
1294a5f9918aSopenharmony_ci                                      + (u',[]{}' if self.flow_level else u''))\
1295a5f9918aSopenharmony_ci                        or (self.flow_level and ch in ',?[]{}'):
1296a5f9918aSopenharmony_ci                    break
1297a5f9918aSopenharmony_ci                length += 1
1298a5f9918aSopenharmony_ci            if length == 0:
1299a5f9918aSopenharmony_ci                break
1300a5f9918aSopenharmony_ci            self.allow_simple_key = False
1301a5f9918aSopenharmony_ci            chunks.extend(spaces)
1302a5f9918aSopenharmony_ci            chunks.append(self.prefix(length))
1303a5f9918aSopenharmony_ci            self.forward(length)
1304a5f9918aSopenharmony_ci            end_mark = self.get_mark()
1305a5f9918aSopenharmony_ci            spaces = self.scan_plain_spaces(indent, start_mark)
1306a5f9918aSopenharmony_ci            if not spaces or self.peek() == '#' \
1307a5f9918aSopenharmony_ci                    or (not self.flow_level and self.column < indent):
1308a5f9918aSopenharmony_ci                break
1309a5f9918aSopenharmony_ci        return ScalarToken(''.join(chunks), True, start_mark, end_mark)
1310a5f9918aSopenharmony_ci
1311a5f9918aSopenharmony_ci    def scan_plain_spaces(self, indent, start_mark):
1312a5f9918aSopenharmony_ci        # See the specification for details.
1313a5f9918aSopenharmony_ci        # The specification is really confusing about tabs in plain scalars.
1314a5f9918aSopenharmony_ci        # We just forbid them completely. Do not use tabs in YAML!
1315a5f9918aSopenharmony_ci        chunks = []
1316a5f9918aSopenharmony_ci        length = 0
1317a5f9918aSopenharmony_ci        while self.peek(length) in ' ':
1318a5f9918aSopenharmony_ci            length += 1
1319a5f9918aSopenharmony_ci        whitespaces = self.prefix(length)
1320a5f9918aSopenharmony_ci        self.forward(length)
1321a5f9918aSopenharmony_ci        ch = self.peek()
1322a5f9918aSopenharmony_ci        if ch in '\r\n\x85\u2028\u2029':
1323a5f9918aSopenharmony_ci            line_break = self.scan_line_break()
1324a5f9918aSopenharmony_ci            self.allow_simple_key = True
1325a5f9918aSopenharmony_ci            prefix = self.prefix(3)
1326a5f9918aSopenharmony_ci            if (prefix == '---' or prefix == '...')   \
1327a5f9918aSopenharmony_ci                    and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
1328a5f9918aSopenharmony_ci                return
1329a5f9918aSopenharmony_ci            breaks = []
1330a5f9918aSopenharmony_ci            while self.peek() in ' \r\n\x85\u2028\u2029':
1331a5f9918aSopenharmony_ci                if self.peek() == ' ':
1332a5f9918aSopenharmony_ci                    self.forward()
1333a5f9918aSopenharmony_ci                else:
1334a5f9918aSopenharmony_ci                    breaks.append(self.scan_line_break())
1335a5f9918aSopenharmony_ci                    prefix = self.prefix(3)
1336a5f9918aSopenharmony_ci                    if (prefix == '---' or prefix == '...')   \
1337a5f9918aSopenharmony_ci                            and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
1338a5f9918aSopenharmony_ci                        return
1339a5f9918aSopenharmony_ci            if line_break != '\n':
1340a5f9918aSopenharmony_ci                chunks.append(line_break)
1341a5f9918aSopenharmony_ci            elif not breaks:
1342a5f9918aSopenharmony_ci                chunks.append(' ')
1343a5f9918aSopenharmony_ci            chunks.extend(breaks)
1344a5f9918aSopenharmony_ci        elif whitespaces:
1345a5f9918aSopenharmony_ci            chunks.append(whitespaces)
1346a5f9918aSopenharmony_ci        return chunks
1347a5f9918aSopenharmony_ci
1348a5f9918aSopenharmony_ci    def scan_tag_handle(self, name, start_mark):
1349a5f9918aSopenharmony_ci        # See the specification for details.
1350a5f9918aSopenharmony_ci        # For some strange reasons, the specification does not allow '_' in
1351a5f9918aSopenharmony_ci        # tag handles. I have allowed it anyway.
1352a5f9918aSopenharmony_ci        ch = self.peek()
1353a5f9918aSopenharmony_ci        if ch != '!':
1354a5f9918aSopenharmony_ci            raise ScannerError("while scanning a %s" % name, start_mark,
1355a5f9918aSopenharmony_ci                    "expected '!', but found %r" % ch, self.get_mark())
1356a5f9918aSopenharmony_ci        length = 1
1357a5f9918aSopenharmony_ci        ch = self.peek(length)
1358a5f9918aSopenharmony_ci        if ch != ' ':
1359a5f9918aSopenharmony_ci            while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'  \
1360a5f9918aSopenharmony_ci                    or ch in '-_':
1361a5f9918aSopenharmony_ci                length += 1
1362a5f9918aSopenharmony_ci                ch = self.peek(length)
1363a5f9918aSopenharmony_ci            if ch != '!':
1364a5f9918aSopenharmony_ci                self.forward(length)
1365a5f9918aSopenharmony_ci                raise ScannerError("while scanning a %s" % name, start_mark,
1366a5f9918aSopenharmony_ci                        "expected '!', but found %r" % ch, self.get_mark())
1367a5f9918aSopenharmony_ci            length += 1
1368a5f9918aSopenharmony_ci        value = self.prefix(length)
1369a5f9918aSopenharmony_ci        self.forward(length)
1370a5f9918aSopenharmony_ci        return value
1371a5f9918aSopenharmony_ci
1372a5f9918aSopenharmony_ci    def scan_tag_uri(self, name, start_mark):
1373a5f9918aSopenharmony_ci        # See the specification for details.
1374a5f9918aSopenharmony_ci        # Note: we do not check if URI is well-formed.
1375a5f9918aSopenharmony_ci        chunks = []
1376a5f9918aSopenharmony_ci        length = 0
1377a5f9918aSopenharmony_ci        ch = self.peek(length)
1378a5f9918aSopenharmony_ci        while '0' <= ch <= '9' or 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'  \
1379a5f9918aSopenharmony_ci                or ch in '-;/?:@&=+$,_.!~*\'()[]%':
1380a5f9918aSopenharmony_ci            if ch == '%':
1381a5f9918aSopenharmony_ci                chunks.append(self.prefix(length))
1382a5f9918aSopenharmony_ci                self.forward(length)
1383a5f9918aSopenharmony_ci                length = 0
1384a5f9918aSopenharmony_ci                chunks.append(self.scan_uri_escapes(name, start_mark))
1385a5f9918aSopenharmony_ci            else:
1386a5f9918aSopenharmony_ci                length += 1
1387a5f9918aSopenharmony_ci            ch = self.peek(length)
1388a5f9918aSopenharmony_ci        if length:
1389a5f9918aSopenharmony_ci            chunks.append(self.prefix(length))
1390a5f9918aSopenharmony_ci            self.forward(length)
1391a5f9918aSopenharmony_ci            length = 0
1392a5f9918aSopenharmony_ci        if not chunks:
1393a5f9918aSopenharmony_ci            raise ScannerError("while parsing a %s" % name, start_mark,
1394a5f9918aSopenharmony_ci                    "expected URI, but found %r" % ch, self.get_mark())
1395a5f9918aSopenharmony_ci        return ''.join(chunks)
1396a5f9918aSopenharmony_ci
1397a5f9918aSopenharmony_ci    def scan_uri_escapes(self, name, start_mark):
1398a5f9918aSopenharmony_ci        # See the specification for details.
1399a5f9918aSopenharmony_ci        codes = []
1400a5f9918aSopenharmony_ci        mark = self.get_mark()
1401a5f9918aSopenharmony_ci        while self.peek() == '%':
1402a5f9918aSopenharmony_ci            self.forward()
1403a5f9918aSopenharmony_ci            for k in range(2):
1404a5f9918aSopenharmony_ci                if self.peek(k) not in '0123456789ABCDEFabcdef':
1405a5f9918aSopenharmony_ci                    raise ScannerError("while scanning a %s" % name, start_mark,
1406a5f9918aSopenharmony_ci                            "expected URI escape sequence of 2 hexadecimal numbers, but found %r"
1407a5f9918aSopenharmony_ci                            % self.peek(k), self.get_mark())
1408a5f9918aSopenharmony_ci            codes.append(int(self.prefix(2), 16))
1409a5f9918aSopenharmony_ci            self.forward(2)
1410a5f9918aSopenharmony_ci        try:
1411a5f9918aSopenharmony_ci            value = bytes(codes).decode('utf-8')
1412a5f9918aSopenharmony_ci        except UnicodeDecodeError as exc:
1413a5f9918aSopenharmony_ci            raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)
1414a5f9918aSopenharmony_ci        return value
1415a5f9918aSopenharmony_ci
1416a5f9918aSopenharmony_ci    def scan_line_break(self):
1417a5f9918aSopenharmony_ci        # Transforms:
1418a5f9918aSopenharmony_ci        #   '\r\n'      :   '\n'
1419a5f9918aSopenharmony_ci        #   '\r'        :   '\n'
1420a5f9918aSopenharmony_ci        #   '\n'        :   '\n'
1421a5f9918aSopenharmony_ci        #   '\x85'      :   '\n'
1422a5f9918aSopenharmony_ci        #   '\u2028'    :   '\u2028'
1423a5f9918aSopenharmony_ci        #   '\u2029     :   '\u2029'
1424a5f9918aSopenharmony_ci        #   default     :   ''
1425a5f9918aSopenharmony_ci        ch = self.peek()
1426a5f9918aSopenharmony_ci        if ch in '\r\n\x85':
1427a5f9918aSopenharmony_ci            if self.prefix(2) == '\r\n':
1428a5f9918aSopenharmony_ci                self.forward(2)
1429a5f9918aSopenharmony_ci            else:
1430a5f9918aSopenharmony_ci                self.forward()
1431a5f9918aSopenharmony_ci            return '\n'
1432a5f9918aSopenharmony_ci        elif ch in '\u2028\u2029':
1433a5f9918aSopenharmony_ci            self.forward()
1434a5f9918aSopenharmony_ci            return ch
1435a5f9918aSopenharmony_ci        return ''
1436