13af6ab5fSopenharmony_ci#!/usr/bin/env python3
23af6ab5fSopenharmony_ci# coding=utf-8
33af6ab5fSopenharmony_ci#
43af6ab5fSopenharmony_ci# Copyright (c) 2024 Huawei Device Co., Ltd.
53af6ab5fSopenharmony_ci# Licensed under the Apache License, Version 2.0 (the "License");
63af6ab5fSopenharmony_ci# you may not use this file except in compliance with the License.
73af6ab5fSopenharmony_ci# You may obtain a copy of the License at
83af6ab5fSopenharmony_ci#
93af6ab5fSopenharmony_ci# http://www.apache.org/licenses/LICENSE-2.0
103af6ab5fSopenharmony_ci#
113af6ab5fSopenharmony_ci# Unless required by applicable law or agreed to in writing, software
123af6ab5fSopenharmony_ci# distributed under the License is distributed on an "AS IS" BASIS,
133af6ab5fSopenharmony_ci# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
143af6ab5fSopenharmony_ci# See the License for the specific language governing permissions and
153af6ab5fSopenharmony_ci# limitations under the License.
163af6ab5fSopenharmony_ci
173af6ab5fSopenharmony_ci
183af6ab5fSopenharmony_ci"""Module provides custom text tools for parsing."""
193af6ab5fSopenharmony_ci
203af6ab5fSopenharmony_cifrom typing import Tuple, Dict
213af6ab5fSopenharmony_cifrom log_tools import warning_log
223af6ab5fSopenharmony_ci
233af6ab5fSopenharmony_ci
# Default (exclusive) upper bound used as 'pos_end' by the forward-search helpers below.
MAX_LEN = 10_000_000
253af6ab5fSopenharmony_ci
263af6ab5fSopenharmony_ci
def find_first_not_restricted_character(restricted: str, data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """
    Returns index of the first character of 'data' in range [pos, pos_end)
    that is NOT one of 'restricted' characters.
    If there is no such character -> returns len(data).
    """
    search_end = min(len(data), pos_end)
    allowed_positions = (idx for idx in range(pos, search_end) if data[idx] not in restricted)
    return next(allowed_positions, len(data))
323af6ab5fSopenharmony_ci
333af6ab5fSopenharmony_ci
def rfind_first_not_restricted_character(restricted: str, data: str, pos: int, pos_end: int = 0) -> int:
    """
    Searches backwards from 'pos' down to 'pos_end' (both inclusive) and returns
    index of the first character that is NOT one of 'restricted' characters.
    If 'pos' is past the last character, searching starts from the last character.
    If there is no such character -> returns len(data).
    """
    # '>=' instead of '>': pos == len(data) is already out of range,
    # indexing data[pos] would raise IndexError (also for empty 'data').
    if pos >= len(data):
        pos = len(data) - 1

    lowest = max(0, pos_end)
    for i in range(pos, lowest - 1, -1):
        if data[i] not in restricted:
            return i
    return len(data)
433af6ab5fSopenharmony_ci
443af6ab5fSopenharmony_ci
def find_first_of_characters(characters: str, data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """
    Returns index of the first character of 'data' in range [pos, pos_end)
    that is one of 'characters'.
    If there is no such character -> returns len(data).
    """
    search_end = min(len(data), pos_end)
    matching_positions = (idx for idx in range(pos, search_end) if data[idx] in characters)
    return next(matching_positions, len(data))
503af6ab5fSopenharmony_ci
513af6ab5fSopenharmony_ci
def rfind_first_of_characters(characters: str, data: str, pos: int, pos_end: int = 0) -> int:
    """
    Searches backwards from 'pos' down to 'pos_end' (both inclusive) and returns
    index of the first character that is one of 'characters'.
    If 'pos' is past the last character, searching starts from the last character.
    If there is no such character -> returns len(data).
    """
    # '>=' instead of '>': pos == len(data) is already out of range,
    # indexing data[pos] would raise IndexError (also for empty 'data').
    if pos >= len(data):
        pos = len(data) - 1

    lowest = max(0, pos_end)
    for i in range(pos, lowest - 1, -1):
        if data[i] in characters:
            return i
    return len(data)
613af6ab5fSopenharmony_ci
623af6ab5fSopenharmony_ci
def find_scope_borders(data: str, start: int = 0, opening: str = "{") -> Tuple[int, int]:
    """
    Returns pos of opening and closing brackets in 'data'.
    Search of the opening bracket begins at 'start'.
    If 'opening' is an empty string (ANY mode), the first of "({<[" found
    from 'start' is used as the opening bracket.
    Only brackets of the chosen kind are counted when matching nesting.

    If it can't find proper scope -> raise RuntimeError.
    If 'opening' is not one of supported brackets -> KeyError.
    """
    brackets_match: Dict[str, str] = {
        "{": "}",
        "(": ")",
        "<": ">",
        "[": "]",
    }

    if opening == "":
        opening_pos = find_first_of_characters("({<[", data, start)
        if opening_pos == len(data):
            raise RuntimeError("Error while finding end of scope in ANY mode")
        opening = data[opening_pos]

    closing = brackets_match[opening]
    start_of_scope = data.find(opening, start)

    if start_of_scope == -1:
        raise RuntimeError("No opening bracket found!")

    # Walk over closing brackets one by one, keeping a running nesting depth.
    # Only the text between two consecutive closing brackets is scanned for
    # new opening brackets, so every character is inspected once instead of
    # re-counting the whole scope for each candidate.
    depth = 1  # the opening bracket at 'start_of_scope'
    search_from = start_of_scope + 1
    while True:
        end_of_scope = data.find(closing, search_from)
        if end_of_scope == -1:
            raise RuntimeError("Error while finding end of scope.")

        # Openings seen since the previous closing bracket, minus this closing one.
        depth += data.count(opening, search_from, end_of_scope) - 1
        if depth == 0:
            return start_of_scope, end_of_scope

        search_from = end_of_scope + 1
1013af6ab5fSopenharmony_ci
1023af6ab5fSopenharmony_ci
def smart_split_by(data: str, delim: str = ",") -> list:
    """
    Splits 'data' by any of 'delim' characters, using smart_find_first_of_characters
    to locate delimiters (so delimiters inside brackets or quotes do not split).
    Each segment is stripped of spaces and newlines.
    Empty segments are not included in the result, a warning is logged for them.
    """
    text = data.strip(" \n")
    skipped_chars = f"{delim} \n"

    segments = []
    cursor = 0

    while cursor < len(text):
        delim_pos = smart_find_first_of_characters(delim, text, cursor)
        piece = text[cursor:delim_pos].strip(" \n")

        if piece == "":
            warning_log("Warning: empty segment in smart_split_by")
        else:
            segments.append(piece)

        # Jump over the delimiter and any delimiters/whitespace that follow it.
        cursor = find_first_not_restricted_character(skipped_chars, text, delim_pos)

    return segments
1223af6ab5fSopenharmony_ci
1233af6ab5fSopenharmony_ci
def _find_closing_quote(quote: str, data: str, pos: int) -> int:
    """
    Returns index of the first 'quote' at or after 'pos' that is not escaped
    by a backslash. If there is no such quote -> returns -1.
    """
    candidate = data.find(quote, pos)
    while candidate != -1:
        # A quote is escaped only when preceded by an ODD number of backslashes
        # (an even run is just escaped backslashes, e.g. "a\\").
        backslashes = 0
        k = candidate - 1
        while k >= pos and data[k] == "\\":
            backslashes += 1
            k -= 1
        if backslashes % 2 == 0:
            return candidate
        candidate = data.find(quote, candidate + 1)
    return -1


def smart_find_first_of_characters(characters: str, data: str, pos: int) -> int:
    """
    Returns index of the first character of 'data', starting from 'pos', that is
    one of 'characters' and is located outside of brackets and quotes.
    Scopes opened by one of "<({[" are skipped using find_scope_borders,
    text in double or single quotes is skipped up to the closing unescaped quote.
    If there is no such character (or a quote is never closed) -> returns len(data).

    Errors raised by find_scope_borders for unbalanced brackets are propagated.
    """
    i = pos
    while i < len(data):
        current = data[i]
        if current in characters:
            return i

        if current in "<({[":
            # Continue right after the matching closing bracket.
            _, i = find_scope_borders(data, i, current)

        elif current in "\"'":
            i = _find_closing_quote(current, data, i + 1)
            if i == -1:
                # Unterminated quote: the rest of 'data' is inside the literal.
                # (Continuing with i == -1 would restart scanning from index 0.)
                return len(data)

        i += 1

    return len(data)
1453af6ab5fSopenharmony_ci
1463af6ab5fSopenharmony_ci
def check_cpp_name(data: str) -> bool:
    """
    Returns True if lowercased 'data' contains none of the forbidden characters
    (space, punctuation, operators, brackets, quotes), otherwise False.
    """
    lowered = data.lower()
    first_forbidden = find_first_of_characters(" ~!@#$%^&*()-+=[]\\{}|;:'\",./<>?", lowered)
    # find_first_of_characters returns len(...) when nothing was found.
    return first_forbidden == len(lowered)
151