#!/usr/bin/env python3
# coding=utf-8
#
# Copyright (c) 2024 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


"""Module provides custom text tools for parsing."""

from typing import Dict, List, Tuple
from log_tools import warning_log


MAX_LEN = 10000000


def find_first_not_restricted_character(restricted: str, data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """
    Returns index of the first character in data[pos:pos_end] that is NOT in 'restricted'.
    If there is no such character -> returns len(data).
    """
    for i in range(pos, min(len(data), pos_end)):
        if data[i] not in restricted:
            return i
    return len(data)


def rfind_first_not_restricted_character(restricted: str, data: str, pos: int, pos_end: int = 0) -> int:
    """
    Searches backwards from 'pos' down to 'pos_end' (pos_end is included in the search)
    and returns index of the first character that is NOT in 'restricted'.
    If there is no such character -> returns len(data).
    """
    # '>=' (not '>'): pos == len(data) is already out of range for indexing.
    if pos >= len(data):
        pos = len(data) - 1
    while pos >= max(0, pos_end):
        if data[pos] not in restricted:
            return pos
        pos -= 1
    return len(data)


def find_first_of_characters(characters: str, data: str, pos: int = 0, pos_end: int = MAX_LEN) -> int:
    """
    Returns index of the first character in data[pos:pos_end] that is in 'characters'.
    If there is no such character -> returns len(data).
    """
    for i in range(pos, min(len(data), pos_end)):
        if data[i] in characters:
            return i
    return len(data)


def rfind_first_of_characters(characters: str, data: str, pos: int, pos_end: int = 0) -> int:
    """
    Searches backwards from 'pos' down to 'pos_end' (pos_end is included in the search)
    and returns index of the first character that is in 'characters'.
    If there is no such character -> returns len(data).
    """
    # '>=' (not '>'): pos == len(data) is already out of range for indexing.
    if pos >= len(data):
        pos = len(data) - 1
    while pos >= max(0, pos_end):
        if data[pos] in characters:
            return pos
        pos -= 1
    return len(data)


def find_scope_borders(data: str, start: int = 0, opening: str = "{") -> Tuple[int, int]:
    """
    Returns pos of opening and closing brackets in 'data'.
    If 'opening' is an empty string, the first of "({<[" found from 'start' is used.
    If it can't find proper scope -> raise error.
    """
    brackets_match: Dict[str, str] = {
        "{": "}",
        "(": ")",
        "<": ">",
        "[": "]",
    }

    if opening == "":
        opening_pos = find_first_of_characters("({<[", data, start)
        if opening_pos == len(data):
            raise RuntimeError("Error while finding end of scope in ANY mode")
        opening = data[opening_pos]

    closing = brackets_match[opening]
    start_of_scope = data.find(opening, start)

    if start_of_scope == -1:
        raise RuntimeError("No opening bracket found!")

    # Single pass with a nesting counter: the scope ends at the first closing
    # bracket that brings the number of openings and closings into balance.
    depth = 0
    for idx in range(start_of_scope, len(data)):
        if data[idx] == opening:
            depth += 1
        elif data[idx] == closing:
            depth -= 1
            if depth == 0:
                return start_of_scope, idx

    raise RuntimeError("Error while finding end of scope.")


def smart_split_by(data: str, delim: str = ",") -> List[str]:
    """
    Splits 'data' by any character of 'delim', ignoring delimiters that are
    inside brackets or quotes. Segments are stripped of spaces and newlines.
    Empty segments are skipped with a warning.
    """
    data = data.strip(" \n")

    res = []
    segment_start = 0

    while segment_start < len(data):

        next_delim = smart_find_first_of_characters(delim, data, segment_start)

        segment = data[segment_start:next_delim].strip(" \n")
        if segment != "":
            res.append(segment)
        else:
            warning_log("Warning: empty segment in smart_split_by")

        # Skip the delimiter(s) and the whitespace that follows them.
        segment_start = find_first_not_restricted_character(f"{delim} \n", data, next_delim)

    return res


def _find_closing_quote(data: str, opening_pos: int) -> int:
    """
    Returns index of the unescaped quote that closes the literal opened at
    'opening_pos' (the quote kind is data[opening_pos]), or -1 if unterminated.
    """
    quote = data[opening_pos]
    i = opening_pos + 1
    while i < len(data):
        if data[i] == "\\":
            # A backslash escapes the next character, whatever it is.
            i += 2
            continue
        if data[i] == quote:
            return i
        i += 1
    return -1


def smart_find_first_of_characters(characters: str, data: str, pos: int) -> int:
    """
    Returns index of the first character from 'characters' found in 'data'
    starting at 'pos', skipping everything inside brackets "<({[" and inside
    single or double quoted literals.
    If there is no such character (or a quote is not terminated) -> returns len(data).
    Unbalanced brackets -> error is raised by find_scope_borders.
    """
    i = pos
    while i < len(data):
        current = data[i]
        if current in characters:
            return i

        if current in "<({[":
            # Jump to the matching closing bracket.
            _, i = find_scope_borders(data, i, current)

        elif current in "\"'":
            closing_quote = _find_closing_quote(data, i)
            if closing_quote == -1:
                # Unterminated literal: the rest of data is inside the quote.
                return len(data)
            i = closing_quote

        i += 1

    return len(data)


def check_cpp_name(data: str) -> bool:
    """
    Returns True if 'data' contains none of the characters forbidden in a name.
    Only the forbidden characters are checked.
    """
    forbidden_chars = " ~!@#$%^&*()-+=[]\\{}|;:'\",./<>?"
    return find_first_of_characters(forbidden_chars, data) == len(data)