/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * Pinyin.
 *
 * @typedef Option.
 * @type Object.
 * @property {Boolean} [checkPolyphone=false] Whether to check for polyphonic words.
 * @property {Number} [charCase=0] Output pinyin case mode: 0 - first letter capitalized; 1 - all lowercase; 2 - all uppercase.
 */
import { PinyinDict } from './PinyinDict';
import { Log } from './Log';

const TAG = 'Pinyin';

// First code unit handled by the dictionaries (U+4E00). char_dict is indexed by
// (code - DICT_RANGE_START), so this value maps to index 0.
const DICT_RANGE_START = 19968;
// Last code unit handled by the dictionaries (U+9FA5).
// NOTE(review): assumes PinyinDict.char_dict covers this code unit - confirm against PinyinDict.
const DICT_RANGE_END = 40869;

/**
 * Whether a UTF-16 code unit falls inside the range handled by the dictionaries.
 * Both endpoints are inclusive.
 *
 * @param {number} code - UTF-16 code unit of a single character
 * @return {boolean} true when the code unit is within the handled range
 */
function isInDictRange(code) {
  return code >= DICT_RANGE_START && code <= DICT_RANGE_END;
}

class Pinyin {
  private options;
  private char_dict;
  private full_dict;
  private polyphone;

  /**
   * Constructor.
   *
   * @param {object} options - the options for Chinese to pinyin conversion
   */
  constructor(options) {
    this.setOptions(options);
    this.initialize();
  }

  /**
   * Set the conversion options. Missing fields fall back to
   * { checkPolyphone: false, charCase: 0 }.
   *
   * @param {object} options - the options for Chinese to pinyin conversion
   */
  setOptions(options) {
    options = options || {};
    this.options = Object.assign({ checkPolyphone: false, charCase: 0 }, options);
  }

  /**
   * Load the dictionaries from PinyinDict.
   */
  initialize() {
    this.char_dict = PinyinDict.char_dict;
    this.full_dict = PinyinDict.full_dict;
    this.polyphone = PinyinDict.polyphone;
  }

  /**
   * Get the pinyin initials of a string.
   *
   * @param {string} str - The input Chinese string
   * @return {string|string[]|undefined} A single string when checkPolyphone is off,
   *     an array with every combination of initials when it is on, or undefined
   *     (after logging an error) when str is not a string.
   */
  getCamelChars(str) {
    if (typeof str !== 'string') {
      Log.showError(TAG, 'getCamelChars need string param!');
      return;
    }
    const chars = [];
    for (let i = 0; i < str.length; i++) {
      // Characters inside the dictionary range become their initial(s);
      // everything else is passed through unchanged.
      chars.push(this.getChar(str.charAt(i)));
    }
    // getResult yields an array in polyphone mode, so the case conversion
    // must handle both strings and arrays.
    return this.applyCharCase(this.getResult(chars));
  }

  /**
   * Get the full pinyin of a string.
   *
   * Characters inside the dictionary range are replaced by their capitalized
   * pinyin; such characters that are missing from the dictionary are dropped.
   * All other characters are copied through unchanged.
   *
   * @param {string} str - The input Chinese string.
   * @return {string|undefined} The converted string, or undefined (after logging
   *     an error) when str is not a string.
   */
  getFullChars(str) {
    if (typeof str !== 'string') {
      Log.showError(TAG, 'getFullChars need string param!');
      return;
    }
    let result = '';
    for (let i = 0; i < str.length; i++) {
      const ch = str.charAt(i);
      if (isInDictRange(ch.charCodeAt(0))) {
        const name = this.getFullChar(ch);
        // getFullChar returns false when nothing matches; capitalize may return
        // undefined for an empty key. Only real strings are appended.
        if (typeof name === 'string') {
          result += name;
        }
      } else {
        result += ch;
      }
    }
    return this.applyCharCase(result);
  }

  /**
   * Look up the full pinyin of a single character.
   *
   * @param {string} ch - A single character
   * @return {string|boolean} The capitalized key of the first full_dict entry whose
   *     value contains ch, or false when no entry contains it.
   */
  getFullChar(ch) {
    for (const key in this.full_dict) {
      if (this.full_dict[key].indexOf(ch) !== -1) {
        return this.capitalize(key);
      }
    }
    return false;
  }

  /**
   * Upper-case the first character of a string.
   *
   * @param {string} str - The string to capitalize
   * @return {string|undefined} The capitalized string, or undefined (after logging
   *     an error) when str is empty.
   */
  capitalize(str) {
    if (str.length <= 0) {
      Log.showError(TAG, 'The length of str should be greater than 0!');
      return;
    }
    return str.charAt(0).toUpperCase() + str.slice(1);
  }

  /**
   * Get the pinyin initial(s) of a single character.
   *
   * @param {string} ch - A single character
   * @return {string} The polyphone entry when checkPolyphone is on and one exists,
   *     otherwise the char_dict initial; characters outside the dictionary range
   *     are returned unchanged.
   */
  getChar(ch) {
    const unicode = ch.charCodeAt(0);
    if (!isInDictRange(unicode)) {
      // Not covered by the dictionaries: return the character as is.
      return ch;
    }
    if (this.options.checkPolyphone && this.polyphone[unicode]) {
      return this.polyphone[unicode];
    }
    // charAt yields '' past the end of the dictionary; fall back to the original
    // character so it is not silently swallowed.
    return this.char_dict.charAt(unicode - DICT_RANGE_START) || ch;
  }

  /**
   * Combine the per-character initials into the final result.
   *
   * @param {string[]} chars - One entry per input character; an entry longer than
   *     one character lists alternative initials for that character.
   * @return {string|string[]} The joined string when checkPolyphone is off;
   *     otherwise an array containing every combination of the alternatives.
   */
  getResult(chars) {
    if (!this.options.checkPolyphone) {
      return chars.join('');
    }
    let result = [''];
    for (const candidates of chars) {
      if (candidates.length <= 1) {
        // Single (or empty) entry: append it to every combination built so far.
        // Handling the empty case here keeps it from wiping the accumulated results.
        result = result.map((prefix) => prefix + candidates);
        continue;
      }
      // Several alternatives: fork every existing combination once per alternative,
      // keeping the alternatives in their original order.
      const expanded = [];
      for (let j = 0; j < candidates.length; j++) {
        const letter = candidates.charAt(j);
        for (let k = 0; k < result.length; k++) {
          expanded.push(result[k] + letter);
        }
      }
      result = expanded;
    }
    return result;
  }

  /**
   * Apply the configured charCase to a result.
   *
   * @param {string|string[]} value - A result string or an array of result strings
   * @return {string|string[]} value converted to lowercase (charCase 1) or
   *     uppercase (charCase 2); returned unchanged for any other charCase.
   */
  private applyCharCase(value) {
    let convert: (text: string) => string;
    switch (this.options.charCase) {
      case 1:
        convert = (text) => text.toLowerCase();
        break;
      case 2:
        convert = (text) => text.toUpperCase();
        break;
      default:
        return value;
    }
    return Array.isArray(value) ? value.map(convert) : convert(value);
  }
}

export default Pinyin;