16e80583aSopenharmony_ci/**
26e80583aSopenharmony_ci * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
36e80583aSopenharmony_ci * Licensed under the Apache License, Version 2.0 (the "License");
46e80583aSopenharmony_ci * you may not use this file except in compliance with the License.
56e80583aSopenharmony_ci * You may obtain a copy of the License at
66e80583aSopenharmony_ci *
76e80583aSopenharmony_ci *     http://www.apache.org/licenses/LICENSE-2.0
86e80583aSopenharmony_ci *
96e80583aSopenharmony_ci * Unless required by applicable law or agreed to in writing, software
106e80583aSopenharmony_ci * distributed under the License is distributed on an "AS IS" BASIS,
116e80583aSopenharmony_ci * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
126e80583aSopenharmony_ci * See the License for the specific language governing permissions and
136e80583aSopenharmony_ci * limitations under the License.
146e80583aSopenharmony_ci */
156e80583aSopenharmony_ci
/**
 * Pinyin conversion options.
 *
 * @typedef {Object} Option
 * @property {Boolean} [checkPolyphone=false] Whether to check for polyphonic characters.
 * @property {Number} [charCase=0] Output pinyin case mode: 0 - first letter capitalized; 1 - all lowercase; 2 - all uppercase.
 */
246e80583aSopenharmony_ciimport { PinyinDict } from './PinyinDict';
256e80583aSopenharmony_ciimport { Log } from './Log';
266e80583aSopenharmony_ci
const TAG = 'Pinyin';

// Inclusive UTF-16 code-unit range that is looked up in the dictionaries:
// 19968 (U+4E00) through 40869 (U+9FA5).
const FIRST_HANZI_CODE = 19968;
const LAST_HANZI_CODE = 40869;

/**
 * Converts Chinese text to pinyin (full syllables or initials) using the
 * lookup tables exposed by PinyinDict.
 *
 * Characters outside the handled code range are copied to the output unchanged.
 */
class Pinyin {
  private options;
  private char_dict;
  private full_dict;
  private polyphone;

  /**
   * Constructor.
   *
   * @param {object} options - the options for chinese transform to pinyin
   */
  constructor(options) {
    this.setOptions(options);
    this.initialize();
  }

  /**
   * Store the conversion options, filling in defaults for missing keys.
   *
   * @param {object} options - the options for chinese transform to pinyin;
   *   a missing/falsy value selects the defaults
   *   ({ checkPolyphone: false, charCase: 0 }).
   */
  setOptions(options) {
    this.options = Object.assign({ checkPolyphone: false, charCase: 0 }, options || {});
  }

  /**
   * Bind the lookup tables from PinyinDict to this instance.
   */
  initialize() {
    this.char_dict = PinyinDict.char_dict;
    this.full_dict = PinyinDict.full_dict;
    this.polyphone = PinyinDict.polyphone;
  }

  /**
   * Get the initials of pinyin.
   *
   * @param {string} str - The input Chinese string
   * @return {string|string[]|undefined} A single string when checkPolyphone is
   *   off; an array with every combination of initials when it is on;
   *   undefined (after logging an error) when str is not a string.
   */
  getCamelChars(str) {
    if (typeof str !== 'string') {
      Log.showError(TAG, 'getCamelChars need string param!');
      return;
    }
    const chars = [];
    for (let i = 0; i < str.length; i++) {
      chars.push(this.getChar(str.charAt(i)));
    }
    return this.applyCharCase(this.getResult(chars));
  }

  /**
   * Get Pinyin.
   *
   * Each handled character is replaced by its capitalized dictionary key;
   * handled characters that have no dictionary entry are dropped, and all
   * other characters are copied through.
   *
   * @param {string} str - The input Chinese string.
   * @return {string|undefined} The converted string, or undefined (after
   *   logging an error) when str is not a string.
   */
  getFullChars(str) {
    if (typeof str !== 'string') {
      Log.showError(TAG, 'getFullChars need string param!');
      return;
    }
    let result = '';
    for (let i = 0; i < str.length; i++) {
      const ch = str.charAt(i);
      if (!this.isInDictRange(ch.charCodeAt(0))) {
        result += ch;
        continue;
      }
      const name = this.getFullChar(ch);
      // getFullChar yields false (no entry) or undefined (empty key); append real names only.
      if (typeof name === 'string') {
        result += name;
      }
    }
    return this.applyCharCase(result);
  }

  /**
   * Look up the full pinyin of one character.
   *
   * @param {string} ch - A single character.
   * @return {string|boolean|undefined} The capitalized key of the first
   *   full_dict entry whose value contains ch, or false when none does.
   */
  getFullChar(ch) {
    const keys = Object.keys(this.full_dict);
    for (let i = 0; i < keys.length; i++) {
      if (this.full_dict[keys[i]].indexOf(ch) !== -1) {
        return this.capitalize(keys[i]);
      }
    }
    return false;
  }

  /**
   * Upper-case the first character of a string.
   *
   * @param {string} str - The string to capitalize.
   * @return {string|undefined} The capitalized string, or undefined (after
   *   logging an error) when str is empty.
   */
  capitalize(str) {
    if (str.length <= 0) {
      Log.showError(TAG, 'The length of str should be greater than 0!');
      return;
    }
    return str.charAt(0).toUpperCase() + str.slice(1);
  }

  /**
   * Get the pinyin initial(s) of one character.
   *
   * @param {string} ch - A single character.
   * @return {string} ch itself when it is outside the handled range; otherwise
   *   the polyphone entry for its code (only when checkPolyphone is on and an
   *   entry exists) or the char_dict letter at offset (code - 19968).
   */
  getChar(ch) {
    const code = ch.charCodeAt(0);
    if (!this.isInDictRange(code)) {
      return ch;
    }
    // NOTE(review): assumes char_dict holds one letter per code in the inclusive range — confirm against PinyinDict.
    const initial = this.char_dict.charAt(code - FIRST_HANZI_CODE);
    if (!this.options.checkPolyphone) {
      return initial;
    }
    return this.polyphone[code] ? this.polyphone[code] : initial;
  }

  /**
   * Combine the per-character initials into the final result.
   *
   * @param {string[]} chars - One entry per input character; an entry longer
   *   than one letter lists alternative initials.
   * @return {string|string[]} The joined string when checkPolyphone is off;
   *   otherwise every combination, where each multi-letter entry multiplies
   *   the number of results by its length.
   */
  getResult(chars) {
    if (!this.options.checkPolyphone) {
      return chars.join('');
    }
    let result = [''];
    for (let i = 0; i < chars.length; i++) {
      const candidates = chars[i];
      if (candidates.length <= 1) {
        // A single letter (or an empty entry) extends every combination in place;
        // an empty entry must not wipe out what has been collected so far.
        result = result.map((prefix) => prefix + candidates);
        continue;
      }
      // Keep the original ordering: all prefixes with the first alternative,
      // then all prefixes with the second, and so on.
      const expanded = [];
      for (let j = 0; j < candidates.length; j++) {
        const letter = candidates.charAt(j);
        for (let k = 0; k < result.length; k++) {
          expanded.push(result[k] + letter);
        }
      }
      result = expanded;
    }
    return result;
  }

  /**
   * Tell whether a UTF-16 code unit falls in the range served by the dictionaries.
   *
   * @param {number} code - Code unit as returned by String.prototype.charCodeAt.
   * @return {boolean} true for 19968..40869 inclusive.
   */
  private isInDictRange(code) {
    return code >= FIRST_HANZI_CODE && code <= LAST_HANZI_CODE;
  }

  /**
   * Apply the configured charCase to a conversion result.
   *
   * @param {string|string[]} value - A result string, or the array of
   *   combinations produced in polyphone mode.
   * @return {string|string[]} value lower-cased (charCase 1), upper-cased
   *   (charCase 2) or unchanged (any other charCase); arrays are converted
   *   element by element.
   */
  private applyCharCase(value) {
    let convert;
    switch (this.options.charCase) {
      case 1:
        convert = (text) => text.toLowerCase();
        break;
      case 2:
        convert = (text) => text.toUpperCase();
        break;
      default:
        return value;
    }
    return Array.isArray(value) ? value.map(convert) : convert(value);
  }
}
2026e80583aSopenharmony_ci
2036e80583aSopenharmony_ciexport default Pinyin;