/*
 * Copyright (c) 2023 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef STRINGHELPER_H
#define STRINGHELPER_H

#pragma once
#include <cstddef>
#include <cstdint>
#include <string>

/**
 * Static helpers that guess the text encoding of a raw byte buffer and
 * declare string <-> UTF-8 conversion routines.
 */
class StringHelper {
public:
    /** Encodings that the detection routines can report. */
    enum Encode { ANSI = 1, UTF16_LE, UTF16_BE, UTF8_BOM, UTF8 };

    /**
     * Checks whether a buffer is structurally valid multi-byte UTF-8.
     *
     * Lead bytes announcing sequences of 2 to 6 bytes are accepted; every lead
     * byte must be followed by the announced number of 10XX_XXXX continuation
     * bytes.
     *
     * @param data Buffer to inspect; may be nullptr.
     * @param size Number of bytes readable at data.
     * @return Encode::UTF8 when the buffer is well formed and contains at least
     *         one multi-byte sequence. Encode::ANSI otherwise, which includes
     *         empty, null and pure 7-bit buffers, truncated sequences and
     *         unexpected bytes.
     */
    static Encode IsUtf8Data(const uint8_t* data, size_t size)
    {
        // Smallest lead-byte value for each sequence length.
        constexpr uint8_t firstHighBitByte = 0x80; // below this: 0XXX_XXXX, single byte
        constexpr uint8_t lead2 = 0xC0;            // 110X_XXXX 10XX_XXXX
        constexpr uint8_t lead3 = 0xE0;            // 1110_XXXX + 2 continuation bytes
        constexpr uint8_t lead4 = 0xF0;            // 1111_0XXX + 3 continuation bytes
        constexpr uint8_t lead5 = 0xF8;            // 1111_10XX + 4 continuation bytes
        constexpr uint8_t lead6 = 0xFC;            // 1111_110X + 5 continuation bytes
        constexpr uint8_t neverValid = 0xFE;       // 0xFE and 0xFF match none of the patterns above
        constexpr uint8_t continuationMask = 0xC0;
        constexpr uint8_t continuationTag = 0x80;  // 10XX_XXXX

        if (data == nullptr) {
            return Encode::ANSI;
        }

        bool onlySevenBit = true;
        int32_t pending = 0; // continuation bytes still owed by the current sequence
        for (size_t i = 0; i < size; ++i) {
            const uint8_t ch = data[i];
            if (pending > 0) {
                if ((ch & continuationMask) != continuationTag) {
                    return Encode::ANSI;
                }
                --pending;
                continue;
            }
            if (ch < firstHighBitByte) {
                continue;
            }
            onlySevenBit = false;
            if (ch >= neverValid) {
                // Previously these fell through to the 5-byte case and could be accepted.
                return Encode::ANSI;
            } else if (ch >= lead6) {
                pending = 5;
            } else if (ch >= lead5) {
                pending = 4;
            } else if (ch >= lead4) {
                pending = 3;
            } else if (ch >= lead3) {
                pending = 2;
            } else if (ch >= lead2) {
                pending = 1;
            } else {
                // A continuation byte (10XX_XXXX) with no lead byte before it.
                return Encode::ANSI;
            }
        }

        // A sequence cut off by the end of the buffer, or plain 7-bit data, is not UTF8.
        if (pending > 0 || onlySevenBit) {
            return Encode::ANSI;
        }
        return Encode::UTF8;
    }

    /**
     * Guesses the encoding of a buffer from its byte-order mark, falling back
     * to IsUtf8Data() when no mark is present.
     *
     * A buffer that consists of nothing but a byte-order mark is still
     * reported as the encoding that mark belongs to.
     *
     * @param data Buffer to inspect; may be nullptr.
     * @param size Number of bytes readable at data.
     * @return The detected encoding; Encode::ANSI for null input.
     */
    static Encode DetectEncode(const uint8_t* data, size_t size)
    {
        constexpr size_t utf16BomLen = 2;
        constexpr size_t utf8BomLen = 3;

        if (data == nullptr) {
            return Encode::ANSI;
        }
        if (size >= utf16BomLen) {
            if (data[0] == 0xFF && data[1] == 0xFE) {
                return Encode::UTF16_LE;
            }
            if (data[0] == 0xFE && data[1] == 0xFF) {
                return Encode::UTF16_BE;
            }
        }
        if (size >= utf8BomLen && data[0] == 0xEF && data[1] == 0xBB && data[2] == 0xBF) {
            return Encode::UTF8_BOM;
        }
        return IsUtf8Data(data, size);
    }

    // Defined outside this header. NOTE(review): presumably converts from the
    // platform's native narrow encoding to UTF-8 - confirm against the implementation.
    static std::string StringToUtf8(const std::string& str);
    // Defined outside this header. NOTE(review): presumably the inverse of
    // StringToUtf8 - confirm against the implementation.
    static std::string Utf8ToString(const std::string& str);
};

#endif // STRINGHELPER_H