11cb0ef41Sopenharmony_ci// © 2020 and later: Unicode, Inc. and others. 21cb0ef41Sopenharmony_ci// License & terms of use: http://www.unicode.org/copyright.html 31cb0ef41Sopenharmony_ci 41cb0ef41Sopenharmony_ci// uniquecharstr.h 51cb0ef41Sopenharmony_ci// created: 2020sep01 Frank Yung-Fong Tang 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ci#ifndef __UNIQUECHARSTR_H__ 81cb0ef41Sopenharmony_ci#define __UNIQUECHARSTR_H__ 91cb0ef41Sopenharmony_ci 101cb0ef41Sopenharmony_ci#include "charstr.h" 111cb0ef41Sopenharmony_ci#include "uassert.h" 121cb0ef41Sopenharmony_ci#include "uhash.h" 131cb0ef41Sopenharmony_ci#include "cmemory.h" 141cb0ef41Sopenharmony_ci 151cb0ef41Sopenharmony_ciU_NAMESPACE_BEGIN 161cb0ef41Sopenharmony_ci 171cb0ef41Sopenharmony_ci/** 181cb0ef41Sopenharmony_ci * Stores NUL-terminated strings with duplicate elimination. 191cb0ef41Sopenharmony_ci * Checks for unique UTF-16 string pointers and converts to invariant characters. 201cb0ef41Sopenharmony_ci * 211cb0ef41Sopenharmony_ci * Intended to be stack-allocated. Add strings, get a unique number for each, 221cb0ef41Sopenharmony_ci * freeze the object, get a char * pointer for each string, 231cb0ef41Sopenharmony_ci * call orphanCharStrings() to capture the string storage, and let this object go out of scope. 241cb0ef41Sopenharmony_ci */ 251cb0ef41Sopenharmony_ciclass UniqueCharStrings { 261cb0ef41Sopenharmony_cipublic: 271cb0ef41Sopenharmony_ci UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) { 281cb0ef41Sopenharmony_ci // Note: We hash on string contents but store stable char16_t * pointers. 291cb0ef41Sopenharmony_ci // If the strings are stored in resource bundles which should be built with 301cb0ef41Sopenharmony_ci // duplicate elimination, then we should be able to hash on just the pointer values. 311cb0ef41Sopenharmony_ci uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode); 321cb0ef41Sopenharmony_ci if (U_FAILURE(errorCode)) { return; } 331cb0ef41Sopenharmony_ci strings = new CharString(); 341cb0ef41Sopenharmony_ci if (strings == nullptr) { 351cb0ef41Sopenharmony_ci errorCode = U_MEMORY_ALLOCATION_ERROR; 361cb0ef41Sopenharmony_ci } 371cb0ef41Sopenharmony_ci } 381cb0ef41Sopenharmony_ci ~UniqueCharStrings() { 391cb0ef41Sopenharmony_ci uhash_close(&map); 401cb0ef41Sopenharmony_ci delete strings; 411cb0ef41Sopenharmony_ci } 421cb0ef41Sopenharmony_ci 431cb0ef41Sopenharmony_ci /** Returns/orphans the CharString that contains all strings. */ 441cb0ef41Sopenharmony_ci CharString *orphanCharStrings() { 451cb0ef41Sopenharmony_ci CharString *result = strings; 461cb0ef41Sopenharmony_ci strings = nullptr; 471cb0ef41Sopenharmony_ci return result; 481cb0ef41Sopenharmony_ci } 491cb0ef41Sopenharmony_ci 501cb0ef41Sopenharmony_ci /** 511cb0ef41Sopenharmony_ci * Adds a NUL-terminated string and returns a unique number for it. 521cb0ef41Sopenharmony_ci * The string must not change, nor move around in memory, 531cb0ef41Sopenharmony_ci * while this UniqueCharStrings is in use. 541cb0ef41Sopenharmony_ci * 551cb0ef41Sopenharmony_ci * Best used with string data in a stable storage, such as strings returned 561cb0ef41Sopenharmony_ci * by resource bundle functions. 571cb0ef41Sopenharmony_ci */ 581cb0ef41Sopenharmony_ci int32_t add(const char16_t*p, UErrorCode &errorCode) { 591cb0ef41Sopenharmony_ci if (U_FAILURE(errorCode)) { return -1; } 601cb0ef41Sopenharmony_ci if (isFrozen) { 611cb0ef41Sopenharmony_ci errorCode = U_NO_WRITE_PERMISSION; 621cb0ef41Sopenharmony_ci return -1; 631cb0ef41Sopenharmony_ci } 641cb0ef41Sopenharmony_ci // The string points into the resource bundle. 651cb0ef41Sopenharmony_ci int32_t oldIndex = uhash_geti(&map, p); 661cb0ef41Sopenharmony_ci if (oldIndex != 0) { // found duplicate 671cb0ef41Sopenharmony_ci return oldIndex; 681cb0ef41Sopenharmony_ci } 691cb0ef41Sopenharmony_ci // Explicit NUL terminator for the previous string. 701cb0ef41Sopenharmony_ci // The strings object is also terminated with one implicit NUL. 711cb0ef41Sopenharmony_ci strings->append(0, errorCode); 721cb0ef41Sopenharmony_ci int32_t newIndex = strings->length(); 731cb0ef41Sopenharmony_ci strings->appendInvariantChars(p, u_strlen(p), errorCode); 741cb0ef41Sopenharmony_ci uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode); 751cb0ef41Sopenharmony_ci return newIndex; 761cb0ef41Sopenharmony_ci } 771cb0ef41Sopenharmony_ci 781cb0ef41Sopenharmony_ci /** 791cb0ef41Sopenharmony_ci * Adds a unicode string by value and returns a unique number for it. 801cb0ef41Sopenharmony_ci */ 811cb0ef41Sopenharmony_ci int32_t addByValue(UnicodeString s, UErrorCode &errorCode) { 821cb0ef41Sopenharmony_ci if (U_FAILURE(errorCode)) { return -1; } 831cb0ef41Sopenharmony_ci if (isFrozen) { 841cb0ef41Sopenharmony_ci errorCode = U_NO_WRITE_PERMISSION; 851cb0ef41Sopenharmony_ci return -1; 861cb0ef41Sopenharmony_ci } 871cb0ef41Sopenharmony_ci int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer()); 881cb0ef41Sopenharmony_ci if (oldIndex != 0) { // found duplicate 891cb0ef41Sopenharmony_ci return oldIndex; 901cb0ef41Sopenharmony_ci } 911cb0ef41Sopenharmony_ci // We need to store the string content of the UnicodeString. 921cb0ef41Sopenharmony_ci UnicodeString *key = keyStore.create(s); 931cb0ef41Sopenharmony_ci if (key == nullptr) { 941cb0ef41Sopenharmony_ci errorCode = U_MEMORY_ALLOCATION_ERROR; 951cb0ef41Sopenharmony_ci return -1; 961cb0ef41Sopenharmony_ci } 971cb0ef41Sopenharmony_ci return add(key->getTerminatedBuffer(), errorCode); 981cb0ef41Sopenharmony_ci } 991cb0ef41Sopenharmony_ci 1001cb0ef41Sopenharmony_ci void freeze() { isFrozen = true; } 1011cb0ef41Sopenharmony_ci 1021cb0ef41Sopenharmony_ci /** 1031cb0ef41Sopenharmony_ci * Returns a string pointer for its unique number, if this object is frozen. 1041cb0ef41Sopenharmony_ci * Otherwise nullptr. 1051cb0ef41Sopenharmony_ci */ 1061cb0ef41Sopenharmony_ci const char *get(int32_t i) const { 1071cb0ef41Sopenharmony_ci U_ASSERT(isFrozen); 1081cb0ef41Sopenharmony_ci return isFrozen && i > 0 ? strings->data() + i : nullptr; 1091cb0ef41Sopenharmony_ci } 1101cb0ef41Sopenharmony_ci 1111cb0ef41Sopenharmony_ciprivate: 1121cb0ef41Sopenharmony_ci UHashtable map; 1131cb0ef41Sopenharmony_ci CharString *strings; 1141cb0ef41Sopenharmony_ci MemoryPool<UnicodeString> keyStore; 1151cb0ef41Sopenharmony_ci bool isFrozen = false; 1161cb0ef41Sopenharmony_ci}; 1171cb0ef41Sopenharmony_ci 1181cb0ef41Sopenharmony_ciU_NAMESPACE_END 1191cb0ef41Sopenharmony_ci 1201cb0ef41Sopenharmony_ci#endif // __UNIQUECHARSTR_H__ 121