1// © 2020 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3
4// uniquecharstr.h
5// created: 2020sep01 Frank Yung-Fong Tang
6
7#ifndef __UNIQUECHARSTR_H__
8#define __UNIQUECHARSTR_H__
9
10#include "charstr.h"
11#include "uassert.h"
12#include "uhash.h"
13#include "cmemory.h"
14
15U_NAMESPACE_BEGIN
16
17/**
18 * Stores NUL-terminated strings with duplicate elimination.
19 * Checks for unique UTF-16 string pointers and converts to invariant characters.
20 *
21 * Intended to be stack-allocated. Add strings, get a unique number for each,
22 * freeze the object, get a char * pointer for each string,
23 * call orphanCharStrings() to capture the string storage, and let this object go out of scope.
24 */
25class UniqueCharStrings {
26public:
27    UniqueCharStrings(UErrorCode &errorCode) : strings(nullptr) {
28        // Note: We hash on string contents but store stable char16_t * pointers.
29        // If the strings are stored in resource bundles which should be built with
30        // duplicate elimination, then we should be able to hash on just the pointer values.
31        uhash_init(&map, uhash_hashUChars, uhash_compareUChars, uhash_compareLong, &errorCode);
32        if (U_FAILURE(errorCode)) { return; }
33        strings = new CharString();
34        if (strings == nullptr) {
35            errorCode = U_MEMORY_ALLOCATION_ERROR;
36        }
37    }
38    ~UniqueCharStrings() {
39        uhash_close(&map);
40        delete strings;
41    }
42
43    /** Returns/orphans the CharString that contains all strings. */
44    CharString *orphanCharStrings() {
45        CharString *result = strings;
46        strings = nullptr;
47        return result;
48    }
49
50    /**
51     * Adds a NUL-terminated string and returns a unique number for it.
52     * The string must not change, nor move around in memory,
53     * while this UniqueCharStrings is in use.
54     *
55     * Best used with string data in a stable storage, such as strings returned
56     * by resource bundle functions.
57     */
58    int32_t add(const char16_t*p, UErrorCode &errorCode) {
59        if (U_FAILURE(errorCode)) { return -1; }
60        if (isFrozen) {
61            errorCode = U_NO_WRITE_PERMISSION;
62            return -1;
63        }
64        // The string points into the resource bundle.
65        int32_t oldIndex = uhash_geti(&map, p);
66        if (oldIndex != 0) {  // found duplicate
67            return oldIndex;
68        }
69        // Explicit NUL terminator for the previous string.
70        // The strings object is also terminated with one implicit NUL.
71        strings->append(0, errorCode);
72        int32_t newIndex = strings->length();
73        strings->appendInvariantChars(p, u_strlen(p), errorCode);
74        uhash_puti(&map, const_cast<char16_t *>(p), newIndex, &errorCode);
75        return newIndex;
76    }
77
78    /**
79     * Adds a unicode string by value and returns a unique number for it.
80     */
81    int32_t addByValue(UnicodeString s, UErrorCode &errorCode) {
82        if (U_FAILURE(errorCode)) { return -1; }
83        if (isFrozen) {
84            errorCode = U_NO_WRITE_PERMISSION;
85            return -1;
86        }
87        int32_t oldIndex = uhash_geti(&map, s.getTerminatedBuffer());
88        if (oldIndex != 0) {  // found duplicate
89            return oldIndex;
90        }
91        // We need to store the string content of the UnicodeString.
92        UnicodeString *key = keyStore.create(s);
93        if (key == nullptr) {
94            errorCode = U_MEMORY_ALLOCATION_ERROR;
95            return -1;
96        }
97        return add(key->getTerminatedBuffer(), errorCode);
98    }
99
100    void freeze() { isFrozen = true; }
101
102    /**
103     * Returns a string pointer for its unique number, if this object is frozen.
104     * Otherwise nullptr.
105     */
106    const char *get(int32_t i) const {
107        U_ASSERT(isFrozen);
108        return isFrozen && i > 0 ? strings->data() + i : nullptr;
109    }
110
111private:
112    UHashtable map;
113    CharString *strings;
114    MemoryPool<UnicodeString> keyStore;
115    bool isFrozen = false;
116};
117
118U_NAMESPACE_END
119
120#endif  // __UNIQUECHARSTR_H__
121