1// © 2017 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3 4// bytesinkutil.cpp 5// created: 2017sep14 Markus W. Scherer 6 7#include "unicode/utypes.h" 8#include "unicode/bytestream.h" 9#include "unicode/edits.h" 10#include "unicode/stringoptions.h" 11#include "unicode/utf8.h" 12#include "unicode/utf16.h" 13#include "bytesinkutil.h" 14#include "charstr.h" 15#include "cmemory.h" 16#include "uassert.h" 17 18U_NAMESPACE_BEGIN 19 20UBool 21ByteSinkUtil::appendChange(int32_t length, const char16_t *s16, int32_t s16Length, 22 ByteSink &sink, Edits *edits, UErrorCode &errorCode) { 23 if (U_FAILURE(errorCode)) { return false; } 24 char scratch[200]; 25 int32_t s8Length = 0; 26 for (int32_t i = 0; i < s16Length;) { 27 int32_t capacity; 28 int32_t desiredCapacity = s16Length - i; 29 if (desiredCapacity < (INT32_MAX / 3)) { 30 desiredCapacity *= 3; // max 3 UTF-8 bytes per UTF-16 code unit 31 } else if (desiredCapacity < (INT32_MAX / 2)) { 32 desiredCapacity *= 2; 33 } else { 34 desiredCapacity = INT32_MAX; 35 } 36 char *buffer = sink.GetAppendBuffer(U8_MAX_LENGTH, desiredCapacity, 37 scratch, UPRV_LENGTHOF(scratch), &capacity); 38 capacity -= U8_MAX_LENGTH - 1; 39 int32_t j = 0; 40 for (; i < s16Length && j < capacity;) { 41 UChar32 c; 42 U16_NEXT_UNSAFE(s16, i, c); 43 U8_APPEND_UNSAFE(buffer, j, c); 44 } 45 if (j > (INT32_MAX - s8Length)) { 46 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 47 return false; 48 } 49 sink.Append(buffer, j); 50 s8Length += j; 51 } 52 if (edits != nullptr) { 53 edits->addReplace(length, s8Length); 54 } 55 return true; 56} 57 58UBool 59ByteSinkUtil::appendChange(const uint8_t *s, const uint8_t *limit, 60 const char16_t *s16, int32_t s16Length, 61 ByteSink &sink, Edits *edits, UErrorCode &errorCode) { 62 if (U_FAILURE(errorCode)) { return false; } 63 if ((limit - s) > INT32_MAX) { 64 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 65 return false; 66 } 67 return appendChange((int32_t)(limit - s), s16, s16Length, sink, edits, errorCode); 68} 69 70void 71ByteSinkUtil::appendCodePoint(int32_t length, UChar32 c, ByteSink &sink, Edits *edits) { 72 char s8[U8_MAX_LENGTH]; 73 int32_t s8Length = 0; 74 U8_APPEND_UNSAFE(s8, s8Length, c); 75 if (edits != nullptr) { 76 edits->addReplace(length, s8Length); 77 } 78 sink.Append(s8, s8Length); 79} 80 81namespace { 82 83// See unicode/utf8.h U8_APPEND_UNSAFE(). 84inline uint8_t getTwoByteLead(UChar32 c) { return (uint8_t)((c >> 6) | 0xc0); } 85inline uint8_t getTwoByteTrail(UChar32 c) { return (uint8_t)((c & 0x3f) | 0x80); } 86 87} // namespace 88 89void 90ByteSinkUtil::appendTwoBytes(UChar32 c, ByteSink &sink) { 91 U_ASSERT(0x80 <= c && c <= 0x7ff); // 2-byte UTF-8 92 char s8[2] = { (char)getTwoByteLead(c), (char)getTwoByteTrail(c) }; 93 sink.Append(s8, 2); 94} 95 96void 97ByteSinkUtil::appendNonEmptyUnchanged(const uint8_t *s, int32_t length, 98 ByteSink &sink, uint32_t options, Edits *edits) { 99 U_ASSERT(length > 0); 100 if (edits != nullptr) { 101 edits->addUnchanged(length); 102 } 103 if ((options & U_OMIT_UNCHANGED_TEXT) == 0) { 104 sink.Append(reinterpret_cast<const char *>(s), length); 105 } 106} 107 108UBool 109ByteSinkUtil::appendUnchanged(const uint8_t *s, const uint8_t *limit, 110 ByteSink &sink, uint32_t options, Edits *edits, 111 UErrorCode &errorCode) { 112 if (U_FAILURE(errorCode)) { return false; } 113 if ((limit - s) > INT32_MAX) { 114 errorCode = U_INDEX_OUTOFBOUNDS_ERROR; 115 return false; 116 } 117 int32_t length = (int32_t)(limit - s); 118 if (length > 0) { 119 appendNonEmptyUnchanged(s, length, sink, options, edits); 120 } 121 return true; 122} 123 124CharStringByteSink::CharStringByteSink(CharString* dest) : dest_(*dest) { 125} 126 127CharStringByteSink::~CharStringByteSink() = default; 128 129void 130CharStringByteSink::Append(const char* bytes, int32_t n) { 131 UErrorCode status = U_ZERO_ERROR; 132 dest_.append(bytes, n, status); 133 // Any errors are silently ignored. 134} 135 136char* 137CharStringByteSink::GetAppendBuffer(int32_t min_capacity, 138 int32_t desired_capacity_hint, 139 char* scratch, 140 int32_t scratch_capacity, 141 int32_t* result_capacity) { 142 if (min_capacity < 1 || scratch_capacity < min_capacity) { 143 *result_capacity = 0; 144 return nullptr; 145 } 146 147 UErrorCode status = U_ZERO_ERROR; 148 char* result = dest_.getAppendBuffer( 149 min_capacity, 150 desired_capacity_hint, 151 *result_capacity, 152 status); 153 if (U_SUCCESS(status)) { 154 return result; 155 } 156 157 *result_capacity = scratch_capacity; 158 return scratch; 159} 160 161U_NAMESPACE_END 162