// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/strings/string-case.h"

#include <cstddef>
#include <limits>
#include <type_traits>

#include "src/base/logging.h"
#include "src/common/assert-scope.h"
#include "src/common/globals.h"
#include "src/utils/utils.h"

namespace v8 {
namespace internal {

// FastAsciiConvert tries to do character processing on a word_t basis if
// source and destination strings are properly aligned. Natural alignment of
// string data depends on kTaggedSize so we define word_t via Tagged_t.
181cb0ef41Sopenharmony_ciusing word_t = std::make_unsigned<Tagged_t>::type; 191cb0ef41Sopenharmony_ci 201cb0ef41Sopenharmony_ciconst word_t kWordTAllBitsSet = std::numeric_limits<word_t>::max(); 211cb0ef41Sopenharmony_ciconst word_t kOneInEveryByte = kWordTAllBitsSet / 0xFF; 221cb0ef41Sopenharmony_ciconst word_t kAsciiMask = kOneInEveryByte << 7; 231cb0ef41Sopenharmony_ci 241cb0ef41Sopenharmony_ci#ifdef DEBUG 251cb0ef41Sopenharmony_cibool CheckFastAsciiConvert(char* dst, const char* src, int length, bool changed, 261cb0ef41Sopenharmony_ci bool is_to_lower) { 271cb0ef41Sopenharmony_ci bool expected_changed = false; 281cb0ef41Sopenharmony_ci for (int i = 0; i < length; i++) { 291cb0ef41Sopenharmony_ci if (dst[i] == src[i]) continue; 301cb0ef41Sopenharmony_ci expected_changed = true; 311cb0ef41Sopenharmony_ci if (is_to_lower) { 321cb0ef41Sopenharmony_ci DCHECK('A' <= src[i] && src[i] <= 'Z'); 331cb0ef41Sopenharmony_ci DCHECK(dst[i] == src[i] + ('a' - 'A')); 341cb0ef41Sopenharmony_ci } else { 351cb0ef41Sopenharmony_ci DCHECK('a' <= src[i] && src[i] <= 'z'); 361cb0ef41Sopenharmony_ci DCHECK(dst[i] == src[i] - ('a' - 'A')); 371cb0ef41Sopenharmony_ci } 381cb0ef41Sopenharmony_ci } 391cb0ef41Sopenharmony_ci return (expected_changed == changed); 401cb0ef41Sopenharmony_ci} 411cb0ef41Sopenharmony_ci#endif 421cb0ef41Sopenharmony_ci 431cb0ef41Sopenharmony_ci// Given a word and two range boundaries returns a word with high bit 441cb0ef41Sopenharmony_ci// set in every byte iff the corresponding input byte was strictly in 451cb0ef41Sopenharmony_ci// the range (m, n). All the other bits in the result are cleared. 461cb0ef41Sopenharmony_ci// This function is only useful when it can be inlined and the 471cb0ef41Sopenharmony_ci// boundaries are statically known. 481cb0ef41Sopenharmony_ci// Requires: all bytes in the input word and the boundaries must be 491cb0ef41Sopenharmony_ci// ASCII (less than 0x7F). 
501cb0ef41Sopenharmony_cistatic inline word_t AsciiRangeMask(word_t w, char m, char n) { 511cb0ef41Sopenharmony_ci // Use strict inequalities since in edge cases the function could be 521cb0ef41Sopenharmony_ci // further simplified. 531cb0ef41Sopenharmony_ci DCHECK(0 < m && m < n); 541cb0ef41Sopenharmony_ci // Has high bit set in every w byte less than n. 551cb0ef41Sopenharmony_ci word_t tmp1 = kOneInEveryByte * (0x7F + n) - w; 561cb0ef41Sopenharmony_ci // Has high bit set in every w byte greater than m. 571cb0ef41Sopenharmony_ci word_t tmp2 = w + kOneInEveryByte * (0x7F - m); 581cb0ef41Sopenharmony_ci return (tmp1 & tmp2 & (kOneInEveryByte * 0x80)); 591cb0ef41Sopenharmony_ci} 601cb0ef41Sopenharmony_ci 611cb0ef41Sopenharmony_citemplate <bool is_lower> 621cb0ef41Sopenharmony_ciint FastAsciiConvert(char* dst, const char* src, int length, 631cb0ef41Sopenharmony_ci bool* changed_out) { 641cb0ef41Sopenharmony_ci#ifdef DEBUG 651cb0ef41Sopenharmony_ci char* saved_dst = dst; 661cb0ef41Sopenharmony_ci#endif 671cb0ef41Sopenharmony_ci const char* saved_src = src; 681cb0ef41Sopenharmony_ci DisallowGarbageCollection no_gc; 691cb0ef41Sopenharmony_ci // We rely on the distance between upper and lower case letters 701cb0ef41Sopenharmony_ci // being a known power of 2. 711cb0ef41Sopenharmony_ci DCHECK_EQ('a' - 'A', 1 << 5); 721cb0ef41Sopenharmony_ci // Boundaries for the range of input characters than require conversion. 731cb0ef41Sopenharmony_ci static const char lo = is_lower ? 'A' - 1 : 'a' - 1; 741cb0ef41Sopenharmony_ci static const char hi = is_lower ? 'Z' + 1 : 'z' + 1; 751cb0ef41Sopenharmony_ci bool changed = false; 761cb0ef41Sopenharmony_ci const char* const limit = src + length; 771cb0ef41Sopenharmony_ci 781cb0ef41Sopenharmony_ci // dst is newly allocated and always aligned. 791cb0ef41Sopenharmony_ci DCHECK(IsAligned(reinterpret_cast<Address>(dst), sizeof(word_t))); 801cb0ef41Sopenharmony_ci // Only attempt processing one word at a time if src is also aligned. 
811cb0ef41Sopenharmony_ci if (IsAligned(reinterpret_cast<Address>(src), sizeof(word_t))) { 821cb0ef41Sopenharmony_ci // Process the prefix of the input that requires no conversion one aligned 831cb0ef41Sopenharmony_ci // (machine) word at a time. 841cb0ef41Sopenharmony_ci while (src <= limit - sizeof(word_t)) { 851cb0ef41Sopenharmony_ci const word_t w = *reinterpret_cast<const word_t*>(src); 861cb0ef41Sopenharmony_ci if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src); 871cb0ef41Sopenharmony_ci if (AsciiRangeMask(w, lo, hi) != 0) { 881cb0ef41Sopenharmony_ci changed = true; 891cb0ef41Sopenharmony_ci break; 901cb0ef41Sopenharmony_ci } 911cb0ef41Sopenharmony_ci *reinterpret_cast<word_t*>(dst) = w; 921cb0ef41Sopenharmony_ci src += sizeof(word_t); 931cb0ef41Sopenharmony_ci dst += sizeof(word_t); 941cb0ef41Sopenharmony_ci } 951cb0ef41Sopenharmony_ci // Process the remainder of the input performing conversion when 961cb0ef41Sopenharmony_ci // required one word at a time. 971cb0ef41Sopenharmony_ci while (src <= limit - sizeof(word_t)) { 981cb0ef41Sopenharmony_ci const word_t w = *reinterpret_cast<const word_t*>(src); 991cb0ef41Sopenharmony_ci if ((w & kAsciiMask) != 0) return static_cast<int>(src - saved_src); 1001cb0ef41Sopenharmony_ci word_t m = AsciiRangeMask(w, lo, hi); 1011cb0ef41Sopenharmony_ci // The mask has high (7th) bit set in every byte that needs 1021cb0ef41Sopenharmony_ci // conversion and we know that the distance between cases is 1031cb0ef41Sopenharmony_ci // 1 << 5. 1041cb0ef41Sopenharmony_ci *reinterpret_cast<word_t*>(dst) = w ^ (m >> 2); 1051cb0ef41Sopenharmony_ci src += sizeof(word_t); 1061cb0ef41Sopenharmony_ci dst += sizeof(word_t); 1071cb0ef41Sopenharmony_ci } 1081cb0ef41Sopenharmony_ci } 1091cb0ef41Sopenharmony_ci // Process the last few bytes of the input (or the whole input if 1101cb0ef41Sopenharmony_ci // unaligned access is not supported). 
1111cb0ef41Sopenharmony_ci while (src < limit) { 1121cb0ef41Sopenharmony_ci char c = *src; 1131cb0ef41Sopenharmony_ci if ((c & kAsciiMask) != 0) return static_cast<int>(src - saved_src); 1141cb0ef41Sopenharmony_ci if (lo < c && c < hi) { 1151cb0ef41Sopenharmony_ci c ^= (1 << 5); 1161cb0ef41Sopenharmony_ci changed = true; 1171cb0ef41Sopenharmony_ci } 1181cb0ef41Sopenharmony_ci *dst = c; 1191cb0ef41Sopenharmony_ci ++src; 1201cb0ef41Sopenharmony_ci ++dst; 1211cb0ef41Sopenharmony_ci } 1221cb0ef41Sopenharmony_ci 1231cb0ef41Sopenharmony_ci DCHECK( 1241cb0ef41Sopenharmony_ci CheckFastAsciiConvert(saved_dst, saved_src, length, changed, is_lower)); 1251cb0ef41Sopenharmony_ci 1261cb0ef41Sopenharmony_ci *changed_out = changed; 1271cb0ef41Sopenharmony_ci return length; 1281cb0ef41Sopenharmony_ci} 1291cb0ef41Sopenharmony_ci 1301cb0ef41Sopenharmony_citemplate int FastAsciiConvert<false>(char* dst, const char* src, int length, 1311cb0ef41Sopenharmony_ci bool* changed_out); 1321cb0ef41Sopenharmony_citemplate int FastAsciiConvert<true>(char* dst, const char* src, int length, 1331cb0ef41Sopenharmony_ci bool* changed_out); 1341cb0ef41Sopenharmony_ci 1351cb0ef41Sopenharmony_ci} // namespace internal 1361cb0ef41Sopenharmony_ci} // namespace v8 137