1// Copyright 2011 the V8 project authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_ 6#define V8_STRINGS_CHAR_PREDICATES_INL_H_ 7 8#include "src/base/bounds.h" 9#include "src/strings/char-predicates.h" 10#include "src/utils/utils.h" 11 12namespace v8 { 13namespace internal { 14 15// If c is in 'A'-'Z' or 'a'-'z', return its lower-case. 16// Else, return something outside of 'A'-'Z' and 'a'-'z'. 17// Note: it ignores LOCALE. 18inline constexpr int AsciiAlphaToLower(base::uc32 c) { return c | 0x20; } 19 20inline constexpr bool IsCarriageReturn(base::uc32 c) { return c == 0x000D; } 21 22inline constexpr bool IsLineFeed(base::uc32 c) { return c == 0x000A; } 23 24inline constexpr bool IsAsciiIdentifier(base::uc32 c) { 25 return IsAlphaNumeric(c) || c == '$' || c == '_'; 26} 27 28inline constexpr bool IsAlphaNumeric(base::uc32 c) { 29 return base::IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c); 30} 31 32inline constexpr bool IsDecimalDigit(base::uc32 c) { 33 // ECMA-262, 3rd, 7.8.3 (p 16) 34 return base::IsInRange(c, '0', '9'); 35} 36 37inline constexpr bool IsHexDigit(base::uc32 c) { 38 // ECMA-262, 3rd, 7.6 (p 15) 39 return IsDecimalDigit(c) || base::IsInRange(AsciiAlphaToLower(c), 'a', 'f'); 40} 41 42inline constexpr bool IsOctalDigit(base::uc32 c) { 43 // ECMA-262, 6th, 7.8.3 44 return base::IsInRange(c, '0', '7'); 45} 46 47inline constexpr bool IsNonOctalDecimalDigit(base::uc32 c) { 48 return base::IsInRange(c, '8', '9'); 49} 50 51inline constexpr bool IsBinaryDigit(base::uc32 c) { 52 // ECMA-262, 6th, 7.8.3 53 return c == '0' || c == '1'; 54} 55 56inline constexpr bool IsAsciiLower(base::uc32 c) { 57 return base::IsInRange(c, 'a', 'z'); 58} 59 60inline constexpr bool IsAsciiUpper(base::uc32 c) { 61 return base::IsInRange(c, 'A', 'Z'); 62} 63 64inline constexpr base::uc32 ToAsciiUpper(base::uc32 c) { 65 return c & ~(IsAsciiLower(c) << 5); 66} 67 68inline constexpr base::uc32 ToAsciiLower(base::uc32 c) { 69 return c | (IsAsciiUpper(c) << 5); 70} 71 72inline constexpr bool IsRegExpWord(base::uc32 c) { 73 return IsAlphaNumeric(c) || c == '_'; 74} 75 76// Constexpr cache table for character flags. 77enum OneByteCharFlags { 78 kIsIdentifierStart = 1 << 0, 79 kIsIdentifierPart = 1 << 1, 80 kIsWhiteSpace = 1 << 2, 81 kIsWhiteSpaceOrLineTerminator = 1 << 3, 82 kMaybeLineEnd = 1 << 4 83}; 84 85// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt 86// ID_Start. Additionally includes '_' and '$'. 87constexpr bool IsOneByteIDStart(base::uc32 c) { 88 return c == 0x0024 || (c >= 0x0041 && c <= 0x005A) || c == 0x005F || 89 (c >= 0x0061 && c <= 0x007A) || c == 0x00AA || c == 0x00B5 || 90 c == 0x00BA || (c >= 0x00C0 && c <= 0x00D6) || 91 (c >= 0x00D8 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FF); 92} 93 94// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt 95// ID_Continue. Additionally includes '_' and '$'. 96constexpr bool IsOneByteIDContinue(base::uc32 c) { 97 return c == 0x0024 || (c >= 0x0030 && c <= 0x0039) || c == 0x005F || 98 (c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A) || 99 c == 0x00AA || c == 0x00B5 || c == 0x00B7 || c == 0x00BA || 100 (c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00F6) || 101 (c >= 0x00F8 && c <= 0x00FF); 102} 103 104constexpr bool IsOneByteWhitespace(base::uc32 c) { 105 return c == '\t' || c == '\v' || c == '\f' || c == ' ' || c == u'\xa0'; 106} 107 108constexpr uint8_t BuildOneByteCharFlags(base::uc32 c) { 109 uint8_t result = 0; 110 if (IsOneByteIDStart(c) || c == '\\') result |= kIsIdentifierStart; 111 if (IsOneByteIDContinue(c) || c == '\\') result |= kIsIdentifierPart; 112 if (IsOneByteWhitespace(c)) { 113 result |= kIsWhiteSpace | kIsWhiteSpaceOrLineTerminator; 114 } 115 if (c == '\r' || c == '\n') { 116 result |= kIsWhiteSpaceOrLineTerminator | kMaybeLineEnd; 117 } 118 // Add markers to identify 0x2028 and 0x2029. 119 if (c == static_cast<uint8_t>(0x2028) || c == static_cast<uint8_t>(0x2029)) { 120 result |= kMaybeLineEnd; 121 } 122 return result; 123} 124const constexpr uint8_t kOneByteCharFlags[256] = { 125#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N), 126 INT_0_TO_127_LIST(BUILD_CHAR_FLAGS) 127#undef BUILD_CHAR_FLAGS 128#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N + 128), 129 INT_0_TO_127_LIST(BUILD_CHAR_FLAGS) 130#undef BUILD_CHAR_FLAGS 131}; 132 133bool IsIdentifierStart(base::uc32 c) { 134 if (!base::IsInRange(c, 0, 255)) return IsIdentifierStartSlow(c); 135 DCHECK_EQ(IsIdentifierStartSlow(c), 136 static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierStart)); 137 return kOneByteCharFlags[c] & kIsIdentifierStart; 138} 139 140bool IsIdentifierPart(base::uc32 c) { 141 if (!base::IsInRange(c, 0, 255)) return IsIdentifierPartSlow(c); 142 DCHECK_EQ(IsIdentifierPartSlow(c), 143 static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierPart)); 144 return kOneByteCharFlags[c] & kIsIdentifierPart; 145} 146 147bool IsWhiteSpace(base::uc32 c) { 148 if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceSlow(c); 149 DCHECK_EQ(IsWhiteSpaceSlow(c), 150 static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpace)); 151 return kOneByteCharFlags[c] & kIsWhiteSpace; 152} 153 154bool IsWhiteSpaceOrLineTerminator(base::uc32 c) { 155 if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceOrLineTerminatorSlow(c); 156 DCHECK_EQ( 157 IsWhiteSpaceOrLineTerminatorSlow(c), 158 static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator)); 159 return kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator; 160} 161 162bool IsLineTerminatorSequence(base::uc32 c, base::uc32 next) { 163 if (kOneByteCharFlags[static_cast<uint8_t>(c)] & kMaybeLineEnd) { 164 if (c == '\n') return true; 165 if (c == '\r') return next != '\n'; 166 return base::IsInRange(static_cast<unsigned int>(c), 0x2028u, 0x2029u); 167 } 168 return false; 169} 170 171} // namespace internal 172 173} // namespace v8 174 175#endif // V8_STRINGS_CHAR_PREDICATES_INL_H_ 176