1// Copyright 2011 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef V8_STRINGS_CHAR_PREDICATES_INL_H_
6#define V8_STRINGS_CHAR_PREDICATES_INL_H_
7
8#include "src/base/bounds.h"
9#include "src/strings/char-predicates.h"
10#include "src/utils/utils.h"
11
12namespace v8 {
13namespace internal {
14
15// If c is in 'A'-'Z' or 'a'-'z', return its lower-case.
16// Else, return something outside of 'A'-'Z' and 'a'-'z'.
17// Note: it ignores LOCALE.
18inline constexpr int AsciiAlphaToLower(base::uc32 c) { return c | 0x20; }
19
20inline constexpr bool IsCarriageReturn(base::uc32 c) { return c == 0x000D; }
21
22inline constexpr bool IsLineFeed(base::uc32 c) { return c == 0x000A; }
23
24inline constexpr bool IsAsciiIdentifier(base::uc32 c) {
25  return IsAlphaNumeric(c) || c == '$' || c == '_';
26}
27
28inline constexpr bool IsAlphaNumeric(base::uc32 c) {
29  return base::IsInRange(AsciiAlphaToLower(c), 'a', 'z') || IsDecimalDigit(c);
30}
31
32inline constexpr bool IsDecimalDigit(base::uc32 c) {
33  // ECMA-262, 3rd, 7.8.3 (p 16)
34  return base::IsInRange(c, '0', '9');
35}
36
37inline constexpr bool IsHexDigit(base::uc32 c) {
38  // ECMA-262, 3rd, 7.6 (p 15)
39  return IsDecimalDigit(c) || base::IsInRange(AsciiAlphaToLower(c), 'a', 'f');
40}
41
42inline constexpr bool IsOctalDigit(base::uc32 c) {
43  // ECMA-262, 6th, 7.8.3
44  return base::IsInRange(c, '0', '7');
45}
46
47inline constexpr bool IsNonOctalDecimalDigit(base::uc32 c) {
48  return base::IsInRange(c, '8', '9');
49}
50
51inline constexpr bool IsBinaryDigit(base::uc32 c) {
52  // ECMA-262, 6th, 7.8.3
53  return c == '0' || c == '1';
54}
55
56inline constexpr bool IsAsciiLower(base::uc32 c) {
57  return base::IsInRange(c, 'a', 'z');
58}
59
60inline constexpr bool IsAsciiUpper(base::uc32 c) {
61  return base::IsInRange(c, 'A', 'Z');
62}
63
64inline constexpr base::uc32 ToAsciiUpper(base::uc32 c) {
65  return c & ~(IsAsciiLower(c) << 5);
66}
67
68inline constexpr base::uc32 ToAsciiLower(base::uc32 c) {
69  return c | (IsAsciiUpper(c) << 5);
70}
71
72inline constexpr bool IsRegExpWord(base::uc32 c) {
73  return IsAlphaNumeric(c) || c == '_';
74}
75
// Bit flags stored per character in the constexpr cache table
// (kOneByteCharFlags). Each enumerator occupies its own bit so that several
// flags can be combined in one uint8_t table entry.
enum OneByteCharFlags {
  // Character may start an identifier.
  kIsIdentifierStart = 0x01,
  // Character may continue an identifier.
  kIsIdentifierPart = 0x02,
  // Character is white space (line terminators excluded).
  kIsWhiteSpace = 0x04,
  // Character is white space or a line terminator.
  kIsWhiteSpaceOrLineTerminator = 0x08,
  // Character is '\r' or '\n', or equals the low byte of 0x2028 / 0x2029.
  kMaybeLineEnd = 0x10,
};
84
85// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
86// ID_Start. Additionally includes '_' and '$'.
87constexpr bool IsOneByteIDStart(base::uc32 c) {
88  return c == 0x0024 || (c >= 0x0041 && c <= 0x005A) || c == 0x005F ||
89         (c >= 0x0061 && c <= 0x007A) || c == 0x00AA || c == 0x00B5 ||
90         c == 0x00BA || (c >= 0x00C0 && c <= 0x00D6) ||
91         (c >= 0x00D8 && c <= 0x00F6) || (c >= 0x00F8 && c <= 0x00FF);
92}
93
94// See http://www.unicode.org/Public/UCD/latest/ucd/DerivedCoreProperties.txt
95// ID_Continue. Additionally includes '_' and '$'.
96constexpr bool IsOneByteIDContinue(base::uc32 c) {
97  return c == 0x0024 || (c >= 0x0030 && c <= 0x0039) || c == 0x005F ||
98         (c >= 0x0041 && c <= 0x005A) || (c >= 0x0061 && c <= 0x007A) ||
99         c == 0x00AA || c == 0x00B5 || c == 0x00B7 || c == 0x00BA ||
100         (c >= 0x00C0 && c <= 0x00D6) || (c >= 0x00D8 && c <= 0x00F6) ||
101         (c >= 0x00F8 && c <= 0x00FF);
102}
103
104constexpr bool IsOneByteWhitespace(base::uc32 c) {
105  return c == '\t' || c == '\v' || c == '\f' || c == ' ' || c == u'\xa0';
106}
107
108constexpr uint8_t BuildOneByteCharFlags(base::uc32 c) {
109  uint8_t result = 0;
110  if (IsOneByteIDStart(c) || c == '\\') result |= kIsIdentifierStart;
111  if (IsOneByteIDContinue(c) || c == '\\') result |= kIsIdentifierPart;
112  if (IsOneByteWhitespace(c)) {
113    result |= kIsWhiteSpace | kIsWhiteSpaceOrLineTerminator;
114  }
115  if (c == '\r' || c == '\n') {
116    result |= kIsWhiteSpaceOrLineTerminator | kMaybeLineEnd;
117  }
118  // Add markers to identify 0x2028 and 0x2029.
119  if (c == static_cast<uint8_t>(0x2028) || c == static_cast<uint8_t>(0x2029)) {
120    result |= kMaybeLineEnd;
121  }
122  return result;
123}
124const constexpr uint8_t kOneByteCharFlags[256] = {
125#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N),
126    INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
127#undef BUILD_CHAR_FLAGS
128#define BUILD_CHAR_FLAGS(N) BuildOneByteCharFlags(N + 128),
129        INT_0_TO_127_LIST(BUILD_CHAR_FLAGS)
130#undef BUILD_CHAR_FLAGS
131};
132
133bool IsIdentifierStart(base::uc32 c) {
134  if (!base::IsInRange(c, 0, 255)) return IsIdentifierStartSlow(c);
135  DCHECK_EQ(IsIdentifierStartSlow(c),
136            static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierStart));
137  return kOneByteCharFlags[c] & kIsIdentifierStart;
138}
139
140bool IsIdentifierPart(base::uc32 c) {
141  if (!base::IsInRange(c, 0, 255)) return IsIdentifierPartSlow(c);
142  DCHECK_EQ(IsIdentifierPartSlow(c),
143            static_cast<bool>(kOneByteCharFlags[c] & kIsIdentifierPart));
144  return kOneByteCharFlags[c] & kIsIdentifierPart;
145}
146
147bool IsWhiteSpace(base::uc32 c) {
148  if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceSlow(c);
149  DCHECK_EQ(IsWhiteSpaceSlow(c),
150            static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpace));
151  return kOneByteCharFlags[c] & kIsWhiteSpace;
152}
153
154bool IsWhiteSpaceOrLineTerminator(base::uc32 c) {
155  if (!base::IsInRange(c, 0, 255)) return IsWhiteSpaceOrLineTerminatorSlow(c);
156  DCHECK_EQ(
157      IsWhiteSpaceOrLineTerminatorSlow(c),
158      static_cast<bool>(kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator));
159  return kOneByteCharFlags[c] & kIsWhiteSpaceOrLineTerminator;
160}
161
162bool IsLineTerminatorSequence(base::uc32 c, base::uc32 next) {
163  if (kOneByteCharFlags[static_cast<uint8_t>(c)] & kMaybeLineEnd) {
164    if (c == '\n') return true;
165    if (c == '\r') return next != '\n';
166    return base::IsInRange(static_cast<unsigned int>(c), 0x2028u, 0x2029u);
167  }
168  return false;
169}
170
171}  // namespace internal
172
173}  // namespace v8
174
175#endif  // V8_STRINGS_CHAR_PREDICATES_INL_H_
176