// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5 #ifndef V8_STRINGS_CHAR_PREDICATES_H_
6 #define V8_STRINGS_CHAR_PREDICATES_H_
7 
8 #include "src/base/strings.h"
9 #include "src/common/globals.h"
10 #include "src/strings/unicode.h"
11 
12 namespace v8 {
13 namespace internal {
14 
// Unicode character predicates as defined by ECMA-262, 3rd edition,
// used for lexical analysis.

18 inline constexpr int AsciiAlphaToLower(base::uc32 c);
19 inline constexpr bool IsCarriageReturn(base::uc32 c);
20 inline constexpr bool IsLineFeed(base::uc32 c);
21 inline constexpr bool IsAsciiIdentifier(base::uc32 c);
22 inline constexpr bool IsAlphaNumeric(base::uc32 c);
23 inline constexpr bool IsDecimalDigit(base::uc32 c);
24 inline constexpr bool IsHexDigit(base::uc32 c);
25 inline constexpr bool IsOctalDigit(base::uc32 c);
26 inline constexpr bool IsBinaryDigit(base::uc32 c);
27 inline constexpr bool IsRegExpWord(base::uc32 c);
28 
29 inline constexpr bool IsAsciiLower(base::uc32 ch);
30 inline constexpr bool IsAsciiUpper(base::uc32 ch);
31 
32 inline constexpr base::uc32 ToAsciiUpper(base::uc32 ch);
33 inline constexpr base::uc32 ToAsciiLower(base::uc32 ch);
34 
35 // ES#sec-names-and-keywords
36 // This includes '_', '$' and '\', and ID_Start according to
37 // http://www.unicode.org/reports/tr31/, which consists of categories
38 // 'Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl', but excluding properties
39 // 'Pattern_Syntax' or 'Pattern_White_Space'.
40 inline bool IsIdentifierStart(base::uc32 c);
41 #ifdef V8_INTL_SUPPORT
42 V8_EXPORT_PRIVATE bool IsIdentifierStartSlow(base::uc32 c);
43 #else
IsIdentifierStartSlow(base::uc32 c)44 inline bool IsIdentifierStartSlow(base::uc32 c) {
45   // Non-BMP characters are not supported without I18N.
46   return (c <= 0xFFFF) ? unibrow::ID_Start::Is(c) : false;
47 }
48 #endif
49 
50 // ES#sec-names-and-keywords
51 // This includes \u200c and \u200d, and ID_Continue according to
52 // http://www.unicode.org/reports/tr31/, which consists of ID_Start,
53 // the categories 'Mn', 'Mc', 'Nd', 'Pc', but excluding properties
54 // 'Pattern_Syntax' or 'Pattern_White_Space'.
55 inline bool IsIdentifierPart(base::uc32 c);
56 #ifdef V8_INTL_SUPPORT
57 V8_EXPORT_PRIVATE bool IsIdentifierPartSlow(base::uc32 c);
58 #else
IsIdentifierPartSlow(base::uc32 c)59 inline bool IsIdentifierPartSlow(base::uc32 c) {
60   // Non-BMP charaacters are not supported without I18N.
61   if (c <= 0xFFFF) {
62     return unibrow::ID_Start::Is(c) || unibrow::ID_Continue::Is(c);
63   }
64   return false;
65 }
66 #endif
67 
68 // ES6 draft section 11.2
69 // This includes all code points of Unicode category 'Zs'.
70 // Further included are \u0009, \u000b, \u000c, and \ufeff.
71 inline bool IsWhiteSpace(base::uc32 c);
72 #ifdef V8_INTL_SUPPORT
73 V8_EXPORT_PRIVATE bool IsWhiteSpaceSlow(base::uc32 c);
74 #else
IsWhiteSpaceSlow(base::uc32 c)75 inline bool IsWhiteSpaceSlow(base::uc32 c) {
76   return unibrow::WhiteSpace::Is(c);
77 }
78 #endif
79 
80 // WhiteSpace and LineTerminator according to ES6 draft section 11.2 and 11.3
81 // This includes all the characters with Unicode category 'Z' (= Zs+Zl+Zp)
82 // as well as \u0009 - \u000d and \ufeff.
83 inline bool IsWhiteSpaceOrLineTerminator(base::uc32 c);
IsWhiteSpaceOrLineTerminatorSlow(base::uc32 c)84 inline bool IsWhiteSpaceOrLineTerminatorSlow(base::uc32 c) {
85   return IsWhiteSpaceSlow(c) || unibrow::IsLineTerminator(c);
86 }
87 
88 inline bool IsLineTerminatorSequence(base::uc32 c, base::uc32 next);
89 
90 }  // namespace internal
91 }  // namespace v8
92 
93 #endif  // V8_STRINGS_CHAR_PREDICATES_H_
94