1/**
2 * Copyright (c) 2021-2024 Huawei Device Co., Ltd.
3 * Licensed under the Apache License, Version 2.0 (the "License");
4 * you may not use this file except in compliance with the License.
5 * You may obtain a copy of the License at
6 *
7 * http://www.apache.org/licenses/LICENSE-2.0
8 *
9 * Unless required by applicable law or agreed to in writing, software
10 * distributed under the License is distributed on an "AS IS" BASIS,
11 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 * See the License for the specific language governing permissions and
13 * limitations under the License.
14 */
15
16#include "regexp.h"
17
18#include "lexer/token/letters.h"
19#include "unicode/uchar.h"
20
21#include <iostream>
22
23namespace ark::es2panda::lexer {
// Constructs a RegExp record by storing its three arguments unchanged.
//   p       - stored in patternStr
//   f       - stored in flagsStr
//   reFlags - stored in flags (this is the value RegExpParser::Unicode() tests)
RegExp::RegExp(util::StringView p, util::StringView f, RegExpFlags reFlags) : patternStr(p), flagsStr(f), flags(reFlags)
{
}
27
// Constructs a parser over the pattern of `re`.
//   re        - regular expression to validate; kept in re_
//   allocator - kept in allocator_ (used by ParseIdent to build group names)
//   parser    - kept in parser_; every syntax error in this file is reported through it
// The iterator is initialised from re_.patternStr (the member, not the parameter).
// NOTE(review): that relies on re_ being declared before iter_ in the class - confirm in regexp.h.
RegExpParser::RegExpParser(const RegExp &re, ArenaAllocator *allocator, const parser::ParserImpl &parser)
    : re_(re), allocator_ {allocator}, iter_(re_.patternStr), parser_(parser)
{
}
32
33bool RegExpParser::Unicode() const
34{
35    return (re_.flags & RegExpFlags::UNICODE) != 0;
36}
37
38char32_t RegExpParser::Peek() const
39{
40    return iter_.Peek();
41}
42
43char32_t RegExpParser::Next()
44{
45    return iter_.Next();
46}
47
48static bool IsDecimalDigit(char32_t cp)
49{
50    return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_9);
51}
52
53static bool IsOctalDigit(char32_t cp)
54{
55    return (cp >= LEX_CHAR_0 && cp <= LEX_CHAR_7);
56}
57
58static bool IsHexDigit(char32_t cp)
59{
60    return IsDecimalDigit(cp) || (cp >= LEX_CHAR_LOWERCASE_A && cp <= LEX_CHAR_LOWERCASE_F) ||
61           (cp >= LEX_CHAR_UPPERCASE_A && cp <= LEX_CHAR_UPPERCASE_F);
62}
63
64static uint32_t DigitValue(char32_t cp)
65{
66    return (cp - LEX_CHAR_0);
67}
68
69static uint32_t HexValue(char32_t cp)
70{
71    if (IsDecimalDigit(cp)) {
72        return DigitValue(cp);
73    }
74
75    constexpr auto OFFSET = 10;
76
77    if (cp < LEX_CHAR_LOWERCASE_A) {
78        return cp - LEX_CHAR_UPPERCASE_A + OFFSET;
79    }
80
81    return (cp - LEX_CHAR_LOWERCASE_A + OFFSET);
82}
83
84void RegExpParser::ParsePattern()
85{
86    ParseDisjunction();
87
88    if (iter_.HasNext()) {
89        parser_.ThrowSyntaxError("Invalid closing parenthesis");
90    }
91
92    if (!backReferences_.empty() && !groupNames_.empty()) {
93        for (const auto it : backReferences_) {
94            auto result = groupNames_.find(it);
95            if (result == groupNames_.end()) {
96                parser_.ThrowSyntaxError("Invalid capturing group");
97            }
98        }
99    }
100}
101
102void RegExpParser::ParseDisjunction()
103{
104    while (true) {
105        ParseAlternatives();
106
107        if (Peek() != LEX_CHAR_VLINE) {
108            break;
109        }
110
111        Next();
112    };
113}
114
115void RegExpParser::ParseAlternative()
116{
117    switch (Peek()) {
118        case LEX_CHAR_BACKSLASH: {
119            Next();
120            char32_t cp = Peek();
121            if (cp == LEX_CHAR_LOWERCASE_B || cp == LEX_CHAR_UPPERCASE_B) {
122                /* assertion */
123                Next();
124                return;
125            }
126
127            ParseAtomEscape();
128            break;
129        }
130        case LEX_CHAR_CIRCUMFLEX:
131        case LEX_CHAR_DOLLAR_SIGN: {
132            /* assertion */
133            Next();
134            return;
135        }
136        case LEX_CHAR_LEFT_PAREN: {
137            if (ParseAlternativeCharLeftParen()) {
138                return;
139            }
140            break;
141        }
142        case LEX_CHAR_LEFT_SQUARE: {
143            Next();
144            ParseCharacterClass();
145            break;
146        }
147        case LEX_CHAR_DOT: {
148            Next();
149            break;
150        }
151        default: {
152            if (ParseBracedQuantifier()) {
153                parser_.ThrowSyntaxError("Invalid quantifier, nothing to repeat");
154            }
155
156            if (!ParsePatternCharacter()) {
157                parser_.ThrowSyntaxError("Invalid character");
158            }
159
160            break;
161        }
162    }
163
164    ParseQuantifier();
165}
166
167bool RegExpParser::ParseAlternativeCharLeftParen()
168{
169    Next();
170
171    if (Peek() != LEX_CHAR_QUESTION) {
172        ParseCapturingGroup();
173        return false;
174    }
175
176    Next();  // eat '?'
177
178    char32_t cp = Next();
179    if (cp == LEX_CHAR_COLON) {
180        ParseNonCapturingGroup();
181        return false;
182    }
183
184    if (cp == LEX_CHAR_EQUALS || cp == LEX_CHAR_EXCLAMATION) {
185        ParseAssertion();
186
187        return Unicode();
188    }
189
190    if (cp != LEX_CHAR_LESS_THAN) {
191        parser_.ThrowSyntaxError("Invalid group");
192    }
193
194    cp = Peek();
195    if (cp == LEX_CHAR_EQUALS || cp == LEX_CHAR_EXCLAMATION) {
196        Next();
197        ParseAssertion();
198        return true;
199    }
200
201    ParseNamedCapturingGroup();
202    return false;
203}
204
205void RegExpParser::ParseAlternatives()
206{
207    while (true) {
208        switch (Peek()) {
209            case util::StringView::Iterator::INVALID_CP:
210            case LEX_CHAR_RIGHT_PAREN:
211            case LEX_CHAR_VLINE: {
212                return;
213            }
214            default: {
215                ParseAlternative();
216            }
217        }
218    }
219}
220
221void RegExpParser::ParseNonCapturingGroup()
222{
223    ParseDisjunction();
224
225    if (Peek() != LEX_CHAR_RIGHT_PAREN) {
226        parser_.ThrowSyntaxError("Invalid non-capturing group");
227    }
228
229    Next();
230}
231
232void RegExpParser::ParseNamedCapturingGroup()
233{
234    util::StringView name = ParseIdent();
235
236    auto result = groupNames_.insert(name);
237    if (!result.second) {
238        parser_.ThrowSyntaxError("Duplicate group name");
239    }
240
241    ParseCapturingGroup();
242}
243
244void RegExpParser::ParseCapturingGroup()
245{
246    capturingGroupCount_++;
247
248    ParseDisjunction();
249
250    if (Peek() != LEX_CHAR_RIGHT_PAREN) {
251        parser_.ThrowSyntaxError("Invalid capturing group");
252    }
253
254    Next();
255}
256
257void RegExpParser::ParseAssertion()
258{
259    ParseDisjunction();
260
261    if (Peek() != LEX_CHAR_RIGHT_PAREN) {
262        parser_.ThrowSyntaxError("Invalid assertion");
263    }
264
265    Next();
266}
267
268uint32_t RegExpParser::ParseControlEscape()
269{
270    char32_t cp = Peek();
271    if ((cp < LEX_CHAR_LOWERCASE_A || cp > LEX_CHAR_LOWERCASE_Z) &&
272        (cp < LEX_CHAR_UPPERCASE_A || cp > LEX_CHAR_UPPERCASE_Z)) {
273        if (Unicode()) {
274            parser_.ThrowSyntaxError("Invalid control escape");
275        }
276
277        if (cp < LEX_CHAR_0 || cp > LEX_CHAR_9) {
278            return LEX_CHAR_LOWERCASE_C;
279        }
280    }
281
282    Next();
283    constexpr auto MODULO = 32;
284    return cp % MODULO;
285}
286
// Parses one atom inside a character class (ParseCharacterClass calls this after '[' was consumed).
// Returns the code point the atom stands for, or std::numeric_limits<uint32_t>::max() when the atom
// is a class escape (\d \D \s \S \w \W, and \p / \P with the Unicode flag); ParseCharacterClass
// recognises that marker through IsClassEscape.
char32_t RegExpParser::ParseClassAtom()
{
    char32_t cp = Next();
    if (cp != LEX_CHAR_BACKSLASH) {
        // Not an escape: the atom is the code point itself.
        return cp;
    }

    cp = Peek();
    if (cp == LEX_CHAR_0) {
        if (!Unicode()) {
            // Without the Unicode flag, "\0..." is handed to the decimal / legacy octal escape parser.
            return ParseDecimalEscape();
        }

        Next();

        // With the Unicode flag only a bare "\0" is accepted; a following digit is an error.
        if (IsDecimalDigit(Peek())) {
            parser_.ThrowSyntaxError("Invalid class escape");
        }

        return LEX_CHAR_NULL;
    }

    // Consume the escape letter; cp keeps its value for the dispatch below.
    Next();

    switch (cp) {
        case LEX_CHAR_LOWERCASE_C: {
            return ParseControlEscape();
        }
        case LEX_CHAR_LOWERCASE_X: {
            return ParseHexEscape();
        }
        case LEX_CHAR_LOWERCASE_U: {
            // Without the Unicode flag "\u{" is not an escape: 'u' is returned and '{' is left in place.
            if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
                return cp;
            }

            return ParseUnicodeEscape();
        }
        case LEX_CHAR_LOWERCASE_P:
        case LEX_CHAR_UPPERCASE_P: {
            // Without the Unicode flag "\p" / "\P" simply stand for the letter.
            if (!Unicode()) {
                return cp;
            }

            ParseUnicodePropertyEscape();
            // A property escape is reported with the same marker as the class escapes below.
            [[fallthrough]];
        }
        case LEX_CHAR_LOWERCASE_D:
        case LEX_CHAR_UPPERCASE_D:
        case LEX_CHAR_LOWERCASE_S:
        case LEX_CHAR_UPPERCASE_S:
        case LEX_CHAR_LOWERCASE_W:
        case LEX_CHAR_UPPERCASE_W: {
            // Marker value meaning "class escape, not a single code point".
            return std::numeric_limits<uint32_t>::max();
        }
        case LEX_CHAR_LOWERCASE_B: {
            return LEX_CHAR_BS;
        }
        case LEX_CHAR_LOWERCASE_F: {
            return LEX_CHAR_FF;
        }
        case LEX_CHAR_LOWERCASE_N: {
            return LEX_CHAR_LF;
        }
        case LEX_CHAR_LOWERCASE_R: {
            return LEX_CHAR_CR;
        }
        case LEX_CHAR_LOWERCASE_T: {
            return LEX_CHAR_TAB;
        }
        case LEX_CHAR_LOWERCASE_V: {
            return LEX_CHAR_VT;
        }
        case LEX_CHAR_MINUS: {
            return cp;
        }
        default: {
            // Identity escape: with the Unicode flag only syntax characters and '/' may be escaped.
            if (Unicode() && !IsSyntaxCharacter(cp) && cp != LEX_CHAR_SLASH) {
                parser_.ThrowSyntaxError("Invalid escape");
            }

            return cp;
        }
    }

    // Not reached: every switch branch above returns.
    return cp;
}
374
// True when cp is the marker value (uint32_t max) that ParseClassAtom returns for class escapes.
static bool IsClassEscape(uint32_t cp)
{
    constexpr uint32_t CLASS_ESCAPE_MARKER = std::numeric_limits<uint32_t>::max();
    return cp == CLASS_ESCAPE_MARKER;
}
379
380void RegExpParser::ParseCharacterClass()
381{
382    if (Peek() == LEX_CHAR_CIRCUMFLEX) {
383        Next();
384    }
385
386    while (true) {
387        if (Peek() == LEX_CHAR_RIGHT_SQUARE) {
388            Next();
389            break;
390        }
391
392        uint32_t left = ParseClassAtom();
393
394        if (Peek() != LEX_CHAR_MINUS) {
395            continue;
396        }
397
398        Next();
399
400        if (Peek() == LEX_CHAR_RIGHT_SQUARE) {
401            Next();
402            break;
403        }
404
405        uint32_t right = ParseClassAtom();
406        if ((IsClassEscape(left) || IsClassEscape(right))) {
407            if (Unicode()) {
408                parser_.ThrowSyntaxError("Invalid character class");
409            }
410
411            continue;
412        }
413
414        if (left > right) {
415            parser_.ThrowSyntaxError("Class range out of order");
416        }
417    }
418}
419
420bool RegExpParser::IsSyntaxCharacter(char32_t cp) const
421{
422    switch (cp) {
423        case LEX_CHAR_RIGHT_SQUARE:
424        case LEX_CHAR_LEFT_BRACE:
425        case LEX_CHAR_RIGHT_BRACE: {
426            if (!Unicode()) {
427                return false;
428            }
429
430            [[fallthrough]];
431        }
432        case LEX_CHAR_CIRCUMFLEX:
433        case LEX_CHAR_DOLLAR_SIGN:
434        case LEX_CHAR_BACKSLASH:
435        case LEX_CHAR_DOT:
436        case LEX_CHAR_ASTERISK:
437        case LEX_CHAR_PLUS:
438        case LEX_CHAR_QUESTION:
439        case LEX_CHAR_LEFT_PAREN:
440        case LEX_CHAR_RIGHT_PAREN:
441        case LEX_CHAR_LEFT_SQUARE:
442        case LEX_CHAR_VLINE: {
443            return true;
444        }
445        default: {
446            return false;
447        }
448    }
449}
450
451void RegExpParser::ParseAtomEscape()
452{
453    char32_t cp = Peek();
454    if (IsDecimalDigit(cp)) {
455        ParseDecimalEscape();
456        return;
457    }
458
459    Next();
460
461    ParseAtomEscapeSwitch(cp);
462}
463
464void RegExpParser::ParseAtomEscapeSwitch(char32_t cp)
465{
466    switch (cp) {
467        case LEX_CHAR_LOWERCASE_X: {
468            ParseHexEscape();
469            break;
470        }
471        case LEX_CHAR_LOWERCASE_U: {
472            ParseUnicodeEscape();
473            break;
474        }
475        case LEX_CHAR_LOWERCASE_K: {
476            ParseNamedBackreference();
477            return;
478        }
479        /* ControlEscape */
480        case LEX_CHAR_LOWERCASE_F:
481        case LEX_CHAR_LOWERCASE_N:
482        case LEX_CHAR_LOWERCASE_R:
483        case LEX_CHAR_LOWERCASE_T:
484        case LEX_CHAR_LOWERCASE_V:
485        /* CharacterClassEscape */
486        case LEX_CHAR_LOWERCASE_D:
487        case LEX_CHAR_UPPERCASE_D:
488        case LEX_CHAR_LOWERCASE_S:
489        case LEX_CHAR_UPPERCASE_S:
490        case LEX_CHAR_LOWERCASE_W:
491        case LEX_CHAR_UPPERCASE_W: {
492            return;
493        }
494        case LEX_CHAR_LOWERCASE_P:
495        case LEX_CHAR_UPPERCASE_P: {
496            ParseUnicodePropertyEscape();
497            return;
498        }
499        case LEX_CHAR_LOWERCASE_C: {
500            cp = Peek();
501            if ((cp < LEX_CHAR_LOWERCASE_A || cp > LEX_CHAR_LOWERCASE_Z) &&
502                (cp < LEX_CHAR_UPPERCASE_A || cp > LEX_CHAR_UPPERCASE_Z)) {
503                parser_.ThrowSyntaxError("Invalid control escape");
504            }
505
506            Next();
507            return;
508        }
509        default: {
510            /* IdentityEscape */
511            if (Unicode() && !IsSyntaxCharacter(cp) && cp != LEX_CHAR_SLASH) {
512                parser_.ThrowSyntaxError("Invalid escape");
513            }
514        }
515    }
516}
517
518uint32_t RegExpParser::ParseDecimalEscape()
519{
520    ASSERT(IsDecimalDigit(Peek()));
521
522    auto digitStart = iter_;
523    uint32_t decimalValue = DigitValue(Next());
524    if (decimalValue == 0) {
525        if (!IsDecimalDigit(Peek())) {
526            /* \0 */
527            return decimalValue;
528        }
529
530        if (Unicode()) {
531            parser_.ThrowSyntaxError("Invalid decimal escape");
532        }
533
534        iter_ = digitStart;
535        return ParseLegacyOctalEscape();
536    }
537
538    constexpr auto MULTIPLIER = 10;
539
540    while (IsDecimalDigit(Peek())) {
541        uint32_t newValue = decimalValue * MULTIPLIER + DigitValue(Next());
542        if (newValue < decimalValue) {
543            parser_.ThrowSyntaxError("Invalid decimal escape");
544        }
545
546        decimalValue = newValue;
547    }
548
549    if (decimalValue <= capturingGroupCount_) {
550        return decimalValue;
551    }
552
553    if (Unicode()) {
554        parser_.ThrowSyntaxError("Invalid decimal escape");
555    }
556
557    iter_ = digitStart;
558
559    if (!IsOctalDigit(Peek())) {
560        /* \8 or \9 */
561        return DigitValue(Next());
562    }
563
564    return ParseLegacyOctalEscape();
565}
566
567uint32_t RegExpParser::ParseLegacyOctalEscape()
568{
569    ASSERT(IsOctalDigit(Peek()));
570    uint32_t octalValue = DigitValue(Next());
571
572    if (!IsOctalDigit(Peek())) {
573        return octalValue;
574    }
575
576    octalValue = octalValue * 8U + DigitValue(Next());
577
578    if (!IsOctalDigit(Peek())) {
579        return octalValue;
580    }
581
582    uint32_t newValue = octalValue * 8 + DigitValue(Peek());
583    constexpr uint32_t MAX_OCTAL_VALUE = 0xFF;
584
585    if (newValue <= MAX_OCTAL_VALUE) {
586        octalValue = newValue;
587        Next();
588    }
589
590    return octalValue;
591}
592
593uint32_t RegExpParser::ParseHexEscape()
594{
595    char32_t digit = Next();
596    if (!IsHexDigit(digit)) {
597        parser_.ThrowSyntaxError("Invalid hex escape");
598    }
599
600    constexpr auto MULTIPLIER = 16;
601    uint32_t cpValue = HexValue(digit) * MULTIPLIER;
602
603    digit = Next();
604    if (!IsHexDigit(digit)) {
605        parser_.ThrowSyntaxError("Invalid hex escape");
606    }
607
608    cpValue += HexValue(digit);
609    return cpValue;
610}
611
612uint32_t RegExpParser::ParseUnicodeDigits()
613{
614    uint32_t value = 0;
615    uint32_t count = 4;
616
617    while ((count--) != 0U) {
618        char32_t digit = Next();
619        if (!IsHexDigit(digit)) {
620            parser_.ThrowSyntaxError("Invalid Unicode escape");
621        }
622
623        constexpr auto MULTIPLIER = 16;
624        value = value * MULTIPLIER + HexValue(digit);
625    }
626
627    return value;
628}
629
630uint32_t RegExpParser::ParseUnicodeEscape()
631{
632    uint32_t value = 0;
633
634    if (Peek() == LEX_CHAR_LEFT_BRACE) {
635        Next();
636        if (!IsHexDigit(Peek())) {
637            parser_.ThrowSyntaxError("Invalid Unicode escape");
638        }
639
640        while (IsHexDigit(Peek())) {
641            constexpr auto MULTIPLIER = 16;
642            value = value * MULTIPLIER + HexValue(Next());
643            constexpr uint32_t CODE_POINT_MAX = 0x10FFFF;
644
645            if (value > CODE_POINT_MAX) {
646                parser_.ThrowSyntaxError("Invalid Unicode escape");
647            }
648        }
649
650        if (Peek() != LEX_CHAR_RIGHT_BRACE) {
651            parser_.ThrowSyntaxError("Invalid Unicode escape");
652        }
653
654        Next();
655    } else {
656        value = ParseUnicodeDigits();
657        if (!util::StringView::IsHighSurrogate(value)) {
658            return value;
659        }
660
661        auto pos = iter_;
662        if (Next() == LEX_CHAR_BACKSLASH && Next() == LEX_CHAR_LOWERCASE_U) {
663            uint32_t next = ParseUnicodeDigits();
664            if (util::StringView::IsLowSurrogate(next)) {
665                return util::StringView::DecodeSurrogates(value, next);
666            }
667        }
668        iter_ = pos;
669    }
670
671    return value;
672}
673
674void RegExpParser::ParseUnicodePropertyEscape()
675{
676    if (!Unicode()) {
677        return;
678    }
679
680    if (Peek() != LEX_CHAR_LEFT_BRACE) {
681        parser_.ThrowSyntaxError("Invalid Unicode property escape");
682    }
683
684    Next();
685
686    while (true) {
687        if (!iter_.HasNext()) {
688            parser_.ThrowSyntaxError("Unterminated Unicode property escape");
689        }
690
691        char32_t ch = Next();
692        if (ch == LEX_CHAR_LEFT_BRACE) {
693            break;
694        }
695
696        /* NOTE: Parse and validate Unicode property names */
697    }
698}
699
700void RegExpParser::ParseNamedBackreference()
701{
702    if (Next() != LEX_CHAR_LESS_THAN) {
703        if (!Unicode() && groupNames_.empty()) {
704            return;
705        }
706
707        parser_.ThrowSyntaxError("Invalid named backreference");
708    }
709
710    if (IsDecimalDigit(Peek())) {
711        return;
712    }
713
714    util::StringView name = ParseIdent();
715    backReferences_.insert(name);
716
717    ValidateNamedBackreference(Unicode());
718}
719
720void RegExpParser::ValidateNamedBackreference(bool isUnicode)
721{
722    if (Peek() != LEX_CHAR_LEFT_PAREN || Peek() != LEX_CHAR_BACKSLASH || Peek() != UNICODE_INVALID_CP) {
723        if (!isUnicode) {
724            /* Identity escape */
725            return;
726        }
727
728        if (groupNames_.empty()) {
729            parser_.ThrowSyntaxError("Invalid named backreference");
730        }
731    }
732}
733
734void RegExpParser::ValidateGroupNameElement(char32_t cp)
735{
736    if (IsDecimalDigit(cp) && !backReferences_.empty()) {
737        parser_.ThrowSyntaxError("Invalid group name");
738    }
739    if (cp == UNICODE_INVALID_CP && !groupNames_.empty()) {
740        parser_.ThrowSyntaxError("Invalid group name");
741    }
742}
743
744void RegExpParser::ParseQuantifier()
745{
746    switch (Peek()) {
747        case LEX_CHAR_ASTERISK:
748        case LEX_CHAR_PLUS:
749        case LEX_CHAR_QUESTION: {
750            Next();
751            break;
752        }
753        case LEX_CHAR_LEFT_BRACE: {
754            if (!ParseBracedQuantifier()) {
755                return;
756            }
757
758            break;
759        }
760        default: {
761            return;
762        }
763    }
764
765    if (Peek() == LEX_CHAR_QUESTION) {
766        Next();
767    }
768}
769
770bool RegExpParser::ParseBracedQuantifier()
771{
772    if (Peek() != LEX_CHAR_LEFT_BRACE) {
773        return false;
774    }
775
776    auto startPos = iter_;
777    Next();
778
779    if (!IsDecimalDigit(Peek())) {
780        iter_ = startPos;
781        return false;
782    }
783
784    uint32_t leftValue = 0;
785    constexpr auto MULTIPLIER = 10;
786
787    while (IsDecimalDigit(Peek())) {
788        uint32_t newValue = leftValue * MULTIPLIER + DigitValue(Next());
789        if (newValue < leftValue) {
790            leftValue = std::numeric_limits<uint32_t>::max();
791            continue;
792        }
793
794        leftValue = newValue;
795    }
796
797    if (Peek() == LEX_CHAR_COMMA) {
798        Next();
799    }
800
801    if (Peek() == LEX_CHAR_RIGHT_BRACE) {
802        Next();
803        return true;
804    }
805
806    if (IsDecimalDigit(Peek())) {
807        uint32_t rightValue = 0;
808        while (IsDecimalDigit(Peek())) {
809            uint32_t newValue = rightValue * MULTIPLIER + DigitValue(Next());
810            if (newValue < rightValue) {
811                rightValue = std::numeric_limits<uint32_t>::max();
812                continue;
813            }
814
815            rightValue = newValue;
816        }
817
818        if (Peek() == LEX_CHAR_RIGHT_BRACE) {
819            if (rightValue < leftValue) {
820                parser_.ThrowSyntaxError("Quantifier range out of order");
821            }
822
823            Next();
824            return true;
825        }
826    }
827
828    iter_ = startPos;
829    return false;
830}
831
832bool RegExpParser::ParsePatternCharacter()
833{
834    char32_t cp = Peek();
835    if (IsSyntaxCharacter(cp)) {
836        return false;
837    }
838
839    Next();
840    return true;
841}
842
843static bool IsIdStart(uint32_t cp)
844{
845    auto uchar = static_cast<UChar>(cp);
846    return u_isIDStart(uchar) || uchar == LEX_CHAR_DOLLAR_SIGN || uchar == LEX_CHAR_UNDERSCORE ||
847           uchar == LEX_CHAR_BACKSLASH;
848}
849
850static bool IsIdCont(uint32_t cp)
851{
852    auto uchar = static_cast<UChar>(cp);
853    return u_isIDPart(uchar) || uchar == LEX_CHAR_DOLLAR_SIGN || uchar == LEX_CHAR_UNDERSCORE ||
854           uchar == LEX_CHAR_BACKSLASH || uchar == LEX_CHAR_ZWNJ || uchar == LEX_CHAR_ZWJ;
855}
856
857util::StringView RegExpParser::ParseIdent()
858{
859    char32_t cp = Next();
860    if (cp == LEX_CHAR_BACKSLASH) {
861        if (Next() != LEX_CHAR_LOWERCASE_U) {
862            parser_.ThrowSyntaxError("Invalid group name");
863        }
864
865        if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
866            parser_.ThrowSyntaxError("Invalid Unicode escape");
867        }
868
869        cp = ParseUnicodeEscape();
870    }
871
872    if (!IsIdStart(cp) && cp != UNICODE_INVALID_CP && backReferences_.empty()) {
873        parser_.ThrowSyntaxError("Invalid group name");
874    }
875
876    util::UString ident(allocator_);
877    ident.Append(cp);
878
879    while (true) {
880        cp = Next();
881        if (cp == LEX_CHAR_GREATER_THAN) {
882            break;
883        }
884
885        if (cp == LEX_CHAR_BACKSLASH) {
886            if (Next() != LEX_CHAR_LOWERCASE_U) {
887                parser_.ThrowSyntaxError("Invalid group name");
888            }
889
890            if (!Unicode() && Peek() == LEX_CHAR_LEFT_BRACE) {
891                parser_.ThrowSyntaxError("Invalid Unicode escape");
892            }
893
894            cp = ParseUnicodeEscape();
895        }
896
897        ValidateGroupNameElement(cp);
898
899        if (cp == UNICODE_INVALID_CP) {
900            break;
901        }
902
903        if (!IsIdCont(cp)) {
904            parser_.ThrowSyntaxError("Invalid group name");
905        }
906
907        ident.Append(cp);
908    }
909
910    return ident.View();
911}
912}  // namespace ark::es2panda::lexer
913