1cb93a386Sopenharmony_ci/*
2cb93a386Sopenharmony_ci * Copyright 2017 Google Inc.
3cb93a386Sopenharmony_ci *
4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be
5cb93a386Sopenharmony_ci * found in the LICENSE file.
6cb93a386Sopenharmony_ci */
7cb93a386Sopenharmony_ci
8cb93a386Sopenharmony_ci#include "src/sksl/lex/RegexParser.h"
9cb93a386Sopenharmony_ci
10cb93a386Sopenharmony_ci#include "src/sksl/lex/LexUtil.h"
11cb93a386Sopenharmony_ci
12cb93a386Sopenharmony_ciRegexNode RegexParser::parse(std::string source) {
13cb93a386Sopenharmony_ci    fSource = source;
14cb93a386Sopenharmony_ci    fIndex = 0;
15cb93a386Sopenharmony_ci    SkASSERT(fStack.size() == 0);
16cb93a386Sopenharmony_ci    this->regex();
17cb93a386Sopenharmony_ci    SkASSERT(fStack.size() == 1);
18cb93a386Sopenharmony_ci    SkASSERT(fIndex == source.size());
19cb93a386Sopenharmony_ci    return this->pop();
20cb93a386Sopenharmony_ci}
21cb93a386Sopenharmony_ci
22cb93a386Sopenharmony_cichar RegexParser::peek() {
23cb93a386Sopenharmony_ci    if (fIndex >= fSource.size()) {
24cb93a386Sopenharmony_ci        return END;
25cb93a386Sopenharmony_ci    }
26cb93a386Sopenharmony_ci    return fSource[fIndex];
27cb93a386Sopenharmony_ci}
28cb93a386Sopenharmony_ci
29cb93a386Sopenharmony_civoid RegexParser::expect(char c) {
30cb93a386Sopenharmony_ci    if (this->peek() != c) {
31cb93a386Sopenharmony_ci        printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
32cb93a386Sopenharmony_ci        exit(1);
33cb93a386Sopenharmony_ci    }
34cb93a386Sopenharmony_ci    ++fIndex;
35cb93a386Sopenharmony_ci}
36cb93a386Sopenharmony_ci
37cb93a386Sopenharmony_ciRegexNode RegexParser::pop() {
38cb93a386Sopenharmony_ci    RegexNode result = fStack.top();
39cb93a386Sopenharmony_ci    fStack.pop();
40cb93a386Sopenharmony_ci    return result;
41cb93a386Sopenharmony_ci}
42cb93a386Sopenharmony_ci
43cb93a386Sopenharmony_civoid RegexParser::term() {
44cb93a386Sopenharmony_ci    switch (this->peek()) {
45cb93a386Sopenharmony_ci        case '(': this->group();  break;
46cb93a386Sopenharmony_ci        case '[': this->set();    break;
47cb93a386Sopenharmony_ci        case '.': this->dot();    break;
48cb93a386Sopenharmony_ci        default: this->literal(); break;
49cb93a386Sopenharmony_ci    }
50cb93a386Sopenharmony_ci}
51cb93a386Sopenharmony_ci
52cb93a386Sopenharmony_civoid RegexParser::quantifiedTerm() {
53cb93a386Sopenharmony_ci    this->term();
54cb93a386Sopenharmony_ci    switch (this->peek()) {
55cb93a386Sopenharmony_ci        case '*': fStack.push(RegexNode(RegexNode::kStar_Kind,     this->pop())); ++fIndex; break;
56cb93a386Sopenharmony_ci        case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind,     this->pop())); ++fIndex; break;
57cb93a386Sopenharmony_ci        case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
58cb93a386Sopenharmony_ci        default:  break;
59cb93a386Sopenharmony_ci    }
60cb93a386Sopenharmony_ci}
61cb93a386Sopenharmony_ci
62cb93a386Sopenharmony_civoid RegexParser::sequence() {
63cb93a386Sopenharmony_ci    this->quantifiedTerm();
64cb93a386Sopenharmony_ci    for (;;) {
65cb93a386Sopenharmony_ci        switch (this->peek()) {
66cb93a386Sopenharmony_ci            case END: [[fallthrough]];
67cb93a386Sopenharmony_ci            case '|': [[fallthrough]];
68cb93a386Sopenharmony_ci            case ')': return;
69cb93a386Sopenharmony_ci            default:
70cb93a386Sopenharmony_ci                this->sequence();
71cb93a386Sopenharmony_ci                RegexNode right = this->pop();
72cb93a386Sopenharmony_ci                RegexNode left = this->pop();
73cb93a386Sopenharmony_ci                fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
74cb93a386Sopenharmony_ci                break;
75cb93a386Sopenharmony_ci        }
76cb93a386Sopenharmony_ci    }
77cb93a386Sopenharmony_ci}
78cb93a386Sopenharmony_ci
79cb93a386Sopenharmony_ciRegexNode RegexParser::escapeSequence(char c) {
80cb93a386Sopenharmony_ci    switch (c) {
81cb93a386Sopenharmony_ci        case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
82cb93a386Sopenharmony_ci        case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
83cb93a386Sopenharmony_ci        case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
84cb93a386Sopenharmony_ci        case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
85cb93a386Sopenharmony_ci        default:  return RegexNode(RegexNode::kChar_Kind, c);
86cb93a386Sopenharmony_ci    }
87cb93a386Sopenharmony_ci}
88cb93a386Sopenharmony_ci
89cb93a386Sopenharmony_civoid RegexParser::literal() {
90cb93a386Sopenharmony_ci    char c = this->peek();
91cb93a386Sopenharmony_ci    if (c == '\\') {
92cb93a386Sopenharmony_ci        ++fIndex;
93cb93a386Sopenharmony_ci        fStack.push(this->escapeSequence(peek()));
94cb93a386Sopenharmony_ci        ++fIndex;
95cb93a386Sopenharmony_ci    }
96cb93a386Sopenharmony_ci    else {
97cb93a386Sopenharmony_ci        fStack.push(RegexNode(RegexNode::kChar_Kind, c));
98cb93a386Sopenharmony_ci        ++fIndex;
99cb93a386Sopenharmony_ci    }
100cb93a386Sopenharmony_ci}
101cb93a386Sopenharmony_ci
102cb93a386Sopenharmony_civoid RegexParser::dot() {
103cb93a386Sopenharmony_ci    this->expect('.');
104cb93a386Sopenharmony_ci    fStack.push(RegexNode(RegexNode::kDot_Kind));
105cb93a386Sopenharmony_ci}
106cb93a386Sopenharmony_ci
107cb93a386Sopenharmony_civoid RegexParser::group() {
108cb93a386Sopenharmony_ci    this->expect('(');
109cb93a386Sopenharmony_ci    this->regex();
110cb93a386Sopenharmony_ci    this->expect(')');
111cb93a386Sopenharmony_ci}
112cb93a386Sopenharmony_ci
113cb93a386Sopenharmony_civoid RegexParser::setItem() {
114cb93a386Sopenharmony_ci    this->literal();
115cb93a386Sopenharmony_ci    if (this->peek() == '-') {
116cb93a386Sopenharmony_ci        ++fIndex;
117cb93a386Sopenharmony_ci        if (peek() == ']') {
118cb93a386Sopenharmony_ci            fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
119cb93a386Sopenharmony_ci        }
120cb93a386Sopenharmony_ci        else {
121cb93a386Sopenharmony_ci            literal();
122cb93a386Sopenharmony_ci            RegexNode end = this->pop();
123cb93a386Sopenharmony_ci            SkASSERT(end.fKind == RegexNode::kChar_Kind);
124cb93a386Sopenharmony_ci            RegexNode start = this->pop();
125cb93a386Sopenharmony_ci            SkASSERT(start.fKind == RegexNode::kChar_Kind);
126cb93a386Sopenharmony_ci            fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
127cb93a386Sopenharmony_ci        }
128cb93a386Sopenharmony_ci    }
129cb93a386Sopenharmony_ci}
130cb93a386Sopenharmony_ci
131cb93a386Sopenharmony_civoid RegexParser::set() {
132cb93a386Sopenharmony_ci    expect('[');
133cb93a386Sopenharmony_ci    size_t depth = fStack.size();
134cb93a386Sopenharmony_ci    RegexNode set(RegexNode::kCharset_Kind);
135cb93a386Sopenharmony_ci    if (this->peek() == '^') {
136cb93a386Sopenharmony_ci        ++fIndex;
137cb93a386Sopenharmony_ci        set.fPayload.fBool = true;
138cb93a386Sopenharmony_ci    }
139cb93a386Sopenharmony_ci    else {
140cb93a386Sopenharmony_ci        set.fPayload.fBool = false;
141cb93a386Sopenharmony_ci    }
142cb93a386Sopenharmony_ci    for (;;) {
143cb93a386Sopenharmony_ci        switch (this->peek()) {
144cb93a386Sopenharmony_ci            case ']':
145cb93a386Sopenharmony_ci                ++fIndex;
146cb93a386Sopenharmony_ci                while (fStack.size() > depth) {
147cb93a386Sopenharmony_ci                    set.fChildren.push_back(this->pop());
148cb93a386Sopenharmony_ci                }
149cb93a386Sopenharmony_ci                fStack.push(std::move(set));
150cb93a386Sopenharmony_ci                return;
151cb93a386Sopenharmony_ci            case END:
152cb93a386Sopenharmony_ci                printf("unterminated character set\n");
153cb93a386Sopenharmony_ci                exit(1);
154cb93a386Sopenharmony_ci            default:
155cb93a386Sopenharmony_ci                this->setItem();
156cb93a386Sopenharmony_ci                break;
157cb93a386Sopenharmony_ci        }
158cb93a386Sopenharmony_ci    }
159cb93a386Sopenharmony_ci}
160cb93a386Sopenharmony_ci
161cb93a386Sopenharmony_civoid RegexParser::regex() {
162cb93a386Sopenharmony_ci    this->sequence();
163cb93a386Sopenharmony_ci    switch (this->peek()) {
164cb93a386Sopenharmony_ci        case '|': {
165cb93a386Sopenharmony_ci            ++fIndex;
166cb93a386Sopenharmony_ci            this->regex();
167cb93a386Sopenharmony_ci            RegexNode right = this->pop();
168cb93a386Sopenharmony_ci            RegexNode left = this->pop();
169cb93a386Sopenharmony_ci            fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
170cb93a386Sopenharmony_ci            break;
171cb93a386Sopenharmony_ci        }
172cb93a386Sopenharmony_ci        case END: // fall through
173cb93a386Sopenharmony_ci        case ')':
174cb93a386Sopenharmony_ci            return;
175cb93a386Sopenharmony_ci        default:
176cb93a386Sopenharmony_ci            SkASSERT(false);
177cb93a386Sopenharmony_ci    }
178cb93a386Sopenharmony_ci}
179