1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci * Copyright 2017 Google Inc. 3cb93a386Sopenharmony_ci * 4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci * found in the LICENSE file. 6cb93a386Sopenharmony_ci */ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#include "src/sksl/lex/RegexParser.h" 9cb93a386Sopenharmony_ci 10cb93a386Sopenharmony_ci#include "src/sksl/lex/LexUtil.h" 11cb93a386Sopenharmony_ci 12cb93a386Sopenharmony_ciRegexNode RegexParser::parse(std::string source) { 13cb93a386Sopenharmony_ci fSource = source; 14cb93a386Sopenharmony_ci fIndex = 0; 15cb93a386Sopenharmony_ci SkASSERT(fStack.size() == 0); 16cb93a386Sopenharmony_ci this->regex(); 17cb93a386Sopenharmony_ci SkASSERT(fStack.size() == 1); 18cb93a386Sopenharmony_ci SkASSERT(fIndex == source.size()); 19cb93a386Sopenharmony_ci return this->pop(); 20cb93a386Sopenharmony_ci} 21cb93a386Sopenharmony_ci 22cb93a386Sopenharmony_cichar RegexParser::peek() { 23cb93a386Sopenharmony_ci if (fIndex >= fSource.size()) { 24cb93a386Sopenharmony_ci return END; 25cb93a386Sopenharmony_ci } 26cb93a386Sopenharmony_ci return fSource[fIndex]; 27cb93a386Sopenharmony_ci} 28cb93a386Sopenharmony_ci 29cb93a386Sopenharmony_civoid RegexParser::expect(char c) { 30cb93a386Sopenharmony_ci if (this->peek() != c) { 31cb93a386Sopenharmony_ci printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek()); 32cb93a386Sopenharmony_ci exit(1); 33cb93a386Sopenharmony_ci } 34cb93a386Sopenharmony_ci ++fIndex; 35cb93a386Sopenharmony_ci} 36cb93a386Sopenharmony_ci 37cb93a386Sopenharmony_ciRegexNode RegexParser::pop() { 38cb93a386Sopenharmony_ci RegexNode result = fStack.top(); 39cb93a386Sopenharmony_ci fStack.pop(); 40cb93a386Sopenharmony_ci return result; 41cb93a386Sopenharmony_ci} 42cb93a386Sopenharmony_ci 43cb93a386Sopenharmony_civoid RegexParser::term() { 44cb93a386Sopenharmony_ci switch (this->peek()) { 45cb93a386Sopenharmony_ci case '(': this->group(); break; 46cb93a386Sopenharmony_ci case '[': this->set(); break; 47cb93a386Sopenharmony_ci case '.': this->dot(); break; 48cb93a386Sopenharmony_ci default: this->literal(); break; 49cb93a386Sopenharmony_ci } 50cb93a386Sopenharmony_ci} 51cb93a386Sopenharmony_ci 52cb93a386Sopenharmony_civoid RegexParser::quantifiedTerm() { 53cb93a386Sopenharmony_ci this->term(); 54cb93a386Sopenharmony_ci switch (this->peek()) { 55cb93a386Sopenharmony_ci case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break; 56cb93a386Sopenharmony_ci case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break; 57cb93a386Sopenharmony_ci case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break; 58cb93a386Sopenharmony_ci default: break; 59cb93a386Sopenharmony_ci } 60cb93a386Sopenharmony_ci} 61cb93a386Sopenharmony_ci 62cb93a386Sopenharmony_civoid RegexParser::sequence() { 63cb93a386Sopenharmony_ci this->quantifiedTerm(); 64cb93a386Sopenharmony_ci for (;;) { 65cb93a386Sopenharmony_ci switch (this->peek()) { 66cb93a386Sopenharmony_ci case END: [[fallthrough]]; 67cb93a386Sopenharmony_ci case '|': [[fallthrough]]; 68cb93a386Sopenharmony_ci case ')': return; 69cb93a386Sopenharmony_ci default: 70cb93a386Sopenharmony_ci this->sequence(); 71cb93a386Sopenharmony_ci RegexNode right = this->pop(); 72cb93a386Sopenharmony_ci RegexNode left = this->pop(); 73cb93a386Sopenharmony_ci fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right)); 74cb93a386Sopenharmony_ci break; 75cb93a386Sopenharmony_ci } 76cb93a386Sopenharmony_ci } 77cb93a386Sopenharmony_ci} 78cb93a386Sopenharmony_ci 79cb93a386Sopenharmony_ciRegexNode RegexParser::escapeSequence(char c) { 80cb93a386Sopenharmony_ci switch (c) { 81cb93a386Sopenharmony_ci case 'n': return RegexNode(RegexNode::kChar_Kind, '\n'); 82cb93a386Sopenharmony_ci case 'r': return RegexNode(RegexNode::kChar_Kind, '\r'); 83cb93a386Sopenharmony_ci case 't': return RegexNode(RegexNode::kChar_Kind, '\t'); 84cb93a386Sopenharmony_ci case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r"); 85cb93a386Sopenharmony_ci default: return RegexNode(RegexNode::kChar_Kind, c); 86cb93a386Sopenharmony_ci } 87cb93a386Sopenharmony_ci} 88cb93a386Sopenharmony_ci 89cb93a386Sopenharmony_civoid RegexParser::literal() { 90cb93a386Sopenharmony_ci char c = this->peek(); 91cb93a386Sopenharmony_ci if (c == '\\') { 92cb93a386Sopenharmony_ci ++fIndex; 93cb93a386Sopenharmony_ci fStack.push(this->escapeSequence(peek())); 94cb93a386Sopenharmony_ci ++fIndex; 95cb93a386Sopenharmony_ci } 96cb93a386Sopenharmony_ci else { 97cb93a386Sopenharmony_ci fStack.push(RegexNode(RegexNode::kChar_Kind, c)); 98cb93a386Sopenharmony_ci ++fIndex; 99cb93a386Sopenharmony_ci } 100cb93a386Sopenharmony_ci} 101cb93a386Sopenharmony_ci 102cb93a386Sopenharmony_civoid RegexParser::dot() { 103cb93a386Sopenharmony_ci this->expect('.'); 104cb93a386Sopenharmony_ci fStack.push(RegexNode(RegexNode::kDot_Kind)); 105cb93a386Sopenharmony_ci} 106cb93a386Sopenharmony_ci 107cb93a386Sopenharmony_civoid RegexParser::group() { 108cb93a386Sopenharmony_ci this->expect('('); 109cb93a386Sopenharmony_ci this->regex(); 110cb93a386Sopenharmony_ci this->expect(')'); 111cb93a386Sopenharmony_ci} 112cb93a386Sopenharmony_ci 113cb93a386Sopenharmony_civoid RegexParser::setItem() { 114cb93a386Sopenharmony_ci this->literal(); 115cb93a386Sopenharmony_ci if (this->peek() == '-') { 116cb93a386Sopenharmony_ci ++fIndex; 117cb93a386Sopenharmony_ci if (peek() == ']') { 118cb93a386Sopenharmony_ci fStack.push(RegexNode(RegexNode::kChar_Kind, '-')); 119cb93a386Sopenharmony_ci } 120cb93a386Sopenharmony_ci else { 121cb93a386Sopenharmony_ci literal(); 122cb93a386Sopenharmony_ci RegexNode end = this->pop(); 123cb93a386Sopenharmony_ci SkASSERT(end.fKind == RegexNode::kChar_Kind); 124cb93a386Sopenharmony_ci RegexNode start = this->pop(); 125cb93a386Sopenharmony_ci SkASSERT(start.fKind == RegexNode::kChar_Kind); 126cb93a386Sopenharmony_ci fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end))); 127cb93a386Sopenharmony_ci } 128cb93a386Sopenharmony_ci } 129cb93a386Sopenharmony_ci} 130cb93a386Sopenharmony_ci 131cb93a386Sopenharmony_civoid RegexParser::set() { 132cb93a386Sopenharmony_ci expect('['); 133cb93a386Sopenharmony_ci size_t depth = fStack.size(); 134cb93a386Sopenharmony_ci RegexNode set(RegexNode::kCharset_Kind); 135cb93a386Sopenharmony_ci if (this->peek() == '^') { 136cb93a386Sopenharmony_ci ++fIndex; 137cb93a386Sopenharmony_ci set.fPayload.fBool = true; 138cb93a386Sopenharmony_ci } 139cb93a386Sopenharmony_ci else { 140cb93a386Sopenharmony_ci set.fPayload.fBool = false; 141cb93a386Sopenharmony_ci } 142cb93a386Sopenharmony_ci for (;;) { 143cb93a386Sopenharmony_ci switch (this->peek()) { 144cb93a386Sopenharmony_ci case ']': 145cb93a386Sopenharmony_ci ++fIndex; 146cb93a386Sopenharmony_ci while (fStack.size() > depth) { 147cb93a386Sopenharmony_ci set.fChildren.push_back(this->pop()); 148cb93a386Sopenharmony_ci } 149cb93a386Sopenharmony_ci fStack.push(std::move(set)); 150cb93a386Sopenharmony_ci return; 151cb93a386Sopenharmony_ci case END: 152cb93a386Sopenharmony_ci printf("unterminated character set\n"); 153cb93a386Sopenharmony_ci exit(1); 154cb93a386Sopenharmony_ci default: 155cb93a386Sopenharmony_ci this->setItem(); 156cb93a386Sopenharmony_ci break; 157cb93a386Sopenharmony_ci } 158cb93a386Sopenharmony_ci } 159cb93a386Sopenharmony_ci} 160cb93a386Sopenharmony_ci 161cb93a386Sopenharmony_civoid RegexParser::regex() { 162cb93a386Sopenharmony_ci this->sequence(); 163cb93a386Sopenharmony_ci switch (this->peek()) { 164cb93a386Sopenharmony_ci case '|': { 165cb93a386Sopenharmony_ci ++fIndex; 166cb93a386Sopenharmony_ci this->regex(); 167cb93a386Sopenharmony_ci RegexNode right = this->pop(); 168cb93a386Sopenharmony_ci RegexNode left = this->pop(); 169cb93a386Sopenharmony_ci fStack.push(RegexNode(RegexNode::kOr_Kind, left, right)); 170cb93a386Sopenharmony_ci break; 171cb93a386Sopenharmony_ci } 172cb93a386Sopenharmony_ci case END: // fall through 173cb93a386Sopenharmony_ci case ')': 174cb93a386Sopenharmony_ci return; 175cb93a386Sopenharmony_ci default: 176cb93a386Sopenharmony_ci SkASSERT(false); 177cb93a386Sopenharmony_ci } 178cb93a386Sopenharmony_ci} 179