1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci * Copyright 2017 Google Inc. 3cb93a386Sopenharmony_ci * 4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci * found in the LICENSE file. 6cb93a386Sopenharmony_ci */ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#include "src/sksl/lex/NFAtoDFA.h" 9cb93a386Sopenharmony_ci#include "src/sksl/lex/RegexParser.h" 10cb93a386Sopenharmony_ci#include "src/sksl/lex/TransitionTable.h" 11cb93a386Sopenharmony_ci 12cb93a386Sopenharmony_ci#include <fstream> 13cb93a386Sopenharmony_ci#include <sstream> 14cb93a386Sopenharmony_ci#include <string> 15cb93a386Sopenharmony_ci 16cb93a386Sopenharmony_ci/** 17cb93a386Sopenharmony_ci * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex 18cb93a386Sopenharmony_ci * file is a text file with one token definition per line. Each line is of the form: 19cb93a386Sopenharmony_ci * <TOKEN_NAME> = <pattern> 20cb93a386Sopenharmony_ci * where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string. 21cb93a386Sopenharmony_ci */ 22cb93a386Sopenharmony_ci 23cb93a386Sopenharmony_cistatic constexpr const char* HEADER = 24cb93a386Sopenharmony_ci "/*\n" 25cb93a386Sopenharmony_ci " * Copyright 2017 Google Inc.\n" 26cb93a386Sopenharmony_ci " *\n" 27cb93a386Sopenharmony_ci " * Use of this source code is governed by a BSD-style license that can be\n" 28cb93a386Sopenharmony_ci " * found in the LICENSE file.\n" 29cb93a386Sopenharmony_ci " */\n" 30cb93a386Sopenharmony_ci "/*****************************************************************************************\n" 31cb93a386Sopenharmony_ci " ******************** This file was generated by sksllex. Do not edit. *******************\n" 32cb93a386Sopenharmony_ci " *****************************************************************************************/\n"; 33cb93a386Sopenharmony_ci 34cb93a386Sopenharmony_cistatic void writeH(const DFA& dfa, const char* lexer, const char* token, 35cb93a386Sopenharmony_ci const std::vector<std::string>& tokens, const char* hPath) { 36cb93a386Sopenharmony_ci std::ofstream out(hPath); 37cb93a386Sopenharmony_ci SkASSERT(out.good()); 38cb93a386Sopenharmony_ci out << HEADER; 39cb93a386Sopenharmony_ci out << "#ifndef SKSL_" << lexer << "\n"; 40cb93a386Sopenharmony_ci out << "#define SKSL_" << lexer << "\n"; 41cb93a386Sopenharmony_ci out << "#include \"include/core/SkStringView.h\"\n"; 42cb93a386Sopenharmony_ci out << "#include <cstddef>\n"; 43cb93a386Sopenharmony_ci out << "#include <cstdint>\n"; 44cb93a386Sopenharmony_ci out << "namespace SkSL {\n"; 45cb93a386Sopenharmony_ci out << "\n"; 46cb93a386Sopenharmony_ci out << "struct " << token << " {\n"; 47cb93a386Sopenharmony_ci out << " enum class Kind {\n"; 48cb93a386Sopenharmony_ci for (const std::string& t : tokens) { 49cb93a386Sopenharmony_ci out << " TK_" << t << ",\n"; 50cb93a386Sopenharmony_ci } 51cb93a386Sopenharmony_ci out << " TK_NONE,"; 52cb93a386Sopenharmony_ci out << R"( 53cb93a386Sopenharmony_ci }; 54cb93a386Sopenharmony_ci 55cb93a386Sopenharmony_ci )" << token << "() {}"; 56cb93a386Sopenharmony_ci 57cb93a386Sopenharmony_ci out << token << R"((Kind kind, int32_t offset, int32_t length, int32_t line) 58cb93a386Sopenharmony_ci : fKind(kind) 59cb93a386Sopenharmony_ci , fOffset(offset) 60cb93a386Sopenharmony_ci , fLength(length) 61cb93a386Sopenharmony_ci , fLine(line) {} 62cb93a386Sopenharmony_ci 63cb93a386Sopenharmony_ci Kind fKind = Kind::TK_NONE; 64cb93a386Sopenharmony_ci int32_t fOffset = -1; 65cb93a386Sopenharmony_ci int32_t fLength = -1; 66cb93a386Sopenharmony_ci int32_t fLine = -1; 67cb93a386Sopenharmony_ci}; 68cb93a386Sopenharmony_ci 69cb93a386Sopenharmony_ciclass )" << lexer << R"( { 70cb93a386Sopenharmony_cipublic: 71cb93a386Sopenharmony_ci void start(skstd::string_view text) { 72cb93a386Sopenharmony_ci fText = text; 73cb93a386Sopenharmony_ci fOffset = 0; 74cb93a386Sopenharmony_ci fLine = 1; 75cb93a386Sopenharmony_ci } 76cb93a386Sopenharmony_ci 77cb93a386Sopenharmony_ci )" << token << R"( next(); 78cb93a386Sopenharmony_ci 79cb93a386Sopenharmony_ci struct Checkpoint { 80cb93a386Sopenharmony_ci int32_t fOffset; 81cb93a386Sopenharmony_ci int32_t fLine; 82cb93a386Sopenharmony_ci }; 83cb93a386Sopenharmony_ci 84cb93a386Sopenharmony_ci Checkpoint getCheckpoint() const { 85cb93a386Sopenharmony_ci return {fOffset, fLine}; 86cb93a386Sopenharmony_ci } 87cb93a386Sopenharmony_ci 88cb93a386Sopenharmony_ci void rewindToCheckpoint(Checkpoint checkpoint) { 89cb93a386Sopenharmony_ci fOffset = checkpoint.fOffset; 90cb93a386Sopenharmony_ci fLine = checkpoint.fLine; 91cb93a386Sopenharmony_ci } 92cb93a386Sopenharmony_ci 93cb93a386Sopenharmony_ciprivate: 94cb93a386Sopenharmony_ci skstd::string_view fText; 95cb93a386Sopenharmony_ci int32_t fOffset; 96cb93a386Sopenharmony_ci int32_t fLine; 97cb93a386Sopenharmony_ci}; 98cb93a386Sopenharmony_ci 99cb93a386Sopenharmony_ci} // namespace 100cb93a386Sopenharmony_ci#endif 101cb93a386Sopenharmony_ci)"; 102cb93a386Sopenharmony_ci} 103cb93a386Sopenharmony_ci 104cb93a386Sopenharmony_cistatic void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include, 105cb93a386Sopenharmony_ci const char* cppPath) { 106cb93a386Sopenharmony_ci std::ofstream out(cppPath); 107cb93a386Sopenharmony_ci SkASSERT(out.good()); 108cb93a386Sopenharmony_ci out << HEADER; 109cb93a386Sopenharmony_ci out << "#include \"" << include << "\"\n"; 110cb93a386Sopenharmony_ci out << "\n"; 111cb93a386Sopenharmony_ci out << "namespace SkSL {\n"; 112cb93a386Sopenharmony_ci out << "\n"; 113cb93a386Sopenharmony_ci 114cb93a386Sopenharmony_ci size_t states = 0; 115cb93a386Sopenharmony_ci for (const auto& row : dfa.fTransitions) { 116cb93a386Sopenharmony_ci states = std::max(states, row.size()); 117cb93a386Sopenharmony_ci } 118cb93a386Sopenharmony_ci out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n"; 119cb93a386Sopenharmony_ci // arbitrarily-chosen character which is greater than START_CHAR and should not appear in actual 120cb93a386Sopenharmony_ci // input 121cb93a386Sopenharmony_ci out << "static const uint8_t INVALID_CHAR = 18;"; 122cb93a386Sopenharmony_ci out << "static const int8_t kMappings[" << dfa.fCharMappings.size() << "] = {\n "; 123cb93a386Sopenharmony_ci const char* separator = ""; 124cb93a386Sopenharmony_ci for (int m : dfa.fCharMappings) { 125cb93a386Sopenharmony_ci out << separator << std::to_string(m); 126cb93a386Sopenharmony_ci separator = ", "; 127cb93a386Sopenharmony_ci } 128cb93a386Sopenharmony_ci out << "\n};\n"; 129cb93a386Sopenharmony_ci 130cb93a386Sopenharmony_ci WriteTransitionTable(out, dfa, states); 131cb93a386Sopenharmony_ci 132cb93a386Sopenharmony_ci out << "static const int8_t kAccepts[" << states << "] = {"; 133cb93a386Sopenharmony_ci for (size_t i = 0; i < states; ++i) { 134cb93a386Sopenharmony_ci if (i < dfa.fAccepts.size()) { 135cb93a386Sopenharmony_ci out << " " << dfa.fAccepts[i] << ","; 136cb93a386Sopenharmony_ci } else { 137cb93a386Sopenharmony_ci out << " " << INVALID << ","; 138cb93a386Sopenharmony_ci } 139cb93a386Sopenharmony_ci } 140cb93a386Sopenharmony_ci out << " };\n"; 141cb93a386Sopenharmony_ci out << "\n"; 142cb93a386Sopenharmony_ci 143cb93a386Sopenharmony_ci out << token << " " << lexer << "::next() {"; 144cb93a386Sopenharmony_ci out << R"( 145cb93a386Sopenharmony_ci // note that we cheat here: normally a lexer needs to worry about the case 146cb93a386Sopenharmony_ci // where a token has a prefix which is not itself a valid token - for instance, 147cb93a386Sopenharmony_ci // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid 148cb93a386Sopenharmony_ci // tokens. Our grammar doesn't have this property, so we can simplify the logic 149cb93a386Sopenharmony_ci // a bit. 150cb93a386Sopenharmony_ci int32_t startOffset = fOffset; 151cb93a386Sopenharmony_ci if (startOffset == (int32_t)fText.length()) { 152cb93a386Sopenharmony_ci return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0, fLine); 153cb93a386Sopenharmony_ci } 154cb93a386Sopenharmony_ci State state = 1; 155cb93a386Sopenharmony_ci for (;;) { 156cb93a386Sopenharmony_ci if (fOffset >= (int32_t)fText.length()) { 157cb93a386Sopenharmony_ci if (kAccepts[state] == -1) { 158cb93a386Sopenharmony_ci return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0, fLine); 159cb93a386Sopenharmony_ci } 160cb93a386Sopenharmony_ci break; 161cb93a386Sopenharmony_ci } 162cb93a386Sopenharmony_ci uint8_t c = (uint8_t) fText[fOffset]; 163cb93a386Sopenharmony_ci if (c <= 8 || c >= )" << dfa.fCharMappings.size() << R"() { 164cb93a386Sopenharmony_ci c = INVALID_CHAR; 165cb93a386Sopenharmony_ci } 166cb93a386Sopenharmony_ci State newState = get_transition(kMappings[c], state); 167cb93a386Sopenharmony_ci if (!newState) { 168cb93a386Sopenharmony_ci break; 169cb93a386Sopenharmony_ci } 170cb93a386Sopenharmony_ci state = newState; 171cb93a386Sopenharmony_ci ++fOffset; 172cb93a386Sopenharmony_ci if (c == '\n') { 173cb93a386Sopenharmony_ci ++fLine; 174cb93a386Sopenharmony_ci } 175cb93a386Sopenharmony_ci } 176cb93a386Sopenharmony_ci Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state]; 177cb93a386Sopenharmony_ci return )" << token << R"((kind, startOffset, fOffset - startOffset, fLine); 178cb93a386Sopenharmony_ci} 179cb93a386Sopenharmony_ci 180cb93a386Sopenharmony_ci} // namespace 181cb93a386Sopenharmony_ci)"; 182cb93a386Sopenharmony_ci} 183cb93a386Sopenharmony_ci 184cb93a386Sopenharmony_cistatic void process(const char* inPath, const char* lexer, const char* token, const char* hPath, 185cb93a386Sopenharmony_ci const char* cppPath) { 186cb93a386Sopenharmony_ci NFA nfa; 187cb93a386Sopenharmony_ci std::vector<std::string> tokens; 188cb93a386Sopenharmony_ci tokens.push_back("END_OF_FILE"); 189cb93a386Sopenharmony_ci std::string line; 190cb93a386Sopenharmony_ci std::ifstream in(inPath); 191cb93a386Sopenharmony_ci while (std::getline(in, line)) { 192cb93a386Sopenharmony_ci if (line.length() == 0) { 193cb93a386Sopenharmony_ci continue; 194cb93a386Sopenharmony_ci } 195cb93a386Sopenharmony_ci if (line.length() >= 2 && line[0] == '/' && line[1] == '/') { 196cb93a386Sopenharmony_ci continue; 197cb93a386Sopenharmony_ci } 198cb93a386Sopenharmony_ci std::istringstream split(line); 199cb93a386Sopenharmony_ci std::string name, delimiter, pattern; 200cb93a386Sopenharmony_ci if (split >> name >> delimiter >> pattern) { 201cb93a386Sopenharmony_ci SkASSERT(split.eof()); 202cb93a386Sopenharmony_ci SkASSERT(name != ""); 203cb93a386Sopenharmony_ci SkASSERT(delimiter == "="); 204cb93a386Sopenharmony_ci SkASSERT(pattern != ""); 205cb93a386Sopenharmony_ci tokens.push_back(name); 206cb93a386Sopenharmony_ci if (pattern[0] == '"') { 207cb93a386Sopenharmony_ci SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"'); 208cb93a386Sopenharmony_ci RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]); 209cb93a386Sopenharmony_ci for (size_t i = 2; i < pattern.size() - 1; ++i) { 210cb93a386Sopenharmony_ci node = RegexNode(RegexNode::kConcat_Kind, node, 211cb93a386Sopenharmony_ci RegexNode(RegexNode::kChar_Kind, pattern[i])); 212cb93a386Sopenharmony_ci } 213cb93a386Sopenharmony_ci nfa.addRegex(node); 214cb93a386Sopenharmony_ci } 215cb93a386Sopenharmony_ci else { 216cb93a386Sopenharmony_ci nfa.addRegex(RegexParser().parse(pattern)); 217cb93a386Sopenharmony_ci } 218cb93a386Sopenharmony_ci } 219cb93a386Sopenharmony_ci } 220cb93a386Sopenharmony_ci NFAtoDFA converter(&nfa); 221cb93a386Sopenharmony_ci DFA dfa = converter.convert(); 222cb93a386Sopenharmony_ci writeH(dfa, lexer, token, tokens, hPath); 223cb93a386Sopenharmony_ci writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath); 224cb93a386Sopenharmony_ci} 225cb93a386Sopenharmony_ci 226cb93a386Sopenharmony_ciint main(int argc, const char** argv) { 227cb93a386Sopenharmony_ci if (argc != 6) { 228cb93a386Sopenharmony_ci printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n"); 229cb93a386Sopenharmony_ci exit(1); 230cb93a386Sopenharmony_ci } 231cb93a386Sopenharmony_ci process(argv[1], argv[2], argv[3], argv[4], argv[5]); 232cb93a386Sopenharmony_ci return 0; 233cb93a386Sopenharmony_ci} 234