// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

import { Token, TokenType } from "./token.js";

/**
 * Splits an input string into Tokens.
 *
 * The recognised token kinds are hex and decimal integers, floats, double
 * quoted strings, words starting with `Op`, the punctuation `=` and `|`,
 * result ids (`%name`) and identifiers. A `;` starts a comment that runs to
 * the end of the line. (Presumably this is SPIR-V assembly text; confirm
 * against the callers.)
 *
 * Every `try*` method follows the same contract: on a match it advances the
 * cursor past the token and returns it, otherwise it returns `undefined`.
 */
export default class Lexer {
  /**
   * @param {String} input The input string to tokenize.
   */
  constructor(input) {
    this.input_ = input;
    this.len_ = input.length;
    // Index of the next unread character in |input_|.
    this.cur_pos_ = 0;
    // 1-based line number of the character at |cur_pos_|.
    this.cur_line_ = 1;

    this.num_regex_ = /^[0-9]+$/;
    this.alpha_regex_ = /^[a-zA-Z_]+$/;
    this.op_regex_ = /^Op[A-Z][^\s]*$/;
    this.hex_regex_ = /^[0-9a-fA-F]$/;
  }

  /**
   * Parses the next token from the input stream.
   * @return {Token} the next token. A kEOF token is returned once the input
   *                 is exhausted and a kError token if nothing matches at the
   *                 current position.
   */
  next() {
    // skipWhitespace() discards comments as well, so afterwards the cursor
    // is either at the end of the input or at the first character of a token.
    this.skipWhitespace();

    if (this.cur_pos_ >= this.len_)
      return new Token(TokenType.kEOF, this.cur_line_);

    // The order is significant:
    //  - hex before float/integer, otherwise the "0" of "0x1F" is lexed as a
    //    decimal integer;
    //  - float before integer, otherwise the integer part of "1.5" is
    //    consumed on its own;
    //  - Op before identifier, otherwise "OpFoo" is lexed as an identifier.
    const matchers = [
      this.tryHexInteger,
      this.tryFloat,
      this.tryInteger,
      this.tryString,
      this.tryOp,
      this.tryPunctuation,
      this.tryResultId,
      this.tryIdent
    ];
    for (const matcher of matchers) {
      const token = matcher.call(this);
      if (token !== undefined)
        return token;
    }

    return new Token(TokenType.kError, this.cur_line_, "Failed to match token");
  }

  /**
   * Checks if the input at the current position starts with |str|.
   * @param {String} str The text to compare against.
   * @return {Boolean} true if |str| is found at the cursor, false if it is
   *                   not or if fewer than |str.length| characters remain.
   */
  is(str) {
    if (this.len_ <= this.cur_pos_ + (str.length - 1))
      return false;

    for (let i = 0; i < str.length; ++i) {
      if (this.input_[this.cur_pos_ + i] !== str[i])
        return false;
    }

    return true;
  }

  /**
   * @param {String} ch The text to test.
   * @return {Array|null} a truthy match result if |ch| consists only of
   *                      decimal digits, null otherwise.
   */
  isNum(ch) {
    return ch.match(this.num_regex_);
  }

  /**
   * @param {String} ch The text to test.
   * @return {Array|null} a truthy match result if |ch| consists only of
   *                      ASCII letters and underscores, null otherwise.
   */
  isAlpha(ch) {
    return ch.match(this.alpha_regex_);
  }

  /**
   * @param {String} ch The text to test.
   * @return {Array|null} a truthy match result if |ch| passes isNum() or
   *                      isAlpha(), null otherwise.
   */
  isAlphaNum(ch) {
    return this.isNum(ch) || this.isAlpha(ch);
  }

  /**
   * @param {String} char The character to test.
   * @return {Array|null} a truthy match result if |char| is a single hex
   *                      digit, null otherwise.
   */
  isHex(char) {
    return char.match(this.hex_regex_);
  }

  /**
   * @return {Boolean} true if the character at the cursor is a space, tab,
   *                   carriage return or newline.
   */
  isCurWhitespace() {
    return this.is(" ") || this.is("\t") || this.is("\r") || this.is("\n");
  }

  /**
   * Advances the cursor past any run of whitespace and comments, counting
   * the newlines that are skipped.
   */
  skipWhitespace() {
    for (;;) {
      const before = this.cur_pos_;
      while (this.cur_pos_ < this.len_ && this.isCurWhitespace()) {
        if (this.is("\n"))
          this.cur_line_ += 1;

        this.cur_pos_ += 1;
      }

      // A comment stops in front of its terminating newline, which the next
      // iteration then consumes as whitespace.
      this.skipComments();

      // Cursor didn't move so neither whitespace nor a comment matched.
      if (before === this.cur_pos_)
        break;
    }
  }

  /**
   * If the cursor is at a `;`, advances it to the end of that line. The
   * newline itself is not consumed.
   */
  skipComments() {
    if (!this.is(";"))
      return;

    while (this.cur_pos_ < this.len_ && !this.is("\n"))
      this.cur_pos_ += 1;
  }

  /**
   * Attempt to parse the next part of the input as a float.
   *
   * A float is an optional `-`, optional digits, a mandatory `.` and
   * optional digits, with at least one digit overall (so `1.`, `.5` and
   * `-1.5` match, `.` and `-.` do not).
   * @return {Token|undefined} returns a Token if a float is matched,
   *                           undefined otherwise.
   */
  tryFloat() {
    const start = this.cur_pos_;
    let end = start;

    if (this.cur_pos_ >= this.len_)
      return undefined;
    if (this.input_[end] === "-")
      end += 1;

    while (end < this.len_ && this.isNum(this.input_[end]))
      end += 1;

    // Must have a "." in a float
    if (end >= this.len_ || this.input_[end] !== ".")
      return undefined;

    end += 1;
    while (end < this.len_ && this.isNum(this.input_[end]))
      end += 1;

    const text = this.input_.substring(start, end);
    // A bare "." or "-." has no digits and is not a number.
    if (text === "." || text === "-.")
      return undefined;

    this.cur_pos_ = end;

    return new Token(TokenType.kFloatLiteral, this.cur_line_, parseFloat(text));
  }

  /**
   * Attempt to parse a hex encoded integer (`0x` followed by one or more
   * hex digits).
   * @return {Token|undefined} returns a Token if a Hex number is matched,
   *                           undefined otherwise.
   */
  tryHexInteger() {
    const start = this.cur_pos_;
    if (!this.is("0x"))
      return undefined;

    let end = start + 2;
    while (end < this.len_ && this.isHex(this.input_[end]))
      end += 1;

    // "0x" without any digit is not a hex number. Bail out before moving the
    // cursor so the remaining matchers see the untouched input, instead of
    // emitting an integer token whose value is NaN.
    if (end === start + 2)
      return undefined;

    this.cur_pos_ = end;

    const val = parseInt(this.input_.substring(start, end), 16);
    return new Token(TokenType.kIntegerLiteral, this.cur_line_, val);
  }

  /**
   * Attempt to parse a decimal integer (an optional `-` followed by one or
   * more digits).
   * @return {Token|undefined} returns a Token if a number is matched,
   *                           undefined otherwise.
   */
  tryInteger() {
    const start = this.cur_pos_;
    let end = start;

    if (this.cur_pos_ >= this.len_)
      return undefined;
    if (this.input_[end] === "-")
      end += 1;

    // At least one digit is required; a lone "-" is not an integer.
    if (end >= this.len_ || !this.isNum(this.input_[end]))
      return undefined;

    while (end < this.len_ && this.isNum(this.input_[end]))
      end += 1;

    this.cur_pos_ = end;

    const val = parseInt(this.input_.substring(start, end), 10);
    return new Token(TokenType.kIntegerLiteral, this.cur_line_, val);
  }

  /**
   * Attempt to parse a result id: a `%` followed by letters, digits and
   * underscores.
   *
   * The token data is `{name, val}` where |name| is the text after the `%`
   * and |val| is its numeric value when the name is all digits, undefined
   * otherwise. A `%` with nothing usable after it still matches and yields
   * an empty |name|.
   * @return {Token|undefined} returns a Token if a result id is matched,
   *                           undefined otherwise.
   */
  tryResultId() {
    if (this.cur_pos_ >= this.len_ || !this.is("%"))
      return undefined;

    // Step over the "%"; it is not part of the name.
    this.cur_pos_ += 1;
    const start = this.cur_pos_;
    while (this.cur_pos_ < this.len_ &&
        this.isAlphaNum(this.input_[this.cur_pos_])) {
      this.cur_pos_ += 1;
    }

    const ident = this.input_.substring(start, this.cur_pos_);
    let value = undefined;
    if (this.num_regex_.test(ident))
      value = parseInt(ident, 10);

    return new Token(TokenType.kResultId, this.cur_line_, {
      name: ident,
      val: value
    });
  }

  /**
   * Attempt to parse an identifier: one or more letters, digits and
   * underscores.
   * @return {Token|undefined} returns a Token if an identifier is matched,
   *                           undefined otherwise.
   */
  tryIdent() {
    const start = this.cur_pos_;
    if (start >= this.len_)
      return undefined;

    while (this.cur_pos_ < this.len_ &&
        this.isAlphaNum(this.input_[this.cur_pos_])) {
      this.cur_pos_ += 1;
    }

    // Nothing was consumed, so this is not an identifier. Returning an empty
    // identifier here would leave the cursor where it is and make every
    // following call to next() return the same token again.
    if (this.cur_pos_ === start)
      return undefined;

    const ident = this.input_.substring(start, this.cur_pos_);
    return new Token(TokenType.kIdentifier, this.cur_line_, ident);
  }

  /**
   * Attempt to parse an Op command: the text `Op` followed by everything up
   * to the next whitespace character. The token data is `{name}` holding the
   * full matched text, including the `Op` prefix.
   * @return {Token|undefined} returns a Token if an Op command is matched,
   *                           undefined otherwise.
   */
  tryOp() {
    const start = this.cur_pos_;
    if (!this.is("Op"))
      return undefined;

    while (this.cur_pos_ < this.len_ && !this.isCurWhitespace())
      this.cur_pos_ += 1;

    return new Token(TokenType.kOp, this.cur_line_, {
      name: this.input_.substring(start, this.cur_pos_)
    });
  }

  /**
   * Attempts to match punctuation strings against the input
   * @return {Token|undefined} Returns the Token for the punctuation or
   *                           undefined if no matches found.
   */
  tryPunctuation() {
    const punctuation = [
      ["=", TokenType.kEqual],
      ["|", TokenType.kPipe]
    ];

    for (const [text, type] of punctuation) {
      if (!this.is(text))
        continue;

      // Advance by the length of the matched text so the cursor does not
      // depend on how the TokenType values happen to be represented.
      this.cur_pos_ += text.length;
      return new Token(type, this.cur_line_, type);
    }

    return undefined;
  }

  /**
   * Attempts to match a double quoted string against the input.
   *
   * A backslash makes the following character part of the string verbatim,
   * which is how `\"` and `\\` are written. The token data is the string
   * content without the surrounding quotes and the token line is the line
   * the string starts on.
   *
   * If the closing quote is missing the rest of the input is consumed and
   * undefined is returned, so next() ends up reporting a kError token.
   * @return {Token|undefined} Returns the Token for the string or undefined
   *                           if no match found.
   */
  tryString() {
    // Must have at least 2 chars for a string.
    if (this.cur_pos_ + 1 >= this.len_ || !this.is("\""))
      return undefined;

    const line = this.cur_line_;
    this.cur_pos_ += 1;

    let str = "";
    while (this.cur_pos_ < this.len_ && !this.is("\"")) {
      if (this.is("\\")) {
        // Drop the backslash and take whatever follows it literally.
        this.cur_pos_ += 1;
        if (this.cur_pos_ >= this.len_)
          return undefined;
      }

      const ch = this.input_[this.cur_pos_];
      // Keep the line counter right for tokens after a multi-line string.
      if (ch === "\n")
        this.cur_line_ += 1;

      str += ch;
      this.cur_pos_ += 1;
    }

    // Ran off the end of the input without finding the closing quote.
    if (this.cur_pos_ >= this.len_)
      return undefined;

    // Step over the closing quote.
    this.cur_pos_ += 1;

    return new Token(TokenType.kStringLiteral, line, str);
  }
}