// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14695b41eeSopenharmony_ci
15695b41eeSopenharmony_ci#include "lexer.h"
16695b41eeSopenharmony_ci
17695b41eeSopenharmony_ci#include <stdio.h>
18695b41eeSopenharmony_ci
19695b41eeSopenharmony_ci#include "eval_env.h"
20695b41eeSopenharmony_ci#include "util.h"
21695b41eeSopenharmony_ci
22695b41eeSopenharmony_ciusing namespace std;
23695b41eeSopenharmony_ci
24695b41eeSopenharmony_cibool Lexer::Error(const string& message, string* err) {
25695b41eeSopenharmony_ci  // Compute line/column.
26695b41eeSopenharmony_ci  int line = 1;
27695b41eeSopenharmony_ci  const char* line_start = input_.str_;
28695b41eeSopenharmony_ci  for (const char* p = input_.str_; p < last_token_; ++p) {
29695b41eeSopenharmony_ci    if (*p == '\n') {
30695b41eeSopenharmony_ci      ++line;
31695b41eeSopenharmony_ci      line_start = p + 1;
32695b41eeSopenharmony_ci    }
33695b41eeSopenharmony_ci  }
34695b41eeSopenharmony_ci  int col = last_token_ ? (int)(last_token_ - line_start) : 0;
35695b41eeSopenharmony_ci
36695b41eeSopenharmony_ci  char buf[1024];
37695b41eeSopenharmony_ci  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
38695b41eeSopenharmony_ci  *err = buf;
39695b41eeSopenharmony_ci  *err += message + "\n";
40695b41eeSopenharmony_ci
41695b41eeSopenharmony_ci  // Add some context to the message.
42695b41eeSopenharmony_ci  const int kTruncateColumn = 72;
43695b41eeSopenharmony_ci  if (col > 0 && col < kTruncateColumn) {
44695b41eeSopenharmony_ci    int len;
45695b41eeSopenharmony_ci    bool truncated = true;
46695b41eeSopenharmony_ci    for (len = 0; len < kTruncateColumn; ++len) {
47695b41eeSopenharmony_ci      if (line_start[len] == 0 || line_start[len] == '\n') {
48695b41eeSopenharmony_ci        truncated = false;
49695b41eeSopenharmony_ci        break;
50695b41eeSopenharmony_ci      }
51695b41eeSopenharmony_ci    }
52695b41eeSopenharmony_ci    *err += string(line_start, len);
53695b41eeSopenharmony_ci    if (truncated)
54695b41eeSopenharmony_ci      *err += "...";
55695b41eeSopenharmony_ci    *err += "\n";
56695b41eeSopenharmony_ci    *err += string(col, ' ');
57695b41eeSopenharmony_ci    *err += "^ near here";
58695b41eeSopenharmony_ci  }
59695b41eeSopenharmony_ci
60695b41eeSopenharmony_ci  return false;
61695b41eeSopenharmony_ci}
62695b41eeSopenharmony_ci
63695b41eeSopenharmony_ciLexer::Lexer(const char* input) {
64695b41eeSopenharmony_ci  Start("input", input);
65695b41eeSopenharmony_ci}
66695b41eeSopenharmony_ci
67695b41eeSopenharmony_civoid Lexer::Start(StringPiece filename, StringPiece input) {
68695b41eeSopenharmony_ci  filename_ = filename;
69695b41eeSopenharmony_ci  input_ = input;
70695b41eeSopenharmony_ci  ofs_ = input_.str_;
71695b41eeSopenharmony_ci  last_token_ = NULL;
72695b41eeSopenharmony_ci}
73695b41eeSopenharmony_ci
74695b41eeSopenharmony_ciconst char* Lexer::TokenName(Token t) {
75695b41eeSopenharmony_ci  switch (t) {
76695b41eeSopenharmony_ci  case ERROR:    return "lexing error";
77695b41eeSopenharmony_ci  case BUILD:    return "'build'";
78695b41eeSopenharmony_ci  case COLON:    return "':'";
79695b41eeSopenharmony_ci  case DEFAULT:  return "'default'";
80695b41eeSopenharmony_ci  case EQUALS:   return "'='";
81695b41eeSopenharmony_ci  case IDENT:    return "identifier";
82695b41eeSopenharmony_ci  case INCLUDE:  return "'include'";
83695b41eeSopenharmony_ci  case INDENT:   return "indent";
84695b41eeSopenharmony_ci  case NEWLINE:  return "newline";
85695b41eeSopenharmony_ci  case PIPE2:    return "'||'";
86695b41eeSopenharmony_ci  case PIPE:     return "'|'";
87695b41eeSopenharmony_ci  case PIPEAT:   return "'|@'";
88695b41eeSopenharmony_ci  case POOL:     return "'pool'";
89695b41eeSopenharmony_ci  case RULE:     return "'rule'";
90695b41eeSopenharmony_ci  case SUBNINJA: return "'subninja'";
91695b41eeSopenharmony_ci  case TEOF:     return "eof";
92695b41eeSopenharmony_ci  }
93695b41eeSopenharmony_ci  return NULL;  // not reached
94695b41eeSopenharmony_ci}
95695b41eeSopenharmony_ci
96695b41eeSopenharmony_ciconst char* Lexer::TokenErrorHint(Token expected) {
97695b41eeSopenharmony_ci  switch (expected) {
98695b41eeSopenharmony_ci  case COLON:
99695b41eeSopenharmony_ci    return " ($ also escapes ':')";
100695b41eeSopenharmony_ci  default:
101695b41eeSopenharmony_ci    return "";
102695b41eeSopenharmony_ci  }
103695b41eeSopenharmony_ci}
104695b41eeSopenharmony_ci
105695b41eeSopenharmony_cistring Lexer::DescribeLastError() {
106695b41eeSopenharmony_ci  if (last_token_) {
107695b41eeSopenharmony_ci    switch (last_token_[0]) {
108695b41eeSopenharmony_ci    case '\t':
109695b41eeSopenharmony_ci      return "tabs are not allowed, use spaces";
110695b41eeSopenharmony_ci    }
111695b41eeSopenharmony_ci  }
112695b41eeSopenharmony_ci  return "lexing error";
113695b41eeSopenharmony_ci}
114695b41eeSopenharmony_ci
115695b41eeSopenharmony_civoid Lexer::UnreadToken() {
116695b41eeSopenharmony_ci  ofs_ = last_token_;
117695b41eeSopenharmony_ci}
118695b41eeSopenharmony_ci
119695b41eeSopenharmony_ciLexer::Token Lexer::ReadToken() {
120695b41eeSopenharmony_ci  const char* p = ofs_;
121695b41eeSopenharmony_ci  const char* q;
122695b41eeSopenharmony_ci  const char* start;
123695b41eeSopenharmony_ci  Lexer::Token token;
124695b41eeSopenharmony_ci  for (;;) {
125695b41eeSopenharmony_ci    start = p;
126695b41eeSopenharmony_ci    /*!re2c
127695b41eeSopenharmony_ci    re2c:define:YYCTYPE = "unsigned char";
128695b41eeSopenharmony_ci    re2c:define:YYCURSOR = p;
129695b41eeSopenharmony_ci    re2c:define:YYMARKER = q;
130695b41eeSopenharmony_ci    re2c:yyfill:enable = 0;
131695b41eeSopenharmony_ci
132695b41eeSopenharmony_ci    nul = "\000";
133695b41eeSopenharmony_ci    simple_varname = [a-zA-Z0-9_-]+;
134695b41eeSopenharmony_ci    varname = [a-zA-Z0-9_.-]+;
135695b41eeSopenharmony_ci
136695b41eeSopenharmony_ci    [ ]*"#"[^\000\n]*"\n" { continue; }
137695b41eeSopenharmony_ci    [ ]*"\r\n" { token = NEWLINE;  break; }
138695b41eeSopenharmony_ci    [ ]*"\n"   { token = NEWLINE;  break; }
139695b41eeSopenharmony_ci    [ ]+       { token = INDENT;   break; }
140695b41eeSopenharmony_ci    "build"    { token = BUILD;    break; }
141695b41eeSopenharmony_ci    "pool"     { token = POOL;     break; }
142695b41eeSopenharmony_ci    "rule"     { token = RULE;     break; }
143695b41eeSopenharmony_ci    "default"  { token = DEFAULT;  break; }
144695b41eeSopenharmony_ci    "="        { token = EQUALS;   break; }
145695b41eeSopenharmony_ci    ":"        { token = COLON;    break; }
146695b41eeSopenharmony_ci    "|@"       { token = PIPEAT;   break; }
147695b41eeSopenharmony_ci    "||"       { token = PIPE2;    break; }
148695b41eeSopenharmony_ci    "|"        { token = PIPE;     break; }
149695b41eeSopenharmony_ci    "include"  { token = INCLUDE;  break; }
150695b41eeSopenharmony_ci    "subninja" { token = SUBNINJA; break; }
151695b41eeSopenharmony_ci    varname    { token = IDENT;    break; }
152695b41eeSopenharmony_ci    nul        { token = TEOF;     break; }
153695b41eeSopenharmony_ci    [^]        { token = ERROR;    break; }
154695b41eeSopenharmony_ci    */
155695b41eeSopenharmony_ci  }
156695b41eeSopenharmony_ci
157695b41eeSopenharmony_ci  last_token_ = start;
158695b41eeSopenharmony_ci  ofs_ = p;
159695b41eeSopenharmony_ci  if (token != NEWLINE && token != TEOF)
160695b41eeSopenharmony_ci    EatWhitespace();
161695b41eeSopenharmony_ci  return token;
162695b41eeSopenharmony_ci}
163695b41eeSopenharmony_ci
164695b41eeSopenharmony_cibool Lexer::PeekToken(Token token) {
165695b41eeSopenharmony_ci  Token t = ReadToken();
166695b41eeSopenharmony_ci  if (t == token)
167695b41eeSopenharmony_ci    return true;
168695b41eeSopenharmony_ci  UnreadToken();
169695b41eeSopenharmony_ci  return false;
170695b41eeSopenharmony_ci}
171695b41eeSopenharmony_ci
172695b41eeSopenharmony_civoid Lexer::EatWhitespace() {
173695b41eeSopenharmony_ci  const char* p = ofs_;
174695b41eeSopenharmony_ci  const char* q;
175695b41eeSopenharmony_ci  for (;;) {
176695b41eeSopenharmony_ci    ofs_ = p;
177695b41eeSopenharmony_ci    /*!re2c
178695b41eeSopenharmony_ci    [ ]+    { continue; }
179695b41eeSopenharmony_ci    "$\r\n" { continue; }
180695b41eeSopenharmony_ci    "$\n"   { continue; }
181695b41eeSopenharmony_ci    nul     { break; }
182695b41eeSopenharmony_ci    [^]     { break; }
183695b41eeSopenharmony_ci    */
184695b41eeSopenharmony_ci  }
185695b41eeSopenharmony_ci}
186695b41eeSopenharmony_ci
187695b41eeSopenharmony_cibool Lexer::ReadIdent(string* out) {
188695b41eeSopenharmony_ci  const char* p = ofs_;
189695b41eeSopenharmony_ci  const char* start;
190695b41eeSopenharmony_ci  for (;;) {
191695b41eeSopenharmony_ci    start = p;
192695b41eeSopenharmony_ci    /*!re2c
193695b41eeSopenharmony_ci    varname {
194695b41eeSopenharmony_ci      out->assign(start, p - start);
195695b41eeSopenharmony_ci      break;
196695b41eeSopenharmony_ci    }
197695b41eeSopenharmony_ci    [^] {
198695b41eeSopenharmony_ci      last_token_ = start;
199695b41eeSopenharmony_ci      return false;
200695b41eeSopenharmony_ci    }
201695b41eeSopenharmony_ci    */
202695b41eeSopenharmony_ci  }
203695b41eeSopenharmony_ci  last_token_ = start;
204695b41eeSopenharmony_ci  ofs_ = p;
205695b41eeSopenharmony_ci  EatWhitespace();
206695b41eeSopenharmony_ci  return true;
207695b41eeSopenharmony_ci}
208695b41eeSopenharmony_ci
209695b41eeSopenharmony_cibool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
210695b41eeSopenharmony_ci  const char* p = ofs_;
211695b41eeSopenharmony_ci  const char* q;
212695b41eeSopenharmony_ci  const char* start;
213695b41eeSopenharmony_ci  for (;;) {
214695b41eeSopenharmony_ci    start = p;
215695b41eeSopenharmony_ci    /*!re2c
216695b41eeSopenharmony_ci    [^$ :\r\n|\000]+ {
217695b41eeSopenharmony_ci      eval->AddText(StringPiece(start, p - start));
218695b41eeSopenharmony_ci      continue;
219695b41eeSopenharmony_ci    }
220695b41eeSopenharmony_ci    "\r\n" {
221695b41eeSopenharmony_ci      if (path)
222695b41eeSopenharmony_ci        p = start;
223695b41eeSopenharmony_ci      break;
224695b41eeSopenharmony_ci    }
225695b41eeSopenharmony_ci    [ :|\n] {
226695b41eeSopenharmony_ci      if (path) {
227695b41eeSopenharmony_ci        p = start;
228695b41eeSopenharmony_ci        break;
229695b41eeSopenharmony_ci      } else {
230695b41eeSopenharmony_ci        if (*start == '\n')
231695b41eeSopenharmony_ci          break;
232695b41eeSopenharmony_ci        eval->AddText(StringPiece(start, 1));
233695b41eeSopenharmony_ci        continue;
234695b41eeSopenharmony_ci      }
235695b41eeSopenharmony_ci    }
236695b41eeSopenharmony_ci    "$$" {
237695b41eeSopenharmony_ci      eval->AddText(StringPiece("$", 1));
238695b41eeSopenharmony_ci      continue;
239695b41eeSopenharmony_ci    }
240695b41eeSopenharmony_ci    "$ " {
241695b41eeSopenharmony_ci      eval->AddText(StringPiece(" ", 1));
242695b41eeSopenharmony_ci      continue;
243695b41eeSopenharmony_ci    }
244695b41eeSopenharmony_ci    "$\r\n"[ ]* {
245695b41eeSopenharmony_ci      continue;
246695b41eeSopenharmony_ci    }
247695b41eeSopenharmony_ci    "$\n"[ ]* {
248695b41eeSopenharmony_ci      continue;
249695b41eeSopenharmony_ci    }
250695b41eeSopenharmony_ci    "${"varname"}" {
251695b41eeSopenharmony_ci      eval->AddSpecial(StringPiece(start + 2, p - start - 3));
252695b41eeSopenharmony_ci      continue;
253695b41eeSopenharmony_ci    }
254695b41eeSopenharmony_ci    "$"simple_varname {
255695b41eeSopenharmony_ci      eval->AddSpecial(StringPiece(start + 1, p - start - 1));
256695b41eeSopenharmony_ci      continue;
257695b41eeSopenharmony_ci    }
258695b41eeSopenharmony_ci    "$:" {
259695b41eeSopenharmony_ci      eval->AddText(StringPiece(":", 1));
260695b41eeSopenharmony_ci      continue;
261695b41eeSopenharmony_ci    }
262695b41eeSopenharmony_ci    "$". {
263695b41eeSopenharmony_ci      last_token_ = start;
264695b41eeSopenharmony_ci      return Error("bad $-escape (literal $ must be written as $$)", err);
265695b41eeSopenharmony_ci    }
266695b41eeSopenharmony_ci    nul {
267695b41eeSopenharmony_ci      last_token_ = start;
268695b41eeSopenharmony_ci      return Error("unexpected EOF", err);
269695b41eeSopenharmony_ci    }
270695b41eeSopenharmony_ci    [^] {
271695b41eeSopenharmony_ci      last_token_ = start;
272695b41eeSopenharmony_ci      return Error(DescribeLastError(), err);
273695b41eeSopenharmony_ci    }
274695b41eeSopenharmony_ci    */
275695b41eeSopenharmony_ci  }
276695b41eeSopenharmony_ci  last_token_ = start;
277695b41eeSopenharmony_ci  ofs_ = p;
278695b41eeSopenharmony_ci  if (path)
279695b41eeSopenharmony_ci    EatWhitespace();
280695b41eeSopenharmony_ci  // Non-path strings end in newlines, so there's no whitespace to eat.
281695b41eeSopenharmony_ci  return true;
282695b41eeSopenharmony_ci}
283