/* Generated by re2c */
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
15
16 #include "depfile_parser.h"
17 #include "util.h"
18
19 #include <algorithm>
20
21 using namespace std;
22
DepfileParser(DepfileParserOptions options)23 DepfileParser::DepfileParser(DepfileParserOptions options)
24 : options_(options)
25 {
26 }
27
// A note on backslashes in Makefiles, from reading the docs:
// Backslash-newline is the line continuation character.
// Backslash-# escapes a # (otherwise meaningful as a comment start).
// Backslash-% escapes a % (otherwise meaningful as a special).
// Finally, quoting the GNU manual, "Backslashes that are not in danger
// of quoting ‘%’ characters go unmolested."
// How do you end a line with a backslash?  The netbsd Make docs suggest
// reading the result of a shell command echoing a backslash!
//
// Rather than implement all of above, we follow what GCC/Clang produces:
// Backslashes escape a space or hash sign.
// When a space is preceded by 2N+1 backslashes, it represents N backslashes
// followed by space.
// When a space is preceded by 2N backslashes, it represents 2N backslashes at
// the end of a filename.
// A hash sign is escaped by a single backslash. All other backslashes remain
// unchanged.
//
// If anyone actually has depfiles that rely on the more complicated
// behavior we can adjust this.
Parse(string* content, string* err)48 bool DepfileParser::Parse(string* content, string* err) {
49 // in: current parser input point.
50 // end: end of input.
51 // parsing_targets: whether we are parsing targets or dependencies.
52 char* in = &(*content)[0];
53 char* end = in + content->size();
54 bool have_target = false;
55 bool parsing_targets = true;
56 bool poisoned_input = false;
57 bool is_empty = true;
58 while (in < end) {
59 bool have_newline = false;
60 // out: current output point (typically same as in, but can fall behind
61 // as we de-escape backslashes).
62 char* out = in;
63 // filename: start of the current parsed filename.
64 char* filename = out;
65 for (;;) {
66 // start: beginning of the current parsed span.
67 const char* start = in;
68 char* yymarker = NULL;
69
70 {
71 unsigned char yych;
72 static const unsigned char yybm[] = {
73 0, 0, 0, 0, 0, 0, 0, 0,
74 0, 0, 0, 0, 0, 0, 0, 0,
75 0, 0, 0, 0, 0, 0, 0, 0,
76 0, 0, 0, 0, 0, 0, 0, 0,
77 0, 128, 0, 0, 0, 128, 0, 0,
78 128, 128, 0, 128, 128, 128, 128, 128,
79 128, 128, 128, 128, 128, 128, 128, 128,
80 128, 128, 128, 0, 0, 128, 0, 0,
81 128, 128, 128, 128, 128, 128, 128, 128,
82 128, 128, 128, 128, 128, 128, 128, 128,
83 128, 128, 128, 128, 128, 128, 128, 128,
84 128, 128, 128, 128, 0, 128, 0, 128,
85 0, 128, 128, 128, 128, 128, 128, 128,
86 128, 128, 128, 128, 128, 128, 128, 128,
87 128, 128, 128, 128, 128, 128, 128, 128,
88 128, 128, 128, 128, 0, 128, 128, 0,
89 128, 128, 128, 128, 128, 128, 128, 128,
90 128, 128, 128, 128, 128, 128, 128, 128,
91 128, 128, 128, 128, 128, 128, 128, 128,
92 128, 128, 128, 128, 128, 128, 128, 128,
93 128, 128, 128, 128, 128, 128, 128, 128,
94 128, 128, 128, 128, 128, 128, 128, 128,
95 128, 128, 128, 128, 128, 128, 128, 128,
96 128, 128, 128, 128, 128, 128, 128, 128,
97 128, 128, 128, 128, 128, 128, 128, 128,
98 128, 128, 128, 128, 128, 128, 128, 128,
99 128, 128, 128, 128, 128, 128, 128, 128,
100 128, 128, 128, 128, 128, 128, 128, 128,
101 128, 128, 128, 128, 128, 128, 128, 128,
102 128, 128, 128, 128, 128, 128, 128, 128,
103 128, 128, 128, 128, 128, 128, 128, 128,
104 128, 128, 128, 128, 128, 128, 128, 128,
105 };
106 yych = *in;
107 if (yybm[0+yych] & 128) {
108 goto yy9;
109 }
110 if (yych <= '\r') {
111 if (yych <= '\t') {
112 if (yych >= 0x01) goto yy4;
113 } else {
114 if (yych <= '\n') goto yy6;
115 if (yych <= '\f') goto yy4;
116 goto yy8;
117 }
118 } else {
119 if (yych <= '$') {
120 if (yych <= '#') goto yy4;
121 goto yy12;
122 } else {
123 if (yych <= '?') goto yy4;
124 if (yych <= '\\') goto yy13;
125 goto yy4;
126 }
127 }
128 ++in;
129 {
130 break;
131 }
132 yy4:
133 ++in;
134 yy5:
135 {
136 // For any other character (e.g. whitespace), swallow it here,
137 // allowing the outer logic to loop around again.
138 break;
139 }
140 yy6:
141 ++in;
142 {
143 // A newline ends the current file name and the current rule.
144 have_newline = true;
145 break;
146 }
147 yy8:
148 yych = *++in;
149 if (yych == '\n') goto yy6;
150 goto yy5;
151 yy9:
152 yych = *++in;
153 if (yybm[0+yych] & 128) {
154 goto yy9;
155 }
156 yy11:
157 {
158 // Got a span of plain text.
159 int len = (int)(in - start);
160 // Need to shift it over if we're overwriting backslashes.
161 if (out < start)
162 memmove(out, start, len);
163 out += len;
164 continue;
165 }
166 yy12:
167 yych = *++in;
168 if (yych == '$') goto yy14;
169 goto yy5;
170 yy13:
171 yych = *(yymarker = ++in);
172 if (yych <= ' ') {
173 if (yych <= '\n') {
174 if (yych <= 0x00) goto yy5;
175 if (yych <= '\t') goto yy16;
176 goto yy17;
177 } else {
178 if (yych == '\r') goto yy19;
179 if (yych <= 0x1F) goto yy16;
180 goto yy21;
181 }
182 } else {
183 if (yych <= '9') {
184 if (yych == '#') goto yy23;
185 goto yy16;
186 } else {
187 if (yych <= ':') goto yy25;
188 if (yych == '\\') goto yy27;
189 goto yy16;
190 }
191 }
192 yy14:
193 ++in;
194 {
195 // De-escape dollar character.
196 *out++ = '$';
197 continue;
198 }
199 yy16:
200 ++in;
201 goto yy11;
202 yy17:
203 ++in;
204 {
205 // A line continuation ends the current file name.
206 break;
207 }
208 yy19:
209 yych = *++in;
210 if (yych == '\n') goto yy17;
211 in = yymarker;
212 goto yy5;
213 yy21:
214 ++in;
215 {
216 // 2N+1 backslashes plus space -> N backslashes plus space.
217 int len = (int)(in - start);
218 int n = len / 2 - 1;
219 if (out < start)
220 memset(out, '\\', n);
221 out += n;
222 *out++ = ' ';
223 continue;
224 }
225 yy23:
226 ++in;
227 {
228 // De-escape hash sign, but preserve other leading backslashes.
229 int len = (int)(in - start);
230 if (len > 2 && out < start)
231 memset(out, '\\', len - 2);
232 out += len - 2;
233 *out++ = '#';
234 continue;
235 }
236 yy25:
237 yych = *++in;
238 if (yych <= '\f') {
239 if (yych <= 0x00) goto yy28;
240 if (yych <= 0x08) goto yy26;
241 if (yych <= '\n') goto yy28;
242 } else {
243 if (yych <= '\r') goto yy28;
244 if (yych == ' ') goto yy28;
245 }
246 yy26:
247 {
248 // De-escape colon sign, but preserve other leading backslashes.
249 // Regular expression uses lookahead to make sure that no whitespace
250 // nor EOF follows. In that case it'd be the : at the end of a target
251 int len = (int)(in - start);
252 if (len > 2 && out < start)
253 memset(out, '\\', len - 2);
254 out += len - 2;
255 *out++ = ':';
256 continue;
257 }
258 yy27:
259 yych = *++in;
260 if (yych <= ' ') {
261 if (yych <= '\n') {
262 if (yych <= 0x00) goto yy11;
263 if (yych <= '\t') goto yy16;
264 goto yy11;
265 } else {
266 if (yych == '\r') goto yy11;
267 if (yych <= 0x1F) goto yy16;
268 goto yy30;
269 }
270 } else {
271 if (yych <= '9') {
272 if (yych == '#') goto yy23;
273 goto yy16;
274 } else {
275 if (yych <= ':') goto yy25;
276 if (yych == '\\') goto yy32;
277 goto yy16;
278 }
279 }
280 yy28:
281 ++in;
282 {
283 // Backslash followed by : and whitespace.
284 // It is therefore normal text and not an escaped colon
285 int len = (int)(in - start - 1);
286 // Need to shift it over if we're overwriting backslashes.
287 if (out < start)
288 memmove(out, start, len);
289 out += len;
290 if (*(in - 1) == '\n')
291 have_newline = true;
292 break;
293 }
294 yy30:
295 ++in;
296 {
297 // 2N backslashes plus space -> 2N backslashes, end of filename.
298 int len = (int)(in - start);
299 if (out < start)
300 memset(out, '\\', len - 1);
301 out += len - 1;
302 break;
303 }
304 yy32:
305 yych = *++in;
306 if (yych <= ' ') {
307 if (yych <= '\n') {
308 if (yych <= 0x00) goto yy11;
309 if (yych <= '\t') goto yy16;
310 goto yy11;
311 } else {
312 if (yych == '\r') goto yy11;
313 if (yych <= 0x1F) goto yy16;
314 goto yy21;
315 }
316 } else {
317 if (yych <= '9') {
318 if (yych == '#') goto yy23;
319 goto yy16;
320 } else {
321 if (yych <= ':') goto yy25;
322 if (yych == '\\') goto yy27;
323 goto yy16;
324 }
325 }
326 }
327 }
328
329 int len = (int)(out - filename);
330 const bool is_dependency = !parsing_targets;
331 if (len > 0 && filename[len - 1] == ':') {
332 len--; // Strip off trailing colon, if any.
333 parsing_targets = false;
334 have_target = true;
335 }
336
337 if (len > 0) {
338 is_empty = false;
339 StringPiece piece = StringPiece(filename, len);
340 // If we've seen this as an input before, skip it.
341 std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
342 if (pos == ins_.end()) {
343 if (is_dependency) {
344 if (poisoned_input) {
345 *err = "inputs may not also have inputs";
346 return false;
347 }
348 // New input.
349 ins_.push_back(piece);
350 } else {
351 // Check for a new output.
352 if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
353 outs_.push_back(piece);
354 }
355 } else if (!is_dependency) {
356 // We've passed an input on the left side; reject new inputs.
357 poisoned_input = true;
358 }
359 }
360
361 if (have_newline) {
362 // A newline ends a rule so the next filename will be a new target.
363 parsing_targets = true;
364 poisoned_input = false;
365 }
366 }
367 if (!have_target && !is_empty) {
368 *err = "expected ':' in depfile";
369 return false;
370 }
371 return true;
372 }
373