1 /* Generated by re2c */
2 // Copyright 2011 Google Inc. All Rights Reserved.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //     http://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 
16 #include "depfile_parser.h"
17 #include "util.h"
18 
19 #include <algorithm>
20 
21 using namespace std;
22 
DepfileParser(DepfileParserOptions options)23 DepfileParser::DepfileParser(DepfileParserOptions options)
24     : options_(options)
25 {
26 }
27 
28 // A note on backslashes in Makefiles, from reading the docs:
29 // Backslash-newline is the line continuation character.
30 // Backslash-# escapes a # (otherwise meaningful as a comment start).
31 // Backslash-% escapes a % (otherwise meaningful as a special).
32 // Finally, quoting the GNU manual, "Backslashes that are not in danger
33 // of quoting ‘%’ characters go unmolested."
34 // How do you end a line with a backslash?  The netbsd Make docs suggest
35 // reading the result of a shell command echoing a backslash!
36 //
// Rather than implement all of the above, we follow what GCC/Clang produces:
// Backslashes escape a space or hash sign.
// When a space is preceded by 2N+1 backslashes, it represents N backslashes
// followed by space.
41 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
42 // the end of a filename.
43 // A hash sign is escaped by a single backslash. All other backslashes remain
44 // unchanged.
45 //
46 // If anyone actually has depfiles that rely on the more complicated
47 // behavior we can adjust this.
Parse(string* content, string* err)48 bool DepfileParser::Parse(string* content, string* err) {
49     // in: current parser input point.
50     // end: end of input.
51     // parsing_targets: whether we are parsing targets or dependencies.
52     char* in = &(*content)[0];
53     char* end = in + content->size();
54     bool have_target = false;
55     bool parsing_targets = true;
56     bool poisoned_input = false;
57     bool is_empty = true;
58     while (in < end) {
59         bool have_newline = false;
60         // out: current output point (typically same as in, but can fall behind
61         // as we de-escape backslashes).
62         char* out = in;
63         // filename: start of the current parsed filename.
64         char* filename = out;
65         for (;;) {
66             // start: beginning of the current parsed span.
67             const char* start = in;
68             char* yymarker = NULL;
69 
70         {
71             unsigned char yych;
72             static const unsigned char yybm[] = {
73                 0,   0,   0,   0,   0,   0,   0,   0,
74                 0,   0,   0,   0,   0,   0,   0,   0,
75                 0,   0,   0,   0,   0,   0,   0,   0,
76                 0,   0,   0,   0,   0,   0,   0,   0,
77                 0, 128,   0,   0,   0, 128,   0,   0,
78               128, 128,   0, 128, 128, 128, 128, 128,
79               128, 128, 128, 128, 128, 128, 128, 128,
80               128, 128, 128,   0,   0, 128,   0,   0,
81               128, 128, 128, 128, 128, 128, 128, 128,
82               128, 128, 128, 128, 128, 128, 128, 128,
83               128, 128, 128, 128, 128, 128, 128, 128,
84               128, 128, 128, 128,   0, 128,   0, 128,
85                 0, 128, 128, 128, 128, 128, 128, 128,
86               128, 128, 128, 128, 128, 128, 128, 128,
87               128, 128, 128, 128, 128, 128, 128, 128,
88               128, 128, 128, 128,   0, 128, 128,   0,
89               128, 128, 128, 128, 128, 128, 128, 128,
90               128, 128, 128, 128, 128, 128, 128, 128,
91               128, 128, 128, 128, 128, 128, 128, 128,
92               128, 128, 128, 128, 128, 128, 128, 128,
93               128, 128, 128, 128, 128, 128, 128, 128,
94               128, 128, 128, 128, 128, 128, 128, 128,
95               128, 128, 128, 128, 128, 128, 128, 128,
96               128, 128, 128, 128, 128, 128, 128, 128,
97               128, 128, 128, 128, 128, 128, 128, 128,
98               128, 128, 128, 128, 128, 128, 128, 128,
99               128, 128, 128, 128, 128, 128, 128, 128,
100               128, 128, 128, 128, 128, 128, 128, 128,
101               128, 128, 128, 128, 128, 128, 128, 128,
102               128, 128, 128, 128, 128, 128, 128, 128,
103               128, 128, 128, 128, 128, 128, 128, 128,
104               128, 128, 128, 128, 128, 128, 128, 128,
105           };
106           yych = *in;
107           if (yybm[0+yych] & 128) {
108               goto yy9;
109           }
110           if (yych <= '\r') {
111               if (yych <= '\t') {
112                   if (yych >= 0x01) goto yy4;
113               } else {
114                   if (yych <= '\n') goto yy6;
115                   if (yych <= '\f') goto yy4;
116                   goto yy8;
117               }
118           } else {
119               if (yych <= '$') {
120                   if (yych <= '#') goto yy4;
121                   goto yy12;
122               } else {
123                   if (yych <= '?') goto yy4;
124                   if (yych <= '\\') goto yy13;
125                   goto yy4;
126               }
127           }
128           ++in;
129           {
130               break;
131           }
132     yy4:
133         ++in;
134     yy5:
135         {
136             // For any other character (e.g. whitespace), swallow it here,
137             // allowing the outer logic to loop around again.
138             break;
139         }
140     yy6:
141         ++in;
142         {
143             // A newline ends the current file name and the current rule.
144             have_newline = true;
145             break;
146         }
147     yy8:
148         yych = *++in;
149         if (yych == '\n') goto yy6;
150         goto yy5;
151     yy9:
152         yych = *++in;
153         if (yybm[0+yych] & 128) {
154             goto yy9;
155         }
156     yy11:
157         {
158             // Got a span of plain text.
159             int len = (int)(in - start);
160             // Need to shift it over if we're overwriting backslashes.
161             if (out < start)
162                 memmove(out, start, len);
163             out += len;
164             continue;
165         }
166     yy12:
167         yych = *++in;
168         if (yych == '$') goto yy14;
169         goto yy5;
170     yy13:
171         yych = *(yymarker = ++in);
172         if (yych <= ' ') {
173             if (yych <= '\n') {
174                 if (yych <= 0x00) goto yy5;
175                 if (yych <= '\t') goto yy16;
176                 goto yy17;
177             } else {
178                 if (yych == '\r') goto yy19;
179                 if (yych <= 0x1F) goto yy16;
180                 goto yy21;
181             }
182         } else {
183             if (yych <= '9') {
184                 if (yych == '#') goto yy23;
185                 goto yy16;
186             } else {
187                 if (yych <= ':') goto yy25;
188                 if (yych == '\\') goto yy27;
189                 goto yy16;
190             }
191         }
192     yy14:
193         ++in;
194         {
195             // De-escape dollar character.
196             *out++ = '$';
197             continue;
198         }
199     yy16:
200         ++in;
201         goto yy11;
202     yy17:
203         ++in;
204         {
205             // A line continuation ends the current file name.
206             break;
207         }
208     yy19:
209         yych = *++in;
210         if (yych == '\n') goto yy17;
211         in = yymarker;
212         goto yy5;
213     yy21:
214         ++in;
215         {
216             // 2N+1 backslashes plus space -> N backslashes plus space.
217             int len = (int)(in - start);
218             int n = len / 2 - 1;
219             if (out < start)
220                 memset(out, '\\', n);
221             out += n;
222             *out++ = ' ';
223             continue;
224         }
225     yy23:
226         ++in;
227         {
228             // De-escape hash sign, but preserve other leading backslashes.
229             int len = (int)(in - start);
230             if (len > 2 && out < start)
231                 memset(out, '\\', len - 2);
232             out += len - 2;
233             *out++ = '#';
234             continue;
235         }
236     yy25:
237         yych = *++in;
238         if (yych <= '\f') {
239             if (yych <= 0x00) goto yy28;
240             if (yych <= 0x08) goto yy26;
241             if (yych <= '\n') goto yy28;
242         } else {
243             if (yych <= '\r') goto yy28;
244             if (yych == ' ') goto yy28;
245         }
246     yy26:
247         {
248             // De-escape colon sign, but preserve other leading backslashes.
249             // Regular expression uses lookahead to make sure that no whitespace
250             // nor EOF follows. In that case it'd be the : at the end of a target
251             int len = (int)(in - start);
252             if (len > 2 && out < start)
253                 memset(out, '\\', len - 2);
254             out += len - 2;
255             *out++ = ':';
256             continue;
257         }
258     yy27:
259         yych = *++in;
260         if (yych <= ' ') {
261             if (yych <= '\n') {
262                 if (yych <= 0x00) goto yy11;
263                 if (yych <= '\t') goto yy16;
264                 goto yy11;
265             } else {
266                 if (yych == '\r') goto yy11;
267                 if (yych <= 0x1F) goto yy16;
268                 goto yy30;
269             }
270         } else {
271             if (yych <= '9') {
272                 if (yych == '#') goto yy23;
273                 goto yy16;
274             } else {
275                 if (yych <= ':') goto yy25;
276                 if (yych == '\\') goto yy32;
277                 goto yy16;
278             }
279         }
280     yy28:
281         ++in;
282         {
283             // Backslash followed by : and whitespace.
284             // It is therefore normal text and not an escaped colon
285             int len = (int)(in - start - 1);
286             // Need to shift it over if we're overwriting backslashes.
287             if (out < start)
288                 memmove(out, start, len);
289             out += len;
290             if (*(in - 1) == '\n')
291                 have_newline = true;
292             break;
293         }
294     yy30:
295         ++in;
296         {
297             // 2N backslashes plus space -> 2N backslashes, end of filename.
298             int len = (int)(in - start);
299             if (out < start)
300                 memset(out, '\\', len - 1);
301             out += len - 1;
302             break;
303         }
304     yy32:
305         yych = *++in;
306         if (yych <= ' ') {
307             if (yych <= '\n') {
308                 if (yych <= 0x00) goto yy11;
309                 if (yych <= '\t') goto yy16;
310                 goto yy11;
311             } else {
312                 if (yych == '\r') goto yy11;
313                 if (yych <= 0x1F) goto yy16;
314                 goto yy21;
315             }
316         } else {
317             if (yych <= '9') {
318                 if (yych == '#') goto yy23;
319                 goto yy16;
320             } else {
321                 if (yych <= ':') goto yy25;
322                 if (yych == '\\') goto yy27;
323                 goto yy16;
324             }
325         }
326     }
327 }
328 
329     int len = (int)(out - filename);
330     const bool is_dependency = !parsing_targets;
331     if (len > 0 && filename[len - 1] == ':') {
332         len--;  // Strip off trailing colon, if any.
333         parsing_targets = false;
334         have_target = true;
335     }
336 
337     if (len > 0) {
338         is_empty = false;
339         StringPiece piece = StringPiece(filename, len);
340         // If we've seen this as an input before, skip it.
341         std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
342         if (pos == ins_.end()) {
343             if (is_dependency) {
344                 if (poisoned_input) {
345                     *err = "inputs may not also have inputs";
346                     return false;
347                 }
348                 // New input.
349                 ins_.push_back(piece);
350             } else {
351                 // Check for a new output.
352                 if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
353                     outs_.push_back(piece);
354                 }
355             } else if (!is_dependency) {
356                 // We've passed an input on the left side; reject new inputs.
357                 poisoned_input = true;
358             }
359     }
360 
361     if (have_newline) {
362         // A newline ends a rule so the next filename will be a new target.
363         parsing_targets = true;
364         poisoned_input = false;
365     }
366 }
367     if (!have_target && !is_empty) {
368         *err = "expected ':' in depfile";
369         return false;
370     }
371     return true;
372 }
373