1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //     http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "depfile_parser.h"
16 #include "util.h"
17 
18 #include <algorithm>
19 
20 using namespace std;
21 
DepfileParser(DepfileParserOptions options)22 DepfileParser::DepfileParser(DepfileParserOptions options)
23     : options_(options)
24 {
25 }
26 
27 // A note on backslashes in Makefiles, from reading the docs:
28 // Backslash-newline is the line continuation character.
29 // Backslash-# escapes a # (otherwise meaningful as a comment start).
30 // Backslash-% escapes a % (otherwise meaningful as a special).
31 // Finally, quoting the GNU manual, "Backslashes that are not in danger
32 // of quoting ‘%’ characters go unmolested."
33 // How do you end a line with a backslash?  The netbsd Make docs suggest
34 // reading the result of a shell command echoing a backslash!
35 //
// Rather than implement all of the above, we follow what GCC/Clang produces:
37 // Backslashes escape a space or hash sign.
// When a space is preceded by 2N+1 backslashes, it represents N backslashes
39 // followed by space.
40 // When a space is preceded by 2N backslashes, it represents 2N backslashes at
41 // the end of a filename.
42 // A hash sign is escaped by a single backslash. All other backslashes remain
43 // unchanged.
44 //
45 // If anyone actually has depfiles that rely on the more complicated
46 // behavior we can adjust this.
//
// Parses the depfile text in |content|, recording each distinct target in
// outs_ and each distinct dependency in ins_.
//
// The buffer behind |content| is rewritten in place while escapes are removed,
// and every recorded StringPiece is built from a pointer into that buffer.
// NOTE(review): presumably StringPiece is non-owning, so |content| has to
// outlive the parsed results -- confirm against the StringPiece definition.
//
// Returns true on success (including input that yields no filenames at all).
// Returns false and sets |*err| when:
//   - a not-yet-seen dependency appears in a rule one of whose targets was
//     already recorded as an input ("inputs may not also have inputs"), or
//   - filenames were found but none of them ended in ':'
//     ("expected ':' in depfile").
bool DepfileParser::Parse(string* content, string* err) {
    // in: current parser input point.
    // end: end of input.
    // parsing_targets: whether we are parsing targets or dependencies.
    // have_target: set once any filename ending in ':' has been seen.
    // poisoned_input: set when a target of the current rule was already
    //   recorded as an input; cleared again at the next newline.
    // is_empty: stays true until the first non-empty filename is produced.
    //
    // NOTE(review): YYFILL is disabled in the scanner configuration below, so
    // the generated matcher presumably does not compare against `end` while
    // matching and instead relies on the NUL that terminates the string data
    // (see the `nul` rule) -- confirm against the re2c documentation.
    char* in = &(*content)[0];
    char* end = in + content->size();
    bool have_target = false;
    bool parsing_targets = true;
    bool poisoned_input = false;
    bool is_empty = true;
    // Each pass of this outer loop produces at most one filename.
    while (in < end) {
        bool have_newline = false;
        // out: current output point (typically same as in, but can fall behind
        // as we de-escape backslashes).
        char* out = in;
        // filename: start of the current parsed filename.
        char* filename = out;
        // Each pass of this inner loop matches one span starting at `in`.
        // Actions that `continue` keep extending the current filename;
        // actions that `break` finish it and hand control to the code below.
        for (;;) {
            // start: beginning of the current parsed span.
            const char* start = in;
            char* yymarker = NULL;
            /*!re2c
            re2c:define:YYCTYPE = "unsigned char";
            re2c:define:YYCURSOR = in;
            re2c:define:YYLIMIT = end;
            re2c:define:YYMARKER = yymarker;

            re2c:yyfill:enable = 0;

            re2c:indent:top = 2;
            re2c:indent:string = "  ";

            nul = "\000";
            newline = '\r'?'\n';

            '\\\\'* '\\ ' {
              // 2N+1 backslashes plus space -> N backslashes plus space.
              int len = (int)(in - start);
              int n = len / 2 - 1;
              if (out < start)
                memset(out, '\\', n);
              out += n;
              *out++ = ' ';
              continue;
            }
            '\\\\'+ ' ' {
              // 2N backslashes plus space -> 2N backslashes, end of filename.
              int len = (int)(in - start);
              if (out < start)
                memset(out, '\\', len - 1);
              out += len - 1;
              break;
            }
            '\\'+ '#' {
              // De-escape hash sign, but preserve other leading backslashes.
              int len = (int)(in - start);
              if (len > 2 && out < start)
                memset(out, '\\', len - 2);
              out += len - 2;
              *out++ = '#';
              continue;
            }
            '\\'+ ':' [\x00\x20\r\n\t] {
              // Backslash followed by : and whitespace.
              // It is therefore normal text and not an escaped colon
              int len = (int)(in - start - 1);
              // Need to shift it over if we're overwriting backslashes.
              if (out < start)
                memmove(out, start, len);
              out += len;
              if (*(in - 1) == '\n')
                have_newline = true;
              break;
            }
            '\\'+ ':' {
              // De-escape colon sign, but preserve other leading backslashes.
              // Regular expression uses lookahead to make sure that no whitespace
              // nor EOF follows. In that case it'd be the : at the end of a target
              int len = (int)(in - start);
              if (len > 2 && out < start)
                memset(out, '\\', len - 2);
              out += len - 2;
              *out++ = ':';
              continue;
            }
            '$$' {
              // De-escape dollar character.
              *out++ = '$';
              continue;
            }
            '\\'+ [^\000\r\n] | [a-zA-Z0-9+,/_:.~()}{%=@\x5B\x5D!\x80-\xFF-]+ {
              // Got a span of plain text.
              int len = (int)(in - start);
              // Need to shift it over if we're overwriting backslashes.
              if (out < start)
                memmove(out, start, len);
              out += len;
              continue;
            }
            nul {
              break;
            }
            '\\' newline {
              // A line continuation ends the current file name.
              break;
            }
            newline {
              // A newline ends the current file name and the current rule.
              have_newline = true;
              break;
            }
            [^] {
              // For any other character (e.g. whitespace), swallow it here,
              // allowing the outer logic to loop around again.
              break;
            }
            */
        }

        // The de-escaped filename now occupies [filename, out).
        int len = (int)(out - filename);
        // Sampled before the colon check below, so the filename that carries
        // the trailing ':' is itself still handled as a target.
        const bool is_dependency = !parsing_targets;
        if (len > 0 && filename[len - 1] == ':') {
            len--;  // Strip off trailing colon, if any.
            parsing_targets = false;
            have_target = true;
        }

        if (len > 0) {
            is_empty = false;
            StringPiece piece = StringPiece(filename, len);
            // If we've seen this as an input before, skip it.
            std::vector<StringPiece>::iterator pos = std::find(ins_.begin(), ins_.end(), piece);
            if (pos == ins_.end()) {
                if (is_dependency) {
                    if (poisoned_input) {
                        *err = "inputs may not also have inputs";
                        return false;
                    }
                    // New input.
                    ins_.push_back(piece);
                } else {
                    // Check for a new output.
                    if (std::find(outs_.begin(), outs_.end(), piece) == outs_.end())
                        outs_.push_back(piece);
                }
            } else if (!is_dependency) {
                // We've passed an input on the left side; reject new inputs.
                // Note that such a target is not added to outs_.
                poisoned_input = true;
            }
        }

        if (have_newline) {
            // A newline ends a rule so the next filename will be a new target.
            parsing_targets = true;
            poisoned_input = false;
        }
    }
    // Filenames were produced but no target (a name ending in ':') was seen.
    if (!have_target && !is_empty) {
        *err = "expected ':' in depfile";
        return false;
    }
    return true;
}
210