#!/usr/bin/env python3
# coding=utf-8

# amalgamate.py - Amalgamate C source and header files.
# Copyright (c) 2012, Erik Edlund <erik.edlund@32767.se>
#
# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:
#
#  * Redistributions of source code must retain the above copyright notice,
#  this list of conditions and the following disclaimer.
#
#  * Redistributions in binary form must reproduce the above copyright notice,
#  this list of conditions and the following disclaimer in the documentation
#  and/or other materials provided with the distribution.
#
#  * Neither the name of Erik Edlund, nor the names of its contributors may
#  be used to endorse or promote products derived from this software without
#  specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31c5f01b2fSopenharmony_ci
32c5f01b2fSopenharmony_cifrom __future__ import division
33c5f01b2fSopenharmony_cifrom __future__ import print_function
34c5f01b2fSopenharmony_cifrom __future__ import unicode_literals
35c5f01b2fSopenharmony_ci
36c5f01b2fSopenharmony_ciimport argparse
37c5f01b2fSopenharmony_ciimport datetime
38c5f01b2fSopenharmony_ciimport json
39c5f01b2fSopenharmony_ciimport os
40c5f01b2fSopenharmony_ciimport re
41c5f01b2fSopenharmony_ci
42c5f01b2fSopenharmony_ci
class Amalgamation(object):
    # Holds the settings read from the JSON config file and from the
    # command line arguments, and writes the amalgamated output file.

    # Load the JSON config named by args.config and copy the command
    # line settings (verbose, prologue, source_path) onto the instance.
    def __init__(self, args):
        # Directories searched for included files. Defaults to an empty
        # list so that a config without an "include_paths" key does not
        # break find_included_file(); a config value replaces it below.
        self.include_paths = []

        # Only the parsing needs the file, so close it right afterwards.
        with open(args.config, 'r') as f:
            config = json.load(f)

        # Every top-level config key becomes an attribute of the same
        # name; generate() reads self.target and self.sources.
        for key in config:
            setattr(self, key, config[key])

        self.verbose = args.verbose == "yes"
        self.prologue = args.prologue
        self.source_path = args.source_path
        # Paths of all files pulled in so far; TranslationUnit appends
        # to it and consults it to avoid including a file twice.
        self.included_files = []

    # Prepends self.source_path to file_path if needed, i.e., when
    # file_path is not absolute.
    def actual_path(self, file_path):
        if not os.path.isabs(file_path):
            file_path = os.path.join(self.source_path, file_path)
        return file_path

    # Search included file_path in source_dir (if specified, it is
    # tried first) and then in self.include_paths. Returns the first
    # candidate path that names an existing file, or None.
    def find_included_file(self, file_path, source_dir):
        # Work on a copy so that self.include_paths is never modified.
        search_dirs = list(self.include_paths or [])
        if source_dir:
            search_dirs.insert(0, source_dir)

        for search_dir in search_dirs:
            search_path = os.path.join(search_dir, file_path)
            if os.path.isfile(self.actual_path(search_path)):
                # The path is returned as joined here, not resolved
                # against self.source_path.
                return search_path
        return None

    # Generate the amalgamation and write it to the target file.
    def generate(self):
        # Collect the pieces and join them once at the end.
        parts = []

        if self.prologue:
            with open(self.prologue, 'r') as f:
                # The prologue text is used as a strftime() format, so
                # date directives in it are expanded to the current time.
                parts.append(datetime.datetime.now().strftime(f.read()))

        if self.verbose:
            print("Config:")
            print(" target        = {0}".format(self.target))
            print(" working_dir   = {0}".format(os.getcwd()))
            print(" include_paths = {0}".format(self.include_paths))
        print("Creating amalgamation:")
        for file_path in self.sources:
            # Do not check the include paths while processing the source
            # list, all given source paths must be correct.
            print(" - processing \"{0}\"".format(file_path))
            parts.append(TranslationUnit(file_path, self, True).content)

        with open(self.target, 'w') as f:
            f.write("".join(parts))

        print("...done!\n")
        if self.verbose:
            print("Files processed: {0}".format(self.sources))
            print("Files included: {0}".format(self.included_files))
        print("")
105c5f01b2fSopenharmony_ci
106c5f01b2fSopenharmony_ci
107c5f01b2fSopenharmony_cidef _is_within(match, matches):
108c5f01b2fSopenharmony_ci    for m in matches:
109c5f01b2fSopenharmony_ci        if match.start() > m.start() and \
110c5f01b2fSopenharmony_ci                match.end() < m.end():
111c5f01b2fSopenharmony_ci            return True
112c5f01b2fSopenharmony_ci    return False
113c5f01b2fSopenharmony_ci
114c5f01b2fSopenharmony_ci
class TranslationUnit(object):
    # One source or header file whose content has been read and whose
    # trivial #include directives have been expanded in place.

    # // C++ comment.
    cpp_comment_pattern = re.compile(r"//.*?\n")

    # /* C comment. */
    c_comment_pattern = re.compile(r"/\*.*?\*/", re.S)

    # "complex \"stri\\\ng\" value".
    string_pattern = re.compile("[^']" r'".*?(?<=[^\\])"', re.S)

    # Handle simple include directives. Support for advanced
    # directives where macros and defines need to be expanded is
    # not a concern right now.
    include_pattern = re.compile(
        r'#\s*include\s+(<|")(?P<path>.*?)("|>)', re.S)

    # #pragma once
    pragma_once_pattern = re.compile(r'#\s*pragma\s+once', re.S)

    # Read file_path (resolved through amalgamation.actual_path) and
    # process its content. Raises IOError if the file does not exist.
    def __init__(self, file_path, amalgamation, is_root):
        self.file_path = file_path
        self.file_dir = os.path.dirname(file_path)
        self.amalgamation = amalgamation
        self.is_root = is_root

        actual_path = self.amalgamation.actual_path(file_path)
        if not os.path.isfile(actual_path):
            raise IOError("File not found: \"{0}\"".format(file_path))

        # Register the file only once it is known to exist, but before
        # processing, so that nested units see it as already included.
        self.amalgamation.included_files.append(self.file_path)

        with open(actual_path, 'r') as f:
            self.content = f.read()
        # Process after the file is closed: processing recurses into
        # included files and must not keep the whole chain open.
        self._process()

    # Search for pattern in self.content, add the match to
    # contexts if found and update the index accordingly.
    def _search_content(self, index, pattern, contexts):
        # NOTE: the search is not anchored at index, so the match that
        # is recorded may start later in the content.
        match = pattern.search(self.content, index)
        if match:
            contexts.append(match)
            return match.end()
        # The caller passes the index of the previous char, so this
        # resumes right after the char that triggered the search.
        return index + 2

    # Return all the skippable contexts, i.e., comments and strings
    # in which a found directive should not be processed.
    def _find_skippable_contexts(self):
        skippable_contexts = []

        # Walk through the content char by char, and try to grab
        # skippable contexts using regular expressions when found.
        # Start at 1 so that there is always a previous char.
        i = 1
        content_len = len(self.content)
        while i < content_len:
            j = i - 1
            current = self.content[i]
            previous = self.content[j]

            if current == '"':
                # String value.
                pattern = self.string_pattern
            elif current == '*' and previous == '/':
                # C style comment.
                pattern = self.c_comment_pattern
            elif current == '/' and previous == '/':
                # C++ style comment.
                pattern = self.cpp_comment_pattern
            else:
                # Skip to the next char.
                i += 1
                continue

            i = self._search_content(j, pattern, skippable_contexts)

        return skippable_contexts

    # Return, in order, the matches of pattern in self.content that
    # are not located within a skippable context.
    def _find_active_matches(self, pattern):
        skippable_contexts = self._find_skippable_contexts()
        return [match for match in pattern.finditer(self.content)
                if not _is_within(match, skippable_contexts)]

    # Removes pragma once from content. Returns the number of
    # directives removed.
    def _process_pragma_once(self):
        # Content shorter than this is left untouched.
        if len(self.content) < len("#include <x>"):
            return 0

        pragmas = self._find_active_matches(self.pragma_once_pattern)

        # Keep everything between the collected directives.
        prev_end = 0
        parts = []
        for pragma_match in pragmas:
            parts.append(self.content[prev_end:pragma_match.start()])
            prev_end = pragma_match.end()
        parts.append(self.content[prev_end:])
        self.content = "".join(parts)

        return len(pragmas)

    # Include all trivial #include directives into self.content.
    # Returns the number of directives that were resolved to a file.
    def _process_includes(self):
        # Content shorter than this is left untouched.
        if len(self.content) < len("#include <x>"):
            return 0

        # Search for include directives in the content, collect those
        # which should be included into the content.
        includes = []
        for include_match in self._find_active_matches(self.include_pattern):
            # Only the "quoted" form also searches the directory of
            # the including file.
            search_same_dir = include_match.group(1) == '"'
            found_included_path = self.amalgamation.find_included_file(
                include_match.group("path"),
                self.file_dir if search_same_dir else None)
            # Directives that cannot be resolved are left as they are.
            if found_included_path:
                includes.append((include_match, found_included_path))

        # Handle all collected include directives.
        prev_end = 0
        parts = []
        for include_match, found_included_path in includes:
            parts.append(self.content[prev_end:include_match.start()])
            # The directive itself is kept as a comment.
            parts.append("// {0}\n".format(include_match.group(0)))
            # A file that was already included is not expanded again.
            if found_included_path not in self.amalgamation.included_files:
                t = TranslationUnit(found_included_path, self.amalgamation, False)
                parts.append(t.content)
            prev_end = include_match.end()
        parts.append(self.content[prev_end:])
        self.content = "".join(parts)

        return len(includes)

    # Make all content processing
    def _process(self):
        # Pragma once is only removed from included (non-root) files.
        if not self.is_root:
            self._process_pragma_once()
        self._process_includes()
269c5f01b2fSopenharmony_ci
# Command line entry point: parse the arguments, then build and write
# the amalgamation they describe.
def main():
    usage_parts = (
        "amalgamate.py",
        "[-v]",
        "-c path/to/config.json",
        "-s path/to/source/dir",
        "[-p path/to/prologue.(c|h)]",
    )
    parser = argparse.ArgumentParser(
        description="Amalgamate C source and header files.",
        usage=" ".join(usage_parts))

    # (flags, keyword arguments) for each supported option, registered
    # in this order.
    option_specs = (
        (("-v", "--verbose"),
         dict(dest="verbose", choices=["yes", "no"], metavar="",
              help="be verbose")),
        (("-c", "--config"),
         dict(dest="config", required=True, metavar="",
              help="path to a JSON config file")),
        (("-s", "--source"),
         dict(dest="source_path", required=True, metavar="",
              help="source code path")),
        (("-p", "--prologue"),
         dict(dest="prologue", required=False, metavar="",
              help="path to a C prologue file")),
    )
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    Amalgamation(parser.parse_args()).generate()
296c5f01b2fSopenharmony_ci
297c5f01b2fSopenharmony_ci
# Run the command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
300