1b1994897Sopenharmony_ci#!/usr/bin/env python3
2b1994897Sopenharmony_ci# -- coding: utf-8 --
3b1994897Sopenharmony_ci# Copyright (c) 2021-2022 Huawei Device Co., Ltd.
4b1994897Sopenharmony_ci# Licensed under the Apache License, Version 2.0 (the "License");
5b1994897Sopenharmony_ci# you may not use this file except in compliance with the License.
6b1994897Sopenharmony_ci# You may obtain a copy of the License at
7b1994897Sopenharmony_ci#
8b1994897Sopenharmony_ci# http://www.apache.org/licenses/LICENSE-2.0
9b1994897Sopenharmony_ci#
10b1994897Sopenharmony_ci# Unless required by applicable law or agreed to in writing, software
11b1994897Sopenharmony_ci# distributed under the License is distributed on an "AS IS" BASIS,
12b1994897Sopenharmony_ci# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13b1994897Sopenharmony_ci# See the License for the specific language governing permissions and
14b1994897Sopenharmony_ci# limitations under the License.
15b1994897Sopenharmony_ci
16b1994897Sopenharmony_ciimport sys
17b1994897Sopenharmony_ciimport os
18b1994897Sopenharmony_ciimport itertools
19b1994897Sopenharmony_cifrom typing import NamedTuple
20b1994897Sopenharmony_ci
21b1994897Sopenharmony_ci
class GCPauseStats(NamedTuple):
    """Namespace of constants used while parsing gc logs and building tables.

    The names below carry no annotations on purpose: they stay plain class
    attributes and the tuple itself declares no fields.
    """
    # Markers surrounding the pause duration inside a gc log line
    PAUSE_DETECT_STR = ", paused "
    TOTAL_DETECT_STR = " total "
    # Short gc type names, in the order they are searched for in a log line
    GC_TYPES = [
        "YOUNG",
        "MIXED",
        "TENURED",
        "FULL",
    ]
    # Statistic names, in the order the rows are written to the result table
    LIST_OF_STATS = [
        "count",
        "min",
        "max",
        "avg",
        "sum",
    ]
28b1994897Sopenharmony_ci
29b1994897Sopenharmony_ci
def sort_one_gc_stat(stats: dict, gc_type: str) -> list:
    """Return the stat names that mention gc_type, most frequent first.

    Args:
        stats: mapping of stat name to a dict holding at least a "count" item.
        gc_type: text that a stat name has to contain to be selected.

    Returns:
        The selected names ordered by descending "count"; names with equal
        counts keep the order in which they appear in stats.
    """
    matching = [name for name in stats if gc_type in name]
    matching.sort(key=lambda name: stats.get(name)["count"], reverse=True)
    return matching
37b1994897Sopenharmony_ci
38b1994897Sopenharmony_ci
def sort_gc_stats(stats: dict) -> list:
    """Return stat names grouped by gc type, ordered for the pretty table.

    One group is built for every entry of GCPauseStats.GC_TYPES that is a key
    of stats; inside a group the names are ordered by sort_one_gc_stat. The
    groups themselves are ordered by the descending "count" of their first
    name and then flattened into a single list.
    """
    groups = [
        sort_one_gc_stat(stats, short_type)
        for short_type in GCPauseStats.GC_TYPES
        if short_type in stats
    ]
    ordered_groups = sorted(
        groups, key=lambda group: stats.get(group[0])["count"], reverse=True)
    return [name for group in ordered_groups for name in group]
47b1994897Sopenharmony_ci
48b1994897Sopenharmony_ci
def save_pause_stats(gc_log_path: str, file_name: str, stats: dict) -> None:
    """Append a markdown table with the pause stats to the result file.

    Args:
        gc_log_path: text written after "GC logs: " as the table caption.
        file_name: path of the file the table is appended to.
        stats: mapping of stat name to a dict with the keys listed in
            GCPauseStats.LIST_OF_STATS; it has to contain the "Total" entry.

    The table has one column per stat name ("Total" first, then the order
    given by sort_gc_stats) and one row per entry of LIST_OF_STATS.
    """
    columns = ["Total"] + sort_gc_stats(stats)
    table_lines = [
        "| Parameter |" + "".join(f" {column} |" for column in columns),
        # The delimiter row is sized from the header columns actually written,
        # not from len(stats): a markdown table is only recognised when both
        # rows have the same number of cells.
        "|:----|" + ":---:|" * len(columns),
    ]
    for stat_type in GCPauseStats.LIST_OF_STATS:
        cells = "".join(
            f" {stats.get(column).get(stat_type)} |" for column in columns)
        table_lines.append(f"| {stat_type} |{cells}")
    # The whole table is built before the file is opened, so a failure while
    # formatting does not leave a half-written table behind.
    with open(file_name, 'a') as file:
        file.write(f"GC logs: {gc_log_path}\n\n")
        file.write("\n".join(table_lines))
        file.write("\n\n")
65b1994897Sopenharmony_ci
66b1994897Sopenharmony_ci
def get_ms_time(line: str) -> float:
    """Return the pause time found in a gc log line, converted to ms.

    The time is the text between GCPauseStats.PAUSE_DETECT_STR and the next
    GCPauseStats.TOTAL_DETECT_STR (or the end of the line when the latter is
    absent) and must end with one of the suffixes "ms", "us" or "s".

    Raises:
        ValueError: the pause marker is missing, the suffix is not recognised
            or the numeric part cannot be converted to float.
    """
    # "ms" is tested before "s" because every "ms" value also ends with "s"
    units = (("ms", 1.0), ("us", 0.001), ("s", 1000.0))
    pause_pos = line.find(GCPauseStats.PAUSE_DETECT_STR)
    if pause_pos == -1:
        # Without this guard the -1 would shift the slice start and make the
        # function parse unrelated text.
        raise ValueError("Could not find pause time in gc log line")
    value_start = pause_pos + len(GCPauseStats.PAUSE_DETECT_STR)
    total_pos = line.find(GCPauseStats.TOTAL_DETECT_STR, pause_pos)
    if total_pos == -1:
        # No total marker: take the rest of the line instead of slicing with
        # -1, which would silently drop the last character.
        time_str = line[value_start:]
    else:
        time_str = line[value_start:total_pos]
    time_str = time_str.strip()
    for suffix, factor in units:
        if time_str.endswith(suffix):
            return float(time_str[:-len(suffix)]) * factor
    raise ValueError("Could not detect time format")
77b1994897Sopenharmony_ci
78b1994897Sopenharmony_ci
def get_full_type(line: str, cause_start: int, cause_len: int) -> str:
    """Get gc type with cause.

    Args:
        line: text that contains the gc type inside square brackets.
        cause_start: index in line where the gc type starts.
        cause_len: length of the gc type.

    Returns:
        The text between the nearest '[' at or before cause_start and the
        nearest ']' at or after the end of the gc type, brackets excluded.

    Raises:
        IndexError: one of the enclosing brackets is missing.
    """
    cause_end = cause_start + cause_len
    # Bounded searches: a manual backward scan would wrap around to negative
    # indices and could pick up a '[' located after the gc type.
    open_pos = line.rfind('[', 0, cause_start + 1)
    close_pos = line.find(']', cause_end)
    if open_pos == -1 or close_pos == -1:
        # IndexError is what running off either end of the line raised before
        raise IndexError("gc type is not enclosed in square brackets")
    return line[open_pos + 1:close_pos]
87b1994897Sopenharmony_ci
88b1994897Sopenharmony_ci
def get_gc_type(line: str) -> (str, str):
    """Return the short gc type found in line and the gc type with its cause.

    The entries of GCPauseStats.GC_TYPES are tried in order and the first one
    present in line wins; the second item of the result comes from
    get_full_type.

    Raises:
        ValueError: line contains none of the known gc types.
    """
    # Lazy pairs of (type name, position) so the search stops at the first hit
    positions = ((name, line.find(name)) for name in GCPauseStats.GC_TYPES)
    for name, position in positions:
        if position >= 0:
            return name, get_full_type(line, position, len(name))
    raise ValueError("Unsupported gc cause")
96b1994897Sopenharmony_ci
97b1994897Sopenharmony_ci
def update_stats(stats: dict, gc_type: str, time_value: float):
    """Fold one pause time into the stats entry stored under gc_type.

    A zeroed entry is created when stats has no gc_type key yet. The entry is
    modified in place: "count" grows by one, "sum" by time_value, "avg" is
    recomputed, and "min"/"max" are adjusted (the first sample always becomes
    the minimum).
    """
    entry = stats.setdefault(
        gc_type, dict(max=0.0, min=0.0, avg=0.0, sum=0.0, count=0))
    # Every new value is computed before the entry is touched
    new_count = entry.get("count") + 1
    new_sum = entry.get("sum") + time_value
    new_avg = new_sum / new_count
    new_min = time_value if new_count == 1 else min(entry.get("min"), time_value)
    new_max = max(entry.get("max"), time_value)
    entry.update(
        max=new_max, min=new_min, avg=new_avg, sum=new_sum, count=new_count)
    stats[gc_type] = entry
123b1994897Sopenharmony_ci
124b1994897Sopenharmony_ci
def detect_str(line: str) -> (int, int):
    """Locate the gc marker in a log line.

    Returns:
        (position, marker length) for the first marker that is present in
        line, or (-1, 0) when the line contains none of them.
    """
    # Markers of mobile and host logs, checked in this order
    markers = (" I Ark gc  : ", " I/gc: ")
    hits = ((line.find(marker), len(marker)) for marker in markers)
    return next((hit for hit in hits if hit[0] != -1), (-1, 0))
133b1994897Sopenharmony_ci
134b1994897Sopenharmony_ci
def update_group_stats(gc_pause_stats: dict, gc_type: str, full_gc_type: str, time_v: float):
    """Record one pause time under "Total", the short and the full gc type."""
    for stat_name in ("Total", gc_type, full_gc_type):
        update_stats(gc_pause_stats, stat_name, time_v)
140b1994897Sopenharmony_ci
141b1994897Sopenharmony_ci
def process_one_log(gc_log_path: str, result_file_path: str, all_stats: dict) -> None:
    """Collect pause stats from one log file and append its table to the result.

    Args:
        gc_log_path: path of the log file to read.
        result_file_path: path of the file the stats table is appended to.
        all_stats: stats accumulated over all logs; updated in place with
            every pause found in this log.

    Only the text after a gc marker (see detect_str) is parsed, and only when
    that text contains GCPauseStats.PAUSE_DETECT_STR.
    """
    gc_pause_stats = {"Total": {
        "max": 0.0,
        "min": 0.0,
        "avg": 0.0,
        "sum": 0.0,
        "count": 0
    }
    }
    with open(gc_log_path, 'r') as log_file:
        # Iterate the file object directly so the log is streamed line by line
        # instead of being loaded into memory as a whole.
        for f_line in log_file:
            marker_pos, marker_len = detect_str(f_line)
            if marker_pos == -1:
                continue
            gc_info_str = f_line[marker_pos + marker_len:]
            # The pause marker is looked up in the part that is parsed below,
            # not in the whole line, so a marker placed before the gc marker
            # cannot send a string without pause info to get_ms_time.
            if GCPauseStats.PAUSE_DETECT_STR not in gc_info_str:
                continue
            time_v = get_ms_time(gc_info_str)
            cause_s, full_cause_s = get_gc_type(gc_info_str)
            update_group_stats(gc_pause_stats, cause_s, full_cause_s, time_v)
            update_group_stats(all_stats, cause_s, full_cause_s, time_v)
    save_pause_stats(gc_log_path, result_file_path, gc_pause_stats)
164b1994897Sopenharmony_ci
165b1994897Sopenharmony_ci
def main() -> None:
    """Script's entrypoint.

    Reads sys.argv: every argument but the last is a gc log path, the last
    one is the result file path. The result file is rewritten from scratch,
    one table is appended per existing log file and, when more than one log
    was processed, a final table with the stats of all logs together.
    Paths that are not files are reported on stderr and skipped.

    Exits with status 2 after printing the usage when fewer than two
    arguments are given.
    """
    if len(sys.argv) < 3:
        usage_lines = (
            "Incorrect parameters count",
            "Usage: ",
            f"  python3 {sys.argv[0]} <gc_log_1...> <results_path>",
            "    gc_log_num   -- Path to gc logs or application logs with gc logs",
            "    results_path -- Path to result file with pause stats",
            f"Example: python3 {sys.argv[0]} gc_log.txt result.md",
        )
        for usage_line in usage_lines:
            print(usage_line, file=sys.stderr)
        # sys.exit is always available; the bare exit() helper is injected by
        # the site module and is missing when the interpreter skips it.
        sys.exit(2)

    all_gc_stats = {"Total": {
        "max": 0.0,
        "min": 0.0,
        "avg": 0.0,
        "sum": 0.0,
        "count": 0
    }
    }
    result_file_path = os.path.abspath(sys.argv[-1])

    # Start the result file from scratch; the tables are appended later
    with open(result_file_path, 'w') as result_file:
        result_file.write("_Generated by gc pause stats script_\n\n")
        result_file.write("All times in ms\n\n")

    gc_log_paths = []
    for log_path in map(os.path.abspath, sys.argv[1:-1]):
        if os.path.isfile(log_path):
            gc_log_paths.append(log_path)
        else:
            print(f"{log_path}: No such log file", file=sys.stderr)

    for log_path in gc_log_paths:
        process_one_log(log_path, result_file_path, all_gc_stats)
    # A summary table only makes sense when several logs were merged
    if len(gc_log_paths) > 1:
        save_pause_stats(
            f"All {len(gc_log_paths)} logs", result_file_path, all_gc_stats)
205b1994897Sopenharmony_ci
206b1994897Sopenharmony_ci
# Run the entrypoint only when the file is executed as a script, not on import
if __name__ == "__main__":
    main()
209