1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
3#
4# Copyright (c) 2024 Huawei Device Co., Ltd.
5# Licensed under the Apache License, Version 2.0 (the "License");
6# you may not use this file except in compliance with the License.
7# You may obtain a copy of the License at
8#
9#     http://www.apache.org/licenses/LICENSE-2.0
10#
11# Unless required by applicable law or agreed to in writing, software
12# distributed under the License is distributed on an "AS IS" BASIS,
13# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14# See the License for the specific language governing permissions and
15# limitations under the License.
16#
17
18import re
19import copy
20import optparse
21import struct
22import os
23import stat
24
25import parse_functions
26
27TRACE_REGEX_ASYNC = "\s*(\d+)\s+(.*?)\|\d+\|[SFC]\s+:(.*?)\s+:(.*?)\s+(.*?)\s+\]\d+\[\s+\)(\d+)\s*\(\s+(\d+?)-(.*?)\s+"
28TRACE_REGEX_SYNC = "\s*\|\d+\|E\s+:(.*?)\s+:(.*?)\s+(.*?)\s+\]\d+\[\s+\)(\d+)\s*\(\s+(\d+?)-(.*?)\s+"
29text_file = ""
30binary_file = ""
31out_file = ""
32
33CONTENT_TYPE_DEFAULT = 0
34CONTENT_TYPE_EVENTS_FORMAT = 1
35CONTENT_TYPE_CMDLINES = 2
36CONTENT_TYPE_TGIDS = 3
37CONTENT_TYPE_CPU_RAW = 4
38CONTENT_TYPE_HEADER_PAGE = 30
39CONTENT_TYPE_PRINTK_FORMATS = 31
40CONTENT_TYPE_KALLSYMS = 32
41
42INT8_DATA_READ_LEN = 1
43INT16_DATA_READ_LEN = 2
44INT32_DATA_READ_LEN = 4
45INT64_DATA_READ_LEN = 8
46
47READ_PAGE_SIZE = 4096
48
49RB_MISSED_FLAGS = (0x1 << 31) | (1 << 30)
50
51BUFFER_TYPE_PADDING = 29
52BUFFER_TYPE_TIME_EXTEND = 30
53BUFFER_TYPE_TIME_STAMP = 31
54
55events_format = {}
56cmd_lines = {}
57tgids = {}
58
59
60TRACE_TXT_HEADER_FORMAT = """# tracer: nop
61#
62# entries-in-buffer/entries-written: %lu/%lu   #P:%d
63#
64#                                      _-----=> irqs-off
65#                                     / _----=> need-resched
66#                                    | / _---=> hardirq/softirq
67#                                    || / _--=> preempt-depth
68#                                    ||| /     delay
69#           TASK-PID    TGID   CPU#  ||||    TIMESTAMP  FUNCTION
70#              | |        |      |   ||||       |         |
71"""
72
73
74def parse_options():
75    global text_file
76    global binary_file
77    global out_file
78
79    usage = "Usage: %prog -t text_file -o out_file or\n%prog -b binary_file -o out_file"
80    desc = "Example: %prog -t my_trace_file.htrace -o my_trace_file.systrace"
81
82    parser = optparse.OptionParser(usage=usage, description=desc)
83    parser.add_option('-t', '--text_file', dest='text_file',
84        help='Name of the text file to be parsed.', metavar='FILE')
85    parser.add_option('-b', '--binary_file', dest='binary_file',
86        help='Name of the binary file to be parsed.', metavar='FILE')
87    parser.add_option('-o', '--out_file', dest='out_file',
88        help='File name after successful parsing.', metavar='FILE')
89
90    options, args = parser.parse_args()
91
92    if options.out_file is not None:
93        out_file = options.out_file
94    else:
95        print("Error: out_file must be specified")
96        exit(-1)
97    if options.text_file is not None:
98        text_file = options.text_file
99    if options.binary_file is not None:
100        binary_file = options.binary_file
101
102    if text_file == '' and binary_file == '':
103        print("Error: You must specify a text or binary file")
104        exit(-1)
105    if text_file != '' and binary_file != '':
106        print("Error: Only one parsed file can be specified")
107        exit(-1)
108
109
110def parse_text_trace_file():
111    print("start processing text trace file")
112    pattern_async = re.compile(TRACE_REGEX_ASYNC)
113    pattern_sync = re.compile(TRACE_REGEX_SYNC)
114    match_num = 0
115
116    infile_flags = os.O_RDONLY
117    infile_mode = stat.S_IRUSR
118    infile = os.fdopen(os.open(text_file, infile_flags, infile_mode), "r", encoding="utf-8")
119    outfile_flags = os.O_RDWR | os.O_CREAT
120    outfile_mode = stat.S_IRUSR | stat.S_IWUSR
121    outfile = os.fdopen(os.open(out_file, outfile_flags, outfile_mode), "w+", encoding="utf-8")
122
123    for line in infile:
124        reverse_line = line[::-1]
125        trace_match_async = pattern_async.match(reverse_line)
126        trace_match_sync = pattern_sync.match(reverse_line)
127        if trace_match_async:
128            line = line.rstrip(' ')
129            pos = line.rfind(' ')
130            line = "%s%s%s" % (line[:pos], '|', line[pos + 1:])
131            match_num += 1
132        elif trace_match_sync:
133            line = "%s\n" % (line.rstrip()[:-1])
134            match_num += 1
135        outfile.write(line)
136    infile.close()
137    outfile.close()
138    print("total matched and modified lines: ", match_num)
139
140
141cpu_raw_read_pos = 0
142TRACE_HEADER_SIZE = 12
143
144
145def parse_trace_header(infile):
146    trace_header = {}
147    trace_header_data = infile.read(TRACE_HEADER_SIZE)
148    trace_header_data_len = len(trace_header_data)
149    if trace_header_data_len == 12:
150        trace_header_tuple = struct.unpack('HBHL', trace_header_data)
151        trace_header["magic_number"] = trace_header_tuple[0]
152        trace_header["file_type"] = trace_header_tuple[1]
153        trace_header["version_number"] = trace_header_tuple[2]
154        trace_header["reserved"] = trace_header_tuple[3]
155    return trace_header
156
157
158def parse_page_header(data):
159    global cpu_raw_read_pos
160    page_header = {}
161
162    data_str = data[cpu_raw_read_pos:cpu_raw_read_pos + INT64_DATA_READ_LEN * 2 + INT8_DATA_READ_LEN]
163    data_str_len = len(data_str)
164    if data_str_len == 17:
165        struct_page_header = struct.unpack('QQB', data_str)
166        cpu_raw_read_pos += INT64_DATA_READ_LEN * 2 + INT8_DATA_READ_LEN
167        page_header["time_stamp"] = struct_page_header[0]
168
169        page_header["length"] = struct_page_header[1]
170        page_header["core_id"] = struct_page_header[2]
171
172    return page_header
173
174
175def parse_event_header(data):
176    global cpu_raw_read_pos
177    event_header = {}
178
179    data_str = data[cpu_raw_read_pos:cpu_raw_read_pos + INT32_DATA_READ_LEN + INT16_DATA_READ_LEN]
180    data_str_len = len(data_str)
181    if data_str_len == 6:
182        struct_event_header = struct.unpack('LH', data_str)
183        event_header["time_stamp_offset"] = struct_event_header[0]
184        event_header["size"] = struct_event_header[1]
185
186    cpu_raw_read_pos += INT32_DATA_READ_LEN + INT16_DATA_READ_LEN
187
188    return event_header
189
190
191TRACE_FLAG_IRQS_OFF = 0x01
192TRACE_FLAG_IRQS_NOSUPPORT = 0x02
193TRACE_FLAG_NEED_RESCHED = 0x04
194TRACE_FLAG_HARDIRQ = 0x08
195TRACE_FLAG_SOFTIRQ = 0x10
196TRACE_FLAG_PREEMPT_RESCHED = 0x20
197TRACE_FLAG_NMI = 0x40
198
199
200def trace_flags_to_str(flags, preempt_count):
201    result = ""
202    irqs_off = '.'
203    if flags & TRACE_FLAG_IRQS_OFF != 0:
204        irqs_off = 'd'
205    elif flags & TRACE_FLAG_IRQS_NOSUPPORT != 0:
206        irqs_off = 'X'
207    result += irqs_off
208
209    need_resched = '.'
210    is_need_resched = flags & TRACE_FLAG_NEED_RESCHED
211    is_preempt_resched = flags & TRACE_FLAG_PREEMPT_RESCHED
212    if is_need_resched != 0 and is_preempt_resched != 0:
213        need_resched = 'N'
214    elif is_need_resched != 0:
215        need_resched = 'n'
216    elif is_preempt_resched != 0:
217        need_resched = 'p'
218    result += need_resched
219
220    nmi_flag = flags & TRACE_FLAG_NMI
221    hard_irq = flags & TRACE_FLAG_HARDIRQ
222    soft_irq = flags & TRACE_FLAG_SOFTIRQ
223    irq_char = '.'
224    if nmi_flag != 0 and hard_irq != 0:
225        irq_char = 'Z'
226    elif nmi_flag != 0:
227        irq_char = 'z'
228    elif hard_irq != 0 and soft_irq != 0:
229        irq_char = 'H'
230    elif hard_irq != 0:
231        irq_char = 'h'
232    elif soft_irq != 0:
233        irq_char = 's'
234    result += irq_char
235
236    if preempt_count != 0:
237        result += "0123456789abcdef"[preempt_count & 0x0F]
238    else:
239        result += "."
240
241    return result
242
243
244COMM_STR_MAX = 16
245PID_STR_MAX = 6
246TGID_STR_MAX = 5
247CPU_STR_MAX = 3
248TS_SECS_MIN = 5
249TS_MICRO_SECS = 6
250
251
252def generate_one_event_str(data, cpu_id, time_stamp, one_event):
253    pid = int.from_bytes(one_event["fields"]["common_pid"], byteorder='little')
254    event_str = ""
255
256    cmd_line = cmd_lines.get(pid, "")
257    if pid == 0:
258        event_str += "<idle>"
259    elif cmd_line != "":
260        event_str += cmd_line
261    else:
262        event_str += "<...>"
263    event_str = event_str.rjust(COMM_STR_MAX)
264    event_str += "-"
265
266    event_str += str(pid).ljust(PID_STR_MAX)
267
268    tgid = tgids.get(pid, "")
269    if tgid != "":
270        event_str += "(" + tgid.rjust(TGID_STR_MAX) + ")"
271    else:
272        event_str += "(-----)"
273
274    event_str += " [" + str(cpu_id).zfill(CPU_STR_MAX) + "] "
275
276    flags = int.from_bytes(one_event["fields"]["common_flags"], byteorder='little')
277    preempt_count = int.from_bytes(one_event["fields"]["common_preempt_count"], byteorder='little')
278    if flags | preempt_count != 0:
279        event_str += trace_flags_to_str(flags, preempt_count) + " "
280    else:
281        event_str += ".... "
282
283    if time_stamp % 1000 >= 500:
284        time_stamp_str = str((time_stamp // 1000) + 1)
285    else:
286        time_stamp_str = str(time_stamp // 1000)
287    ts_secs = time_stamp_str[:-6].rjust(TS_SECS_MIN)
288    ts_micro_secs = time_stamp_str[-6:]
289    event_str += ts_secs + "." + ts_micro_secs + ": "
290
291    parse_result = parse_functions.parse(one_event["print_fmt"], data, one_event)
292    if parse_result is None:
293        print("Error: function parse_" + str(one_event["name"]) + " not found")
294    else:
295        event_str += str(one_event["name"]) + ": " + parse_result
296
297    return event_str
298
299
300def parse_one_event(data, event_id, cpu_id, time_stamp):
301    event_format = events_format.get(event_id, "")
302    if event_format == "":
303        return ""
304
305    fields = event_format["fields"]
306    one_event = {}
307    one_event["id"] = event_id
308    one_event["name"] = event_format["name"]
309    one_event["print_fmt"] = event_format["print_fmt"]
310    one_event["fields"] = {}
311    for field in fields:
312        offset = field["offset"]
313        size = field["size"]
314        one_event["fields"][field["name"]] = data[offset:offset + size]
315
316    return generate_one_event_str(data, cpu_id, time_stamp, one_event)
317
318
319RMQ_ENTRY_ALIGN_MASK = 3
320
321
322def parse_cpu_raw_one_page(data, result):
323    global cpu_raw_read_pos
324    end_pos = cpu_raw_read_pos + READ_PAGE_SIZE
325    page_header = parse_page_header(data)
326
327    while cpu_raw_read_pos < end_pos:
328        event_header = parse_event_header(data)
329        if event_header.get("size", 0) == 0:
330            break
331
332        time_stamp = page_header.get("time_stamp", 0) + event_header.get("time_stamp_offset", 0)
333        event_id = struct.unpack('H', data[cpu_raw_read_pos:cpu_raw_read_pos + INT16_DATA_READ_LEN])[0]
334
335        one_event_data = data[cpu_raw_read_pos:cpu_raw_read_pos + event_header.get("size", 0)]
336        one_event_result = parse_one_event(one_event_data, event_id, page_header.get("core_id", 0), time_stamp)
337        if one_event_result != "":
338            result.append([time_stamp, one_event_result])
339
340        evt_size = ((event_header.get("size", 0) + RMQ_ENTRY_ALIGN_MASK) & (~RMQ_ENTRY_ALIGN_MASK))
341        cpu_raw_read_pos += evt_size
342    cpu_raw_read_pos = end_pos
343
344
345def parse_cpu_raw(data, data_len, result):
346    global cpu_raw_read_pos
347    cpu_raw_read_pos = 0
348
349    while cpu_raw_read_pos < data_len:
350        parse_cpu_raw_one_page(data, result)
351
352
353def parse_events_format_field(field_line):
354    field_info = field_line.split(";")
355    field_info[0] = field_info[0].lstrip()
356    field_info[1] = field_info[1].lstrip()
357    field_info[2] = field_info[2].lstrip()
358    field_info[3] = field_info[3].lstrip()
359
360    field = {}
361    type_name_pos = field_info[0].rfind(" ")
362    field["type"] = field_info[0][len("field:"):type_name_pos]
363    field["name"] = field_info[0][type_name_pos + 1:]
364    field["offset"] = int(field_info[1][len("offset:"):])
365    field["size"] = int(field_info[2][len("size:"):])
366    field["signed"] = field_info[3][len("signed:"):]
367
368    return field
369
370
371def parse_events_format(data):
372    name_line_prefix = "name: "
373    id_line_prefix = "ID: "
374    field_line_prefix = "field:"
375    print_fmt_line_prefix = "print fmt: "
376
377    events_format_lines = data.decode('utf-8').split("\n")
378    event_format = {}
379    event_format["fields"] = []
380    for line in events_format_lines:
381        line = line.lstrip()
382        if line.startswith(name_line_prefix):
383            event_format["name"] = line[len(name_line_prefix):]
384        elif line.startswith(id_line_prefix):
385            event_format["id"] = int(line[len(id_line_prefix):])
386        elif line.startswith(field_line_prefix):
387            event_format["fields"].append(parse_events_format_field(line))
388        elif line.startswith(print_fmt_line_prefix):
389            event_format["print_fmt"] = line[len(print_fmt_line_prefix):]
390            events_format[event_format["id"]] = copy.deepcopy(event_format)
391            event_format["fields"].clear()
392
393
394def parse_cmdlines(data):
395    cmd_lines_list = data.decode('utf-8').split("\n")
396    for cmd_line in cmd_lines_list:
397        pos = cmd_line.find(" ")
398        if pos != -1:
399            cmd_lines[int(cmd_line[:pos])] = cmd_line[pos + 1:]
400
401
402def parse_tgids(data):
403    tgids_lines_list = data.decode('utf-8').split("\n")
404    for tgids_line in tgids_lines_list:
405        pos = tgids_line.find(" ")
406        if pos != -1:
407            tgids[int(tgids_line[:pos])] = tgids_line[pos + 1:]
408
409
410def parse_header_page(data):
411    print("in parse_header_page")
412
413
414def parse_printk_formats(data):
415    print("in parse_printk_formats")
416
417
418def parse_kallsyms(data):
419    print("in parse_kallsyms")
420
421
422def parse_trace_base_data(infile, file_size):
423    while infile.tell() < file_size:
424        data_type = struct.unpack('L', infile.read(INT32_DATA_READ_LEN))[0]
425        data_len = struct.unpack('L', infile.read(INT32_DATA_READ_LEN))[0]
426        data = infile.read(data_len)
427        if data_type == CONTENT_TYPE_HEADER_PAGE:
428            parse_header_page(data)
429        elif data_type == CONTENT_TYPE_CMDLINES:
430            parse_cmdlines(data)
431        elif data_type == CONTENT_TYPE_TGIDS:
432            parse_tgids(data)
433        elif data_type == CONTENT_TYPE_EVENTS_FORMAT:
434            parse_events_format(data)
435        elif data_type == CONTENT_TYPE_PRINTK_FORMATS:
436            parse_printk_formats(data)
437        elif data_type == CONTENT_TYPE_KALLSYMS:
438            parse_kallsyms(data)
439
440
441def parse_trace_events_data(infile, file_size, cpu_nums, result):
442    while infile.tell() < file_size:
443        data_type = struct.unpack('L', infile.read(INT32_DATA_READ_LEN))[0]
444        data_len = struct.unpack('L', infile.read(INT32_DATA_READ_LEN))[0]
445        data = infile.read(data_len)
446
447        if data_type >= CONTENT_TYPE_CPU_RAW and data_type < CONTENT_TYPE_CPU_RAW + cpu_nums:
448            parse_cpu_raw(data, data_len, result)
449
450
451def parse_binary_trace_file():
452    infile_flags = os.O_RDONLY | os.O_BINARY
453    infile_mode = stat.S_IRUSR
454    infile = os.fdopen(os.open(binary_file, infile_flags, infile_mode), 'rb')
455
456    outfile_flags = os.O_RDWR | os.O_CREAT
457    outfile_mode = stat.S_IRUSR | stat.S_IWUSR
458    outfile = os.fdopen(os.open(out_file, outfile_flags, outfile_mode), 'w', encoding="utf-8")
459
460    trace_header = parse_trace_header(infile)
461    cpu_nums = (trace_header.get("reserved", 0) >> 1) & 0xf
462
463    outfile.write(TRACE_TXT_HEADER_FORMAT)
464    trace_file_size = os.path.getsize(binary_file)
465    parse_trace_base_data(infile, trace_file_size)
466    infile.seek(TRACE_HEADER_SIZE)
467
468    result = []
469    parse_trace_events_data(infile, trace_file_size, cpu_nums, result)
470    result = sorted(result, key=lambda x: x[0])
471    for line in result:
472        outfile.write("{}\n".format(line[1]))
473
474    outfile.close()
475    infile.close()
476
477
478def main():
479    parse_options()
480
481    if text_file != '':
482        parse_text_trace_file()
483    else:
484        parse_binary_trace_file()
485
486
487if __name__ == '__main__':
488    main()