162306a36Sopenharmony_ci#!/usr/bin/env drgn
262306a36Sopenharmony_ci#
362306a36Sopenharmony_ci# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
462306a36Sopenharmony_ci# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.
562306a36Sopenharmony_ci
# Help text passed to argparse as the program description. The entries below
# describe the columns printed by WqStats.table_header_str() / table_row_str().
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUtime  Total CPU time consumed by the workqueue in seconds. This is
           sampled from scheduler ticks and only provides ballpark
           measurement. "nohz_full=" CPUs are excluded from measurement.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMW/RPR  For per-cpu workqueues, the number of concurrency-management
           wake-ups while executing a work item of the workqueue. For
           unbound workqueues, the number of times a worker was repatriated
           to its affinity scope after being migrated to an off-scope CPU by
           the scheduler.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ciimport sys
3662306a36Sopenharmony_ciimport signal
3762306a36Sopenharmony_ciimport os
3862306a36Sopenharmony_ciimport re
3962306a36Sopenharmony_ciimport time
4062306a36Sopenharmony_ciimport json
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ciimport drgn
4362306a36Sopenharmony_cifrom drgn.helpers.linux.list import list_for_each_entry,list_empty
4462306a36Sopenharmony_cifrom drgn.helpers.linux.cpumask import for_each_possible_cpu
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ciimport argparse
# Command-line interface. RawTextHelpFormatter keeps the multi-line layout of
# `desc` exactly as written instead of re-wrapping it.
parser = argparse.ArgumentParser(
    description=desc,
    formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
    'workqueue',
    metavar='REGEX',
    nargs='*',
    help='Target workqueue name patterns (all if empty)',
)
parser.add_argument(
    '-i', '--interval',
    metavar='SECS',
    type=float,
    default=1,
    help='Monitoring interval (0 to print once and exit)',
)
parser.add_argument(
    '-j', '--json',
    action='store_true',
    help='Output in json',
)
args = parser.parse_args()
5662306a36Sopenharmony_ci
def err(s):
    """Write *s* followed by a newline to stderr, flush, and exit with status 1."""
    stream = sys.stderr
    stream.write(str(s))
    stream.write('\n')
    stream.flush()
    raise SystemExit(1)
6062306a36Sopenharmony_ci
# All lookups below go through `prog`, which this file never defines; it is
# presumably injected by the drgn runtime named in the shebang.

# List head that main() walks to visit every `struct workqueue_struct`.
workqueues = prog['workqueues']

# Flag bits tested against workqueue_struct.flags.
WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

# Indices into pool_workqueue.stats[].
# work items started execution
PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']
# work items completed execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']
# total CPU time consumed
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME']
# wq_cpu_intensive_thresh_us violations
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE']
# concurrency-management worker wakeups
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']
# unbound workers brought back into scope
PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED']
# maydays to rescuer
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']
# linked work items executed by rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']
# number of entries in the stats array
PWQ_NR_STATS = prog['PWQ_NR_STATS']
7562306a36Sopenharmony_ci
class WqStats:
    """Snapshot of one workqueue's counters, summed over its pool_workqueues.

    Attributes:
        name:        workqueue name decoded to ``str``.
        unbound:     True when the WQ_UNBOUND flag bit is set.
        mem_reclaim: True when the WQ_MEM_RECLAIM flag bit is set.
        stats:       list of ints indexed by the PWQ_STAT_* constants.
    """

    def __init__(self, wq):
        """Read the name, flags and per-pwq stats from *wq*.

        *wq* is the drgn object for a ``struct workqueue_struct``.
        """
        self.name = wq.name.string_().decode()
        # bool() keeps these as plain Python booleans regardless of what the
        # comparison on the drgn object evaluates to.
        self.unbound = bool((wq.flags & WQ_UNBOUND) != 0)
        self.mem_reclaim = bool((wq.flags & WQ_MEM_RECLAIM) != 0)

        # Convert once instead of on every inner-loop iteration.
        nr_stats = int(PWQ_NR_STATS)
        self.stats = [0] * nr_stats
        for pwq in list_for_each_entry('struct pool_workqueue',
                                       wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(nr_stats):
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a dict, stamped with *now* as 'timestamp'."""
        return {
            'timestamp': now,
            'name': self.name,
            'unbound': self.unbound,
            'mem_reclaim': self.mem_reclaim,
            'started': self.stats[PWQ_STAT_STARTED],
            'completed': self.stats[PWQ_STAT_COMPLETED],
            'cpu_time': self.stats[PWQ_STAT_CPU_TIME],
            'cpu_intensive': self.stats[PWQ_STAT_CPU_INTENSIVE],
            'cm_wakeup': self.stats[PWQ_STAT_CM_WAKEUP],
            'repatriated': self.stats[PWQ_STAT_REPATRIATED],
            'mayday': self.stats[PWQ_STAT_MAYDAY],
            'rescued': self.stats[PWQ_STAT_RESCUED],
        }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()'s layout.

        Declared static so it can be called on the class or on an instance.
        """
        return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
            f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return one formatted table row for this workqueue.

        Columns that do not apply are shown as '-': CPUitsv for unbound
        workqueues, mayday/rescued for workqueues without WQ_MEM_RECLAIM.
        """
        cpu_intensive = '-'
        mayday = '-'
        rescued = '-'

        # The CMW/RPR column is shared: repatriations for unbound workqueues,
        # concurrency-management wakeups otherwise.
        if self.unbound:
            cmw_rpr = str(self.stats[PWQ_STAT_REPATRIATED])
        else:
            cpu_intensive = str(self.stats[PWQ_STAT_CPU_INTENSIVE])
            cmw_rpr = str(self.stats[PWQ_STAT_CM_WAKEUP])

        if self.mem_reclaim:
            mayday = str(self.stats[PWQ_STAT_MAYDAY])
            rescued = str(self.stats[PWQ_STAT_RESCUED])

        # Clamped at 0 so the in-flight column never shows a negative count.
        in_flight = max(self.stats[PWQ_STAT_STARTED] -
                        self.stats[PWQ_STAT_COMPLETED], 0)

        out = f'{self.name[-24:]:24} ' \
              f'{self.stats[PWQ_STAT_STARTED]:8} ' \
              f'{in_flight:5} ' \
              f'{self.stats[PWQ_STAT_CPU_TIME] / 1000000:8.1f} ' \
              f'{cpu_intensive:>7} ' \
              f'{cmw_rpr:>7} ' \
              f'{mayday:>7} ' \
              f'{rescued:>7} '
        # Drop the trailing pad space so the row is as wide as the header.
        return out.rstrip()
12962306a36Sopenharmony_ci
# Set to True by sigint_handler(); the monitoring loop in main() checks it
# at the top of each iteration.
exit_req = False


def sigint_handler(signr, frame):
    """Signal handler: record that an exit was requested.

    Both arguments are ignored; only the module-level flag is updated.
    """
    global exit_req
    exit_req = True
13562306a36Sopenharmony_ci
def main():
    """Print workqueue stats once, or repeatedly every ``args.interval`` seconds.

    Each pass walks the `workqueues` list, builds a WqStats per workqueue and
    prints those whose name matches any of the REGEX arguments (all of them
    when none were given). Output is a table by default, or one JSON object
    per workqueue per pass with --json. The loop ends when the interval is 0
    or after SIGINT sets `exit_req`.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval

    # Multiple patterns are OR-ed into a single regex; no patterns means no
    # filtering at all.
    re_str = '|'.join(args.workqueue)
    filter_re = re.compile(re_str) if re_str else None

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct',
                                      workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Serialize explicitly: printing the dict itself would emit a
                # Python repr (single quotes, True/False), which is not JSON.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        # NOTE(review): the handler only sets a flag, so a SIGINT arriving
        # during the sleep presumably takes effect once the sleep finishes.
        time.sleep(interval)


if __name__ == "__main__":
    main()
176