#!/usr/bin/env drgn
#
# Copyright (C) 2023 Tejun Heo <tj@kernel.org>
# Copyright (C) 2023 Meta Platforms, Inc. and affiliates.

# Help text shown by argparse (RawTextHelpFormatter keeps the layout as-is).
desc = """
This is a drgn script to monitor workqueues. For more info on drgn, visit
https://github.com/osandov/drgn.

  total    Total number of work items executed by the workqueue.

  infl     The number of currently in-flight work items.

  CPUtime  Total CPU time consumed by the workqueue in seconds. This is
           sampled from scheduler ticks and only provides ballpark
           measurement. "nohz_full=" CPUs are excluded from measurement.

  CPUitsv  The number of times a concurrency-managed work item hogged CPU
           longer than the threshold (workqueue.cpu_intensive_thresh_us)
           and got excluded from concurrency management to avoid stalling
           other work items.

  CMW/RPR  For per-cpu workqueues, the number of concurrency-management
           wake-ups while executing a work item of the workqueue. For
           unbound workqueues, the number of times a worker was repatriated
           to its affinity scope after being migrated to an off-scope CPU by
           the scheduler.

  mayday   The number of times the rescuer was requested while waiting for
           new worker creation.

  rescued  The number of work items executed by the rescuer.
"""

import argparse
import sys
import signal
import os
import re
import time
import json

import drgn
from drgn.helpers.linux.list import list_for_each_entry, list_empty
from drgn.helpers.linux.cpumask import for_each_possible_cpu

parser = argparse.ArgumentParser(description=desc,
                                 formatter_class=argparse.RawTextHelpFormatter)
parser.add_argument('workqueue', metavar='REGEX', nargs='*',
                    help='Target workqueue name patterns (all if empty)')
parser.add_argument('-i', '--interval', metavar='SECS', type=float, default=1,
                    help='Monitoring interval (0 to print once and exit)')
parser.add_argument('-j', '--json', action='store_true',
                    help='Output in json')
args = parser.parse_args()


def err(s):
    """Print the message `s` to stderr and terminate with exit status 1."""
    print(s, file=sys.stderr, flush=True)
    sys.exit(1)


# NOTE: `prog` is not defined in this file; it is expected to be supplied by
# the drgn runtime that executes the script (see the shebang line).
workqueues = prog['workqueues']

WQ_UNBOUND = prog['WQ_UNBOUND']
WQ_MEM_RECLAIM = prog['WQ_MEM_RECLAIM']

PWQ_STAT_STARTED = prog['PWQ_STAT_STARTED']      # work items started execution
PWQ_STAT_COMPLETED = prog['PWQ_STAT_COMPLETED']  # work items completed execution
PWQ_STAT_CPU_TIME = prog['PWQ_STAT_CPU_TIME']    # total CPU time consumed
PWQ_STAT_CPU_INTENSIVE = prog['PWQ_STAT_CPU_INTENSIVE']  # wq_cpu_intensive_thresh_us violations
PWQ_STAT_CM_WAKEUP = prog['PWQ_STAT_CM_WAKEUP']          # concurrency-management worker wakeups
PWQ_STAT_REPATRIATED = prog['PWQ_STAT_REPATRIATED']      # unbound workers brought back into scope
PWQ_STAT_MAYDAY = prog['PWQ_STAT_MAYDAY']                # maydays to rescuer
PWQ_STAT_RESCUED = prog['PWQ_STAT_RESCUED']              # linked work items executed by rescuer
PWQ_NR_STATS = prog['PWQ_NR_STATS']


class WqStats:
    """Snapshot of one workqueue's counters, summed over all of its pwqs."""

    def __init__(self, wq):
        """Read name, flags and per-pwq stats from the workqueue object `wq`.

        `wq` is an element of the `workqueues` list; every pool_workqueue
        linked on `wq.pwqs` contributes its `stats[]` entries to the totals.
        """
        self.name = wq.name.string_().decode()
        # Parenthesised for readers used to C precedence; in Python `&`
        # already binds tighter than `!=`, so the result is unchanged.
        self.unbound = (wq.flags & WQ_UNBOUND) != 0
        self.mem_reclaim = (wq.flags & WQ_MEM_RECLAIM) != 0
        self.stats = [0] * PWQ_NR_STATS
        for pwq in list_for_each_entry('struct pool_workqueue',
                                       wq.pwqs.address_of_(), 'pwqs_node'):
            for i in range(PWQ_NR_STATS):
                # int() converts the value read through drgn to a plain
                # Python integer before accumulating.
                self.stats[i] += int(pwq.stats[i])

    def dict(self, now):
        """Return the snapshot as a flat dict, stamped with `now`."""
        return {
            'timestamp': now,
            'name': self.name,
            'unbound': self.unbound,
            'mem_reclaim': self.mem_reclaim,
            'started': self.stats[PWQ_STAT_STARTED],
            'completed': self.stats[PWQ_STAT_COMPLETED],
            'cpu_time': self.stats[PWQ_STAT_CPU_TIME],
            'cpu_intensive': self.stats[PWQ_STAT_CPU_INTENSIVE],
            'cm_wakeup': self.stats[PWQ_STAT_CM_WAKEUP],
            'repatriated': self.stats[PWQ_STAT_REPATRIATED],
            'mayday': self.stats[PWQ_STAT_MAYDAY],
            'rescued': self.stats[PWQ_STAT_RESCUED],
        }

    @staticmethod
    def table_header_str():
        """Return the column header line matching table_row_str()."""
        return f'{"":>24} {"total":>8} {"infl":>5} {"CPUtime":>8} '\
            f'{"CPUitsv":>7} {"CMW/RPR":>7} {"mayday":>7} {"rescued":>7}'

    def table_row_str(self):
        """Return this workqueue's stats formatted as one table row.

        Columns that do not apply to the workqueue are shown as '-':
        CPUitsv only for non-unbound workqueues, mayday/rescued only for
        workqueues with WQ_MEM_RECLAIM set. The CMW/RPR column shows
        repatriations for unbound workqueues and CM wake-ups otherwise.
        """
        stats = self.stats
        cpu_intensive = cmw_rpr = mayday = rescued = '-'

        if self.unbound:
            cmw_rpr = str(stats[PWQ_STAT_REPATRIATED])
        else:
            cpu_intensive = str(stats[PWQ_STAT_CPU_INTENSIVE])
            cmw_rpr = str(stats[PWQ_STAT_CM_WAKEUP])

        if self.mem_reclaim:
            mayday = str(stats[PWQ_STAT_MAYDAY])
            rescued = str(stats[PWQ_STAT_RESCUED])

        started = stats[PWQ_STAT_STARTED]
        # The counters are read one at a time, so clamp at zero in case
        # "completed" is ahead of "started" in this snapshot.
        in_flight = max(started - stats[PWQ_STAT_COMPLETED], 0)
        # Scaled by 1e6 for the CPUtime column, which the help text gives
        # in seconds.
        cpu_time = stats[PWQ_STAT_CPU_TIME] / 1000000

        columns = (
            f'{self.name[-24:]:24}',  # keep the tail of over-long names
            f'{started:8}',
            f'{in_flight:5}',
            f'{cpu_time:8.1f}',
            f'{cpu_intensive:>7}',
            f'{cmw_rpr:>7}',
            f'{mayday:>7}',
            f'{rescued:>7}',
        )
        # Joining avoids the trailing pad space the row used to end with.
        return ' '.join(columns)


# Set by the SIGINT handler; polled by the monitoring loop.
exit_req = False


def sigint_handler(signr, frame):
    """Signal handler: request that the monitoring loop stop."""
    global exit_req
    exit_req = True
def main():
    """Print workqueue stats once or periodically until interrupted.

    Reads the parsed command-line options from the module-level `args`:
    positional regex patterns select workqueues by name, `--interval` sets
    the refresh period (0 prints a single sample), and `--json` switches
    from the table view to one JSON object per workqueue per sample.
    """
    # handle args
    table_fmt = not args.json
    interval = args.interval
    if interval < 0:
        # time.sleep() rejects negative values; fail up front instead of
        # after the first sample has been printed.
        err(f'invalid interval {interval}: must be >= 0')

    # Multiple patterns are OR-ed into a single regex.
    re_str = '|'.join(args.workqueue)
    filter_re = None
    if re_str:
        try:
            filter_re = re.compile(re_str)
        except re.error as e:
            err(f'invalid workqueue pattern {re_str!r}: {e}')

    # monitoring loop
    signal.signal(signal.SIGINT, sigint_handler)

    while not exit_req:
        now = time.time()

        if table_fmt:
            print()
            print(WqStats.table_header_str())

        for wq in list_for_each_entry('struct workqueue_struct',
                                      workqueues.address_of_(), 'list'):
            stats = WqStats(wq)
            if filter_re and not filter_re.search(stats.name):
                continue
            if table_fmt:
                print(stats.table_row_str())
            else:
                # Emit real JSON (double quotes, true/false) rather than
                # the Python repr of the dict.
                print(json.dumps(stats.dict(now)))

        if interval == 0:
            break
        time.sleep(interval)


if __name__ == "__main__":
    main()