1/* Copyright (c) 2016 Facebook 2 * 3 * This program is free software; you can redistribute it and/or 4 * modify it under the terms of version 2 of the GNU General Public 5 * License as published by the Free Software Foundation. 6 */ 7#include <uapi/linux/bpf.h> 8#include <uapi/linux/ptrace.h> 9#include <uapi/linux/perf_event.h> 10#include <linux/version.h> 11#include <linux/sched.h> 12#include <bpf/bpf_helpers.h> 13#include <bpf/bpf_tracing.h> 14 15#define _(P) \ 16 ({ \ 17 typeof(P) val; \ 18 bpf_probe_read_kernel(&val, sizeof(val), &(P)); \ 19 val; \ 20 }) 21 22#define MINBLOCK_US 1 23 24struct key_t { 25 char waker[TASK_COMM_LEN]; 26 char target[TASK_COMM_LEN]; 27 u32 wret; 28 u32 tret; 29}; 30 31struct { 32 __uint(type, BPF_MAP_TYPE_HASH); 33 __type(key, struct key_t); 34 __type(value, u64); 35 __uint(max_entries, 10000); 36} counts SEC(".maps"); 37 38struct { 39 __uint(type, BPF_MAP_TYPE_HASH); 40 __type(key, u32); 41 __type(value, u64); 42 __uint(max_entries, 10000); 43} start SEC(".maps"); 44 45struct wokeby_t { 46 char name[TASK_COMM_LEN]; 47 u32 ret; 48}; 49 50struct { 51 __uint(type, BPF_MAP_TYPE_HASH); 52 __type(key, u32); 53 __type(value, struct wokeby_t); 54 __uint(max_entries, 10000); 55} wokeby SEC(".maps"); 56 57struct { 58 __uint(type, BPF_MAP_TYPE_STACK_TRACE); 59 __uint(key_size, sizeof(u32)); 60 __uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64)); 61 __uint(max_entries, 10000); 62} stackmap SEC(".maps"); 63 64#define STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) 65 66SEC("kprobe/try_to_wake_up") 67int waker(struct pt_regs *ctx) 68{ 69 struct task_struct *p = (void *) PT_REGS_PARM1(ctx); 70 struct wokeby_t woke; 71 u32 pid; 72 73 pid = _(p->pid); 74 75 bpf_get_current_comm(&woke.name, sizeof(woke.name)); 76 woke.ret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); 77 78 bpf_map_update_elem(&wokeby, &pid, &woke, BPF_ANY); 79 return 0; 80} 81 82static inline int update_counts(void *ctx, u32 pid, u64 delta) 83{ 84 struct wokeby_t *woke; 85 u64 zero = 0, *val; 86 struct key_t key; 87 88 __builtin_memset(&key.waker, 0, sizeof(key.waker)); 89 bpf_get_current_comm(&key.target, sizeof(key.target)); 90 key.tret = bpf_get_stackid(ctx, &stackmap, STACKID_FLAGS); 91 key.wret = 0; 92 93 woke = bpf_map_lookup_elem(&wokeby, &pid); 94 if (woke) { 95 key.wret = woke->ret; 96 __builtin_memcpy(&key.waker, woke->name, sizeof(key.waker)); 97 bpf_map_delete_elem(&wokeby, &pid); 98 } 99 100 val = bpf_map_lookup_elem(&counts, &key); 101 if (!val) { 102 bpf_map_update_elem(&counts, &key, &zero, BPF_NOEXIST); 103 val = bpf_map_lookup_elem(&counts, &key); 104 if (!val) 105 return 0; 106 } 107 (*val) += delta; 108 return 0; 109} 110 111#if 1 112/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */ 113struct sched_switch_args { 114 unsigned long long pad; 115 char prev_comm[16]; 116 int prev_pid; 117 int prev_prio; 118 long long prev_state; 119 char next_comm[16]; 120 int next_pid; 121 int next_prio; 122}; 123SEC("tracepoint/sched/sched_switch") 124int oncpu(struct sched_switch_args *ctx) 125{ 126 /* record previous thread sleep time */ 127 u32 pid = ctx->prev_pid; 128#else 129SEC("kprobe/finish_task_switch") 130int oncpu(struct pt_regs *ctx) 131{ 132 struct task_struct *p = (void *) PT_REGS_PARM1(ctx); 133 /* record previous thread sleep time */ 134 u32 pid = _(p->pid); 135#endif 136 u64 delta, ts, *tsp; 137 138 ts = bpf_ktime_get_ns(); 139 bpf_map_update_elem(&start, &pid, &ts, BPF_ANY); 140 141 /* calculate current thread's delta time */ 142 pid = bpf_get_current_pid_tgid(); 143 tsp = bpf_map_lookup_elem(&start, &pid); 144 if (!tsp) 145 /* missed start or filtered */ 146 return 0; 147 148 delta = bpf_ktime_get_ns() - *tsp; 149 bpf_map_delete_elem(&start, &pid); 150 delta = delta / 1000; 151 if (delta < MINBLOCK_US) 152 return 0; 153 154 return update_counts(ctx, pid, delta); 155} 156char _license[] SEC("license") = "GPL"; 157u32 _version SEC("version") = LINUX_VERSION_CODE; 158