162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci 362306a36Sopenharmony_ci#include <linux/version.h> 462306a36Sopenharmony_ci#include <linux/ptrace.h> 562306a36Sopenharmony_ci#include <uapi/linux/bpf.h> 662306a36Sopenharmony_ci#include <bpf/bpf_helpers.h> 762306a36Sopenharmony_ci 862306a36Sopenharmony_ci/* 962306a36Sopenharmony_ci * The CPU number, cstate number and pstate number are based 1062306a36Sopenharmony_ci * on 96boards Hikey with octa CA53 CPUs. 1162306a36Sopenharmony_ci * 1262306a36Sopenharmony_ci * Every CPU have three idle states for cstate: 1362306a36Sopenharmony_ci * WFI, CPU_OFF, CLUSTER_OFF 1462306a36Sopenharmony_ci * 1562306a36Sopenharmony_ci * Every CPU have 5 operating points: 1662306a36Sopenharmony_ci * 208MHz, 432MHz, 729MHz, 960MHz, 1200MHz 1762306a36Sopenharmony_ci * 1862306a36Sopenharmony_ci * This code is based on these assumption and other platforms 1962306a36Sopenharmony_ci * need to adjust these definitions. 2062306a36Sopenharmony_ci */ 2162306a36Sopenharmony_ci#define MAX_CPU 8 2262306a36Sopenharmony_ci#define MAX_PSTATE_ENTRIES 5 2362306a36Sopenharmony_ci#define MAX_CSTATE_ENTRIES 3 2462306a36Sopenharmony_ci 2562306a36Sopenharmony_cistatic int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 }; 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci/* 2862306a36Sopenharmony_ci * my_map structure is used to record cstate and pstate index and 2962306a36Sopenharmony_ci * timestamp (Idx, Ts), when new event incoming we need to update 3062306a36Sopenharmony_ci * combination for new state index and timestamp (Idx`, Ts`). 3162306a36Sopenharmony_ci * 3262306a36Sopenharmony_ci * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time 3362306a36Sopenharmony_ci * interval for the previous state: Duration(Idx) = Ts` - Ts. 3462306a36Sopenharmony_ci * 3562306a36Sopenharmony_ci * Every CPU has one below array for recording state index and 3662306a36Sopenharmony_ci * timestamp, and record for cstate and pstate saperately: 3762306a36Sopenharmony_ci * 3862306a36Sopenharmony_ci * +--------------------------+ 3962306a36Sopenharmony_ci * | cstate timestamp | 4062306a36Sopenharmony_ci * +--------------------------+ 4162306a36Sopenharmony_ci * | cstate index | 4262306a36Sopenharmony_ci * +--------------------------+ 4362306a36Sopenharmony_ci * | pstate timestamp | 4462306a36Sopenharmony_ci * +--------------------------+ 4562306a36Sopenharmony_ci * | pstate index | 4662306a36Sopenharmony_ci * +--------------------------+ 4762306a36Sopenharmony_ci */ 4862306a36Sopenharmony_ci#define MAP_OFF_CSTATE_TIME 0 4962306a36Sopenharmony_ci#define MAP_OFF_CSTATE_IDX 1 5062306a36Sopenharmony_ci#define MAP_OFF_PSTATE_TIME 2 5162306a36Sopenharmony_ci#define MAP_OFF_PSTATE_IDX 3 5262306a36Sopenharmony_ci#define MAP_OFF_NUM 4 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cistruct { 5562306a36Sopenharmony_ci __uint(type, BPF_MAP_TYPE_ARRAY); 5662306a36Sopenharmony_ci __type(key, u32); 5762306a36Sopenharmony_ci __type(value, u64); 5862306a36Sopenharmony_ci __uint(max_entries, MAX_CPU * MAP_OFF_NUM); 5962306a36Sopenharmony_ci} my_map SEC(".maps"); 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci/* cstate_duration records duration time for every idle state per CPU */ 6262306a36Sopenharmony_cistruct { 6362306a36Sopenharmony_ci __uint(type, BPF_MAP_TYPE_ARRAY); 6462306a36Sopenharmony_ci __type(key, u32); 6562306a36Sopenharmony_ci __type(value, u64); 6662306a36Sopenharmony_ci __uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES); 6762306a36Sopenharmony_ci} cstate_duration SEC(".maps"); 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci/* pstate_duration records duration time for every operating point per CPU */ 7062306a36Sopenharmony_cistruct { 7162306a36Sopenharmony_ci __uint(type, BPF_MAP_TYPE_ARRAY); 7262306a36Sopenharmony_ci __type(key, u32); 7362306a36Sopenharmony_ci __type(value, u64); 7462306a36Sopenharmony_ci __uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES); 7562306a36Sopenharmony_ci} pstate_duration SEC(".maps"); 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci/* 7862306a36Sopenharmony_ci * The trace events for cpu_idle and cpu_frequency are taken from: 7962306a36Sopenharmony_ci * /sys/kernel/tracing/events/power/cpu_idle/format 8062306a36Sopenharmony_ci * /sys/kernel/tracing/events/power/cpu_frequency/format 8162306a36Sopenharmony_ci * 8262306a36Sopenharmony_ci * These two events have same format, so define one common structure. 8362306a36Sopenharmony_ci */ 8462306a36Sopenharmony_cistruct cpu_args { 8562306a36Sopenharmony_ci u64 pad; 8662306a36Sopenharmony_ci u32 state; 8762306a36Sopenharmony_ci u32 cpu_id; 8862306a36Sopenharmony_ci}; 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */ 9162306a36Sopenharmony_cistatic u32 find_cpu_pstate_idx(u32 frequency) 9262306a36Sopenharmony_ci{ 9362306a36Sopenharmony_ci u32 i; 9462306a36Sopenharmony_ci 9562306a36Sopenharmony_ci for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) { 9662306a36Sopenharmony_ci if (frequency == cpu_opps[i]) 9762306a36Sopenharmony_ci return i; 9862306a36Sopenharmony_ci } 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci return i; 10162306a36Sopenharmony_ci} 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ciSEC("tracepoint/power/cpu_idle") 10462306a36Sopenharmony_ciint bpf_prog1(struct cpu_args *ctx) 10562306a36Sopenharmony_ci{ 10662306a36Sopenharmony_ci u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta; 10762306a36Sopenharmony_ci u32 key, cpu, pstate_idx; 10862306a36Sopenharmony_ci u64 *val; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci if (ctx->cpu_id > MAX_CPU) 11162306a36Sopenharmony_ci return 0; 11262306a36Sopenharmony_ci 11362306a36Sopenharmony_ci cpu = ctx->cpu_id; 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME; 11662306a36Sopenharmony_ci cts = bpf_map_lookup_elem(&my_map, &key); 11762306a36Sopenharmony_ci if (!cts) 11862306a36Sopenharmony_ci return 0; 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX; 12162306a36Sopenharmony_ci cstate = bpf_map_lookup_elem(&my_map, &key); 12262306a36Sopenharmony_ci if (!cstate) 12362306a36Sopenharmony_ci return 0; 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME; 12662306a36Sopenharmony_ci pts = bpf_map_lookup_elem(&my_map, &key); 12762306a36Sopenharmony_ci if (!pts) 12862306a36Sopenharmony_ci return 0; 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX; 13162306a36Sopenharmony_ci pstate = bpf_map_lookup_elem(&my_map, &key); 13262306a36Sopenharmony_ci if (!pstate) 13362306a36Sopenharmony_ci return 0; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci prev_state = *cstate; 13662306a36Sopenharmony_ci *cstate = ctx->state; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci if (!*cts) { 13962306a36Sopenharmony_ci *cts = bpf_ktime_get_ns(); 14062306a36Sopenharmony_ci return 0; 14162306a36Sopenharmony_ci } 14262306a36Sopenharmony_ci 14362306a36Sopenharmony_ci cur_ts = bpf_ktime_get_ns(); 14462306a36Sopenharmony_ci delta = cur_ts - *cts; 14562306a36Sopenharmony_ci *cts = cur_ts; 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_ci /* 14862306a36Sopenharmony_ci * When state doesn't equal to (u32)-1, the cpu will enter 14962306a36Sopenharmony_ci * one idle state; for this case we need to record interval 15062306a36Sopenharmony_ci * for the pstate. 15162306a36Sopenharmony_ci * 15262306a36Sopenharmony_ci * OPP2 15362306a36Sopenharmony_ci * +---------------------+ 15462306a36Sopenharmony_ci * OPP1 | | 15562306a36Sopenharmony_ci * ---------+ | 15662306a36Sopenharmony_ci * | Idle state 15762306a36Sopenharmony_ci * +--------------- 15862306a36Sopenharmony_ci * 15962306a36Sopenharmony_ci * |<- pstate duration ->| 16062306a36Sopenharmony_ci * ^ ^ 16162306a36Sopenharmony_ci * pts cur_ts 16262306a36Sopenharmony_ci */ 16362306a36Sopenharmony_ci if (ctx->state != (u32)-1) { 16462306a36Sopenharmony_ci 16562306a36Sopenharmony_ci /* record pstate after have first cpu_frequency event */ 16662306a36Sopenharmony_ci if (!*pts) 16762306a36Sopenharmony_ci return 0; 16862306a36Sopenharmony_ci 16962306a36Sopenharmony_ci delta = cur_ts - *pts; 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci pstate_idx = find_cpu_pstate_idx(*pstate); 17262306a36Sopenharmony_ci if (pstate_idx >= MAX_PSTATE_ENTRIES) 17362306a36Sopenharmony_ci return 0; 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_ci key = cpu * MAX_PSTATE_ENTRIES + pstate_idx; 17662306a36Sopenharmony_ci val = bpf_map_lookup_elem(&pstate_duration, &key); 17762306a36Sopenharmony_ci if (val) 17862306a36Sopenharmony_ci __sync_fetch_and_add((long *)val, delta); 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci /* 18162306a36Sopenharmony_ci * When state equal to (u32)-1, the cpu just exits from one 18262306a36Sopenharmony_ci * specific idle state; for this case we need to record 18362306a36Sopenharmony_ci * interval for the pstate. 18462306a36Sopenharmony_ci * 18562306a36Sopenharmony_ci * OPP2 18662306a36Sopenharmony_ci * -----------+ 18762306a36Sopenharmony_ci * | OPP1 18862306a36Sopenharmony_ci * | +----------- 18962306a36Sopenharmony_ci * | Idle state | 19062306a36Sopenharmony_ci * +---------------------+ 19162306a36Sopenharmony_ci * 19262306a36Sopenharmony_ci * |<- cstate duration ->| 19362306a36Sopenharmony_ci * ^ ^ 19462306a36Sopenharmony_ci * cts cur_ts 19562306a36Sopenharmony_ci */ 19662306a36Sopenharmony_ci } else { 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ci key = cpu * MAX_CSTATE_ENTRIES + prev_state; 19962306a36Sopenharmony_ci val = bpf_map_lookup_elem(&cstate_duration, &key); 20062306a36Sopenharmony_ci if (val) 20162306a36Sopenharmony_ci __sync_fetch_and_add((long *)val, delta); 20262306a36Sopenharmony_ci } 20362306a36Sopenharmony_ci 20462306a36Sopenharmony_ci /* Update timestamp for pstate as new start time */ 20562306a36Sopenharmony_ci if (*pts) 20662306a36Sopenharmony_ci *pts = cur_ts; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci return 0; 20962306a36Sopenharmony_ci} 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ciSEC("tracepoint/power/cpu_frequency") 21262306a36Sopenharmony_ciint bpf_prog2(struct cpu_args *ctx) 21362306a36Sopenharmony_ci{ 21462306a36Sopenharmony_ci u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta; 21562306a36Sopenharmony_ci u32 key, cpu, pstate_idx; 21662306a36Sopenharmony_ci u64 *val; 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci cpu = ctx->cpu_id; 21962306a36Sopenharmony_ci 22062306a36Sopenharmony_ci key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME; 22162306a36Sopenharmony_ci pts = bpf_map_lookup_elem(&my_map, &key); 22262306a36Sopenharmony_ci if (!pts) 22362306a36Sopenharmony_ci return 0; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX; 22662306a36Sopenharmony_ci pstate = bpf_map_lookup_elem(&my_map, &key); 22762306a36Sopenharmony_ci if (!pstate) 22862306a36Sopenharmony_ci return 0; 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX; 23162306a36Sopenharmony_ci cstate = bpf_map_lookup_elem(&my_map, &key); 23262306a36Sopenharmony_ci if (!cstate) 23362306a36Sopenharmony_ci return 0; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci prev_state = *pstate; 23662306a36Sopenharmony_ci *pstate = ctx->state; 23762306a36Sopenharmony_ci 23862306a36Sopenharmony_ci if (!*pts) { 23962306a36Sopenharmony_ci *pts = bpf_ktime_get_ns(); 24062306a36Sopenharmony_ci return 0; 24162306a36Sopenharmony_ci } 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci cur_ts = bpf_ktime_get_ns(); 24462306a36Sopenharmony_ci delta = cur_ts - *pts; 24562306a36Sopenharmony_ci *pts = cur_ts; 24662306a36Sopenharmony_ci 24762306a36Sopenharmony_ci /* When CPU is in idle, bail out to skip pstate statistics */ 24862306a36Sopenharmony_ci if (*cstate != (u32)(-1)) 24962306a36Sopenharmony_ci return 0; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci /* 25262306a36Sopenharmony_ci * The cpu changes to another different OPP (in below diagram 25362306a36Sopenharmony_ci * change frequency from OPP3 to OPP1), need recording interval 25462306a36Sopenharmony_ci * for previous frequency OPP3 and update timestamp as start 25562306a36Sopenharmony_ci * time for new frequency OPP1. 25662306a36Sopenharmony_ci * 25762306a36Sopenharmony_ci * OPP3 25862306a36Sopenharmony_ci * +---------------------+ 25962306a36Sopenharmony_ci * OPP2 | | 26062306a36Sopenharmony_ci * ---------+ | 26162306a36Sopenharmony_ci * | OPP1 26262306a36Sopenharmony_ci * +--------------- 26362306a36Sopenharmony_ci * 26462306a36Sopenharmony_ci * |<- pstate duration ->| 26562306a36Sopenharmony_ci * ^ ^ 26662306a36Sopenharmony_ci * pts cur_ts 26762306a36Sopenharmony_ci */ 26862306a36Sopenharmony_ci pstate_idx = find_cpu_pstate_idx(*pstate); 26962306a36Sopenharmony_ci if (pstate_idx >= MAX_PSTATE_ENTRIES) 27062306a36Sopenharmony_ci return 0; 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci key = cpu * MAX_PSTATE_ENTRIES + pstate_idx; 27362306a36Sopenharmony_ci val = bpf_map_lookup_elem(&pstate_duration, &key); 27462306a36Sopenharmony_ci if (val) 27562306a36Sopenharmony_ci __sync_fetch_and_add((long *)val, delta); 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci return 0; 27862306a36Sopenharmony_ci} 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_cichar _license[] SEC("license") = "GPL"; 28162306a36Sopenharmony_ciu32 _version SEC("version") = LINUX_VERSION_CODE; 282