162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci#include <linux/version.h>
462306a36Sopenharmony_ci#include <linux/ptrace.h>
562306a36Sopenharmony_ci#include <uapi/linux/bpf.h>
662306a36Sopenharmony_ci#include <bpf/bpf_helpers.h>
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci/*
962306a36Sopenharmony_ci * The CPU number, cstate number and pstate number are based
1062306a36Sopenharmony_ci * on 96boards Hikey with octa CA53 CPUs.
1162306a36Sopenharmony_ci *
 * Every CPU has three idle states for cstate:
 *   WFI, CPU_OFF, CLUSTER_OFF
 *
 * Every CPU has 5 operating points:
 *   208MHz, 432MHz, 729MHz, 960MHz, 1200MHz
 *
 * This code is based on these assumptions; other platforms
 * need to adjust these definitions.
2062306a36Sopenharmony_ci */
2162306a36Sopenharmony_ci#define MAX_CPU			8
2262306a36Sopenharmony_ci#define MAX_PSTATE_ENTRIES	5
2362306a36Sopenharmony_ci#define MAX_CSTATE_ENTRIES	3
2462306a36Sopenharmony_ci
/*
 * Table of known operating points, matched against the state value of the
 * cpu_frequency event. The entries correspond to the 208MHz, 432MHz,
 * 729MHz, 960MHz and 1200MHz OPPs, i.e. they are expressed in kHz.
 * The position of a frequency in this table is its pstate index.
 */
static int cpu_opps[] = { 208000, 432000, 729000, 960000, 1200000 };
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_ci/*
 * my_map structure is used to record cstate and pstate index and
 * timestamp (Idx, Ts); when a new event arrives we need to update
 * the combination with the new state index and timestamp (Idx`, Ts`).
 *
 * Based on (Idx, Ts) and (Idx`, Ts`) we can calculate the time
 * interval for the previous state: Duration(Idx) = Ts` - Ts.
 *
 * Every CPU has one array as below for recording state index and
 * timestamp, with cstate and pstate recorded separately:
3762306a36Sopenharmony_ci *
3862306a36Sopenharmony_ci * +--------------------------+
3962306a36Sopenharmony_ci * | cstate timestamp         |
4062306a36Sopenharmony_ci * +--------------------------+
4162306a36Sopenharmony_ci * | cstate index             |
4262306a36Sopenharmony_ci * +--------------------------+
4362306a36Sopenharmony_ci * | pstate timestamp         |
4462306a36Sopenharmony_ci * +--------------------------+
4562306a36Sopenharmony_ci * | pstate index             |
4662306a36Sopenharmony_ci * +--------------------------+
4762306a36Sopenharmony_ci */
4862306a36Sopenharmony_ci#define MAP_OFF_CSTATE_TIME	0
4962306a36Sopenharmony_ci#define MAP_OFF_CSTATE_IDX	1
5062306a36Sopenharmony_ci#define MAP_OFF_PSTATE_TIME	2
5162306a36Sopenharmony_ci#define MAP_OFF_PSTATE_IDX	3
5262306a36Sopenharmony_ci#define MAP_OFF_NUM		4
5362306a36Sopenharmony_ci
/*
 * Per-CPU bookkeeping: MAP_OFF_NUM consecutive u64 slots for each CPU.
 * A slot is addressed as cpu * MAP_OFF_NUM + MAP_OFF_<field>.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAX_CPU * MAP_OFF_NUM);
} my_map SEC(".maps");
6062306a36Sopenharmony_ci
/*
 * cstate_duration records duration time for every idle state per CPU.
 *
 * Keyed by cpu * MAX_CSTATE_ENTRIES + idle state index; each value is the
 * running sum of bpf_ktime_get_ns() deltas spent in that idle state.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAX_CPU * MAX_CSTATE_ENTRIES);
} cstate_duration SEC(".maps");
6862306a36Sopenharmony_ci
/*
 * pstate_duration records duration time for every operating point per CPU.
 *
 * Keyed by cpu * MAX_PSTATE_ENTRIES + index into cpu_opps[]; each value is
 * the running sum of bpf_ktime_get_ns() deltas spent at that frequency.
 */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__type(key, u32);
	__type(value, u64);
	__uint(max_entries, MAX_CPU * MAX_PSTATE_ENTRIES);
} pstate_duration SEC(".maps");
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci/*
7862306a36Sopenharmony_ci * The trace events for cpu_idle and cpu_frequency are taken from:
7962306a36Sopenharmony_ci * /sys/kernel/tracing/events/power/cpu_idle/format
8062306a36Sopenharmony_ci * /sys/kernel/tracing/events/power/cpu_frequency/format
8162306a36Sopenharmony_ci *
8262306a36Sopenharmony_ci * These two events have same format, so define one common structure.
8362306a36Sopenharmony_ci */
struct cpu_args {
	/*
	 * Leading 8 bytes of the event record; never read by the programs.
	 * NOTE(review): presumably the common tracepoint header fields -
	 * confirm against the format files listed above.
	 */
	u64 pad;
	/*
	 * cpu_idle: idle state being entered, or (u32)-1 on idle exit.
	 * cpu_frequency: the new frequency, compared against cpu_opps[].
	 */
	u32 state;
	/* CPU the event applies to; used to index the per-CPU map slots */
	u32 cpu_id;
};
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci/* calculate pstate index, returns MAX_PSTATE_ENTRIES for failure */
9162306a36Sopenharmony_cistatic u32 find_cpu_pstate_idx(u32 frequency)
9262306a36Sopenharmony_ci{
9362306a36Sopenharmony_ci	u32 i;
9462306a36Sopenharmony_ci
9562306a36Sopenharmony_ci	for (i = 0; i < sizeof(cpu_opps) / sizeof(u32); i++) {
9662306a36Sopenharmony_ci		if (frequency == cpu_opps[i])
9762306a36Sopenharmony_ci			return i;
9862306a36Sopenharmony_ci	}
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	return i;
10162306a36Sopenharmony_ci}
10262306a36Sopenharmony_ci
10362306a36Sopenharmony_ciSEC("tracepoint/power/cpu_idle")
10462306a36Sopenharmony_ciint bpf_prog1(struct cpu_args *ctx)
10562306a36Sopenharmony_ci{
10662306a36Sopenharmony_ci	u64 *cts, *pts, *cstate, *pstate, prev_state, cur_ts, delta;
10762306a36Sopenharmony_ci	u32 key, cpu, pstate_idx;
10862306a36Sopenharmony_ci	u64 *val;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	if (ctx->cpu_id > MAX_CPU)
11162306a36Sopenharmony_ci		return 0;
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ci	cpu = ctx->cpu_id;
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_ci	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_TIME;
11662306a36Sopenharmony_ci	cts = bpf_map_lookup_elem(&my_map, &key);
11762306a36Sopenharmony_ci	if (!cts)
11862306a36Sopenharmony_ci		return 0;
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ci	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
12162306a36Sopenharmony_ci	cstate = bpf_map_lookup_elem(&my_map, &key);
12262306a36Sopenharmony_ci	if (!cstate)
12362306a36Sopenharmony_ci		return 0;
12462306a36Sopenharmony_ci
12562306a36Sopenharmony_ci	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
12662306a36Sopenharmony_ci	pts = bpf_map_lookup_elem(&my_map, &key);
12762306a36Sopenharmony_ci	if (!pts)
12862306a36Sopenharmony_ci		return 0;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
13162306a36Sopenharmony_ci	pstate = bpf_map_lookup_elem(&my_map, &key);
13262306a36Sopenharmony_ci	if (!pstate)
13362306a36Sopenharmony_ci		return 0;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	prev_state = *cstate;
13662306a36Sopenharmony_ci	*cstate = ctx->state;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	if (!*cts) {
13962306a36Sopenharmony_ci		*cts = bpf_ktime_get_ns();
14062306a36Sopenharmony_ci		return 0;
14162306a36Sopenharmony_ci	}
14262306a36Sopenharmony_ci
14362306a36Sopenharmony_ci	cur_ts = bpf_ktime_get_ns();
14462306a36Sopenharmony_ci	delta = cur_ts - *cts;
14562306a36Sopenharmony_ci	*cts = cur_ts;
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	/*
14862306a36Sopenharmony_ci	 * When state doesn't equal to (u32)-1, the cpu will enter
14962306a36Sopenharmony_ci	 * one idle state; for this case we need to record interval
15062306a36Sopenharmony_ci	 * for the pstate.
15162306a36Sopenharmony_ci	 *
15262306a36Sopenharmony_ci	 *                 OPP2
15362306a36Sopenharmony_ci	 *            +---------------------+
15462306a36Sopenharmony_ci	 *     OPP1   |                     |
15562306a36Sopenharmony_ci	 *   ---------+                     |
15662306a36Sopenharmony_ci	 *                                  |  Idle state
15762306a36Sopenharmony_ci	 *                                  +---------------
15862306a36Sopenharmony_ci	 *
15962306a36Sopenharmony_ci	 *            |<- pstate duration ->|
16062306a36Sopenharmony_ci	 *            ^                     ^
16162306a36Sopenharmony_ci	 *           pts                  cur_ts
16262306a36Sopenharmony_ci	 */
16362306a36Sopenharmony_ci	if (ctx->state != (u32)-1) {
16462306a36Sopenharmony_ci
16562306a36Sopenharmony_ci		/* record pstate after have first cpu_frequency event */
16662306a36Sopenharmony_ci		if (!*pts)
16762306a36Sopenharmony_ci			return 0;
16862306a36Sopenharmony_ci
16962306a36Sopenharmony_ci		delta = cur_ts - *pts;
17062306a36Sopenharmony_ci
17162306a36Sopenharmony_ci		pstate_idx = find_cpu_pstate_idx(*pstate);
17262306a36Sopenharmony_ci		if (pstate_idx >= MAX_PSTATE_ENTRIES)
17362306a36Sopenharmony_ci			return 0;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci		key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
17662306a36Sopenharmony_ci		val = bpf_map_lookup_elem(&pstate_duration, &key);
17762306a36Sopenharmony_ci		if (val)
17862306a36Sopenharmony_ci			__sync_fetch_and_add((long *)val, delta);
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	/*
18162306a36Sopenharmony_ci	 * When state equal to (u32)-1, the cpu just exits from one
18262306a36Sopenharmony_ci	 * specific idle state; for this case we need to record
18362306a36Sopenharmony_ci	 * interval for the pstate.
18462306a36Sopenharmony_ci	 *
18562306a36Sopenharmony_ci	 *       OPP2
18662306a36Sopenharmony_ci	 *   -----------+
18762306a36Sopenharmony_ci	 *              |                          OPP1
18862306a36Sopenharmony_ci	 *              |                     +-----------
18962306a36Sopenharmony_ci	 *              |     Idle state      |
19062306a36Sopenharmony_ci	 *              +---------------------+
19162306a36Sopenharmony_ci	 *
19262306a36Sopenharmony_ci	 *              |<- cstate duration ->|
19362306a36Sopenharmony_ci	 *              ^                     ^
19462306a36Sopenharmony_ci	 *             cts                  cur_ts
19562306a36Sopenharmony_ci	 */
19662306a36Sopenharmony_ci	} else {
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci		key = cpu * MAX_CSTATE_ENTRIES + prev_state;
19962306a36Sopenharmony_ci		val = bpf_map_lookup_elem(&cstate_duration, &key);
20062306a36Sopenharmony_ci		if (val)
20162306a36Sopenharmony_ci			__sync_fetch_and_add((long *)val, delta);
20262306a36Sopenharmony_ci	}
20362306a36Sopenharmony_ci
20462306a36Sopenharmony_ci	/* Update timestamp for pstate as new start time */
20562306a36Sopenharmony_ci	if (*pts)
20662306a36Sopenharmony_ci		*pts = cur_ts;
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	return 0;
20962306a36Sopenharmony_ci}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ciSEC("tracepoint/power/cpu_frequency")
21262306a36Sopenharmony_ciint bpf_prog2(struct cpu_args *ctx)
21362306a36Sopenharmony_ci{
21462306a36Sopenharmony_ci	u64 *pts, *cstate, *pstate, prev_state, cur_ts, delta;
21562306a36Sopenharmony_ci	u32 key, cpu, pstate_idx;
21662306a36Sopenharmony_ci	u64 *val;
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	cpu = ctx->cpu_id;
21962306a36Sopenharmony_ci
22062306a36Sopenharmony_ci	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_TIME;
22162306a36Sopenharmony_ci	pts = bpf_map_lookup_elem(&my_map, &key);
22262306a36Sopenharmony_ci	if (!pts)
22362306a36Sopenharmony_ci		return 0;
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_ci	key = cpu * MAP_OFF_NUM + MAP_OFF_PSTATE_IDX;
22662306a36Sopenharmony_ci	pstate = bpf_map_lookup_elem(&my_map, &key);
22762306a36Sopenharmony_ci	if (!pstate)
22862306a36Sopenharmony_ci		return 0;
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci	key = cpu * MAP_OFF_NUM + MAP_OFF_CSTATE_IDX;
23162306a36Sopenharmony_ci	cstate = bpf_map_lookup_elem(&my_map, &key);
23262306a36Sopenharmony_ci	if (!cstate)
23362306a36Sopenharmony_ci		return 0;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	prev_state = *pstate;
23662306a36Sopenharmony_ci	*pstate = ctx->state;
23762306a36Sopenharmony_ci
23862306a36Sopenharmony_ci	if (!*pts) {
23962306a36Sopenharmony_ci		*pts = bpf_ktime_get_ns();
24062306a36Sopenharmony_ci		return 0;
24162306a36Sopenharmony_ci	}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	cur_ts = bpf_ktime_get_ns();
24462306a36Sopenharmony_ci	delta = cur_ts - *pts;
24562306a36Sopenharmony_ci	*pts = cur_ts;
24662306a36Sopenharmony_ci
24762306a36Sopenharmony_ci	/* When CPU is in idle, bail out to skip pstate statistics */
24862306a36Sopenharmony_ci	if (*cstate != (u32)(-1))
24962306a36Sopenharmony_ci		return 0;
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	/*
25262306a36Sopenharmony_ci	 * The cpu changes to another different OPP (in below diagram
25362306a36Sopenharmony_ci	 * change frequency from OPP3 to OPP1), need recording interval
25462306a36Sopenharmony_ci	 * for previous frequency OPP3 and update timestamp as start
25562306a36Sopenharmony_ci	 * time for new frequency OPP1.
25662306a36Sopenharmony_ci	 *
25762306a36Sopenharmony_ci	 *                 OPP3
25862306a36Sopenharmony_ci	 *            +---------------------+
25962306a36Sopenharmony_ci	 *     OPP2   |                     |
26062306a36Sopenharmony_ci	 *   ---------+                     |
26162306a36Sopenharmony_ci	 *                                  |    OPP1
26262306a36Sopenharmony_ci	 *                                  +---------------
26362306a36Sopenharmony_ci	 *
26462306a36Sopenharmony_ci	 *            |<- pstate duration ->|
26562306a36Sopenharmony_ci	 *            ^                     ^
26662306a36Sopenharmony_ci	 *           pts                  cur_ts
26762306a36Sopenharmony_ci	 */
26862306a36Sopenharmony_ci	pstate_idx = find_cpu_pstate_idx(*pstate);
26962306a36Sopenharmony_ci	if (pstate_idx >= MAX_PSTATE_ENTRIES)
27062306a36Sopenharmony_ci		return 0;
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ci	key = cpu * MAX_PSTATE_ENTRIES + pstate_idx;
27362306a36Sopenharmony_ci	val = bpf_map_lookup_elem(&pstate_duration, &key);
27462306a36Sopenharmony_ci	if (val)
27562306a36Sopenharmony_ci		__sync_fetch_and_add((long *)val, delta);
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	return 0;
27862306a36Sopenharmony_ci}
27962306a36Sopenharmony_ci
/* License declaration, emitted into the "license" section of the object */
char _license[] SEC("license") = "GPL";
/* Kernel version this object was built against, emitted into the "version" section */
u32 _version SEC("version") = LINUX_VERSION_CODE;
282