162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci
362306a36Sopenharmony_ci/* Copyright (c) 2021 Facebook */
462306a36Sopenharmony_ci/* Copyright (c) 2021 Google */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <assert.h>
762306a36Sopenharmony_ci#include <limits.h>
862306a36Sopenharmony_ci#include <unistd.h>
962306a36Sopenharmony_ci#include <sys/file.h>
1062306a36Sopenharmony_ci#include <sys/time.h>
1162306a36Sopenharmony_ci#include <sys/resource.h>
1262306a36Sopenharmony_ci#include <linux/err.h>
1362306a36Sopenharmony_ci#include <linux/zalloc.h>
1462306a36Sopenharmony_ci#include <linux/perf_event.h>
1562306a36Sopenharmony_ci#include <api/fs/fs.h>
1662306a36Sopenharmony_ci#include <perf/bpf_perf.h>
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#include "affinity.h"
1962306a36Sopenharmony_ci#include "bpf_counter.h"
2062306a36Sopenharmony_ci#include "cgroup.h"
2162306a36Sopenharmony_ci#include "counts.h"
2262306a36Sopenharmony_ci#include "debug.h"
2362306a36Sopenharmony_ci#include "evsel.h"
2462306a36Sopenharmony_ci#include "evlist.h"
2562306a36Sopenharmony_ci#include "target.h"
2662306a36Sopenharmony_ci#include "cpumap.h"
2762306a36Sopenharmony_ci#include "thread_map.h"
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci#include "bpf_skel/bperf_cgroup.skel.h"
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_cistatic struct perf_event_attr cgrp_switch_attr = {
3262306a36Sopenharmony_ci	.type = PERF_TYPE_SOFTWARE,
3362306a36Sopenharmony_ci	.config = PERF_COUNT_SW_CGROUP_SWITCHES,
3462306a36Sopenharmony_ci	.size = sizeof(cgrp_switch_attr),
3562306a36Sopenharmony_ci	.sample_period = 1,
3662306a36Sopenharmony_ci	.disabled = 1,
3762306a36Sopenharmony_ci};
3862306a36Sopenharmony_ci
/* evsel of the cgroup-switches event created by bperf_load_program() */
static struct evsel *cgrp_switch;
/* BPF skeleton opened by bperf_load_program() and used by all callbacks */
static struct bperf_cgroup_bpf *skel;

/*
 * The int stored at row @cpu, column 0 of @evt's core.fd xyarray.  Callers
 * in this file pass a cpu-map index as @cpu.  Both arguments are
 * parenthesized so that arbitrary expressions expand correctly.
 */
#define FD(evt, cpu) (*(int *)xyarray__entry((evt)->core.fd, (cpu), 0))
4362306a36Sopenharmony_ci
4462306a36Sopenharmony_cistatic int bperf_load_program(struct evlist *evlist)
4562306a36Sopenharmony_ci{
4662306a36Sopenharmony_ci	struct bpf_link *link;
4762306a36Sopenharmony_ci	struct evsel *evsel;
4862306a36Sopenharmony_ci	struct cgroup *cgrp, *leader_cgrp;
4962306a36Sopenharmony_ci	int i, j;
5062306a36Sopenharmony_ci	struct perf_cpu cpu;
5162306a36Sopenharmony_ci	int total_cpus = cpu__max_cpu().cpu;
5262306a36Sopenharmony_ci	int map_size, map_fd;
5362306a36Sopenharmony_ci	int prog_fd, err;
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_ci	skel = bperf_cgroup_bpf__open();
5662306a36Sopenharmony_ci	if (!skel) {
5762306a36Sopenharmony_ci		pr_err("Failed to open cgroup skeleton\n");
5862306a36Sopenharmony_ci		return -1;
5962306a36Sopenharmony_ci	}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci	skel->rodata->num_cpus = total_cpus;
6262306a36Sopenharmony_ci	skel->rodata->num_events = evlist->core.nr_entries / nr_cgroups;
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_ci	BUG_ON(evlist->core.nr_entries % nr_cgroups != 0);
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	/* we need one copy of events per cpu for reading */
6762306a36Sopenharmony_ci	map_size = total_cpus * evlist->core.nr_entries / nr_cgroups;
6862306a36Sopenharmony_ci	bpf_map__set_max_entries(skel->maps.events, map_size);
6962306a36Sopenharmony_ci	bpf_map__set_max_entries(skel->maps.cgrp_idx, nr_cgroups);
7062306a36Sopenharmony_ci	/* previous result is saved in a per-cpu array */
7162306a36Sopenharmony_ci	map_size = evlist->core.nr_entries / nr_cgroups;
7262306a36Sopenharmony_ci	bpf_map__set_max_entries(skel->maps.prev_readings, map_size);
7362306a36Sopenharmony_ci	/* cgroup result needs all events (per-cpu) */
7462306a36Sopenharmony_ci	map_size = evlist->core.nr_entries;
7562306a36Sopenharmony_ci	bpf_map__set_max_entries(skel->maps.cgrp_readings, map_size);
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci	set_max_rlimit();
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	err = bperf_cgroup_bpf__load(skel);
8062306a36Sopenharmony_ci	if (err) {
8162306a36Sopenharmony_ci		pr_err("Failed to load cgroup skeleton\n");
8262306a36Sopenharmony_ci		goto out;
8362306a36Sopenharmony_ci	}
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_ci	if (cgroup_is_v2("perf_event") > 0)
8662306a36Sopenharmony_ci		skel->bss->use_cgroup_v2 = 1;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	err = -1;
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	cgrp_switch = evsel__new(&cgrp_switch_attr);
9162306a36Sopenharmony_ci	if (evsel__open_per_cpu(cgrp_switch, evlist->core.all_cpus, -1) < 0) {
9262306a36Sopenharmony_ci		pr_err("Failed to open cgroup switches event\n");
9362306a36Sopenharmony_ci		goto out;
9462306a36Sopenharmony_ci	}
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_ci	perf_cpu_map__for_each_cpu(cpu, i, evlist->core.all_cpus) {
9762306a36Sopenharmony_ci		link = bpf_program__attach_perf_event(skel->progs.on_cgrp_switch,
9862306a36Sopenharmony_ci						      FD(cgrp_switch, i));
9962306a36Sopenharmony_ci		if (IS_ERR(link)) {
10062306a36Sopenharmony_ci			pr_err("Failed to attach cgroup program\n");
10162306a36Sopenharmony_ci			err = PTR_ERR(link);
10262306a36Sopenharmony_ci			goto out;
10362306a36Sopenharmony_ci		}
10462306a36Sopenharmony_ci	}
10562306a36Sopenharmony_ci
10662306a36Sopenharmony_ci	/*
10762306a36Sopenharmony_ci	 * Update cgrp_idx map from cgroup-id to event index.
10862306a36Sopenharmony_ci	 */
10962306a36Sopenharmony_ci	cgrp = NULL;
11062306a36Sopenharmony_ci	i = 0;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	evlist__for_each_entry(evlist, evsel) {
11362306a36Sopenharmony_ci		if (cgrp == NULL || evsel->cgrp == leader_cgrp) {
11462306a36Sopenharmony_ci			leader_cgrp = evsel->cgrp;
11562306a36Sopenharmony_ci			evsel->cgrp = NULL;
11662306a36Sopenharmony_ci
11762306a36Sopenharmony_ci			/* open single copy of the events w/o cgroup */
11862306a36Sopenharmony_ci			err = evsel__open_per_cpu(evsel, evsel->core.cpus, -1);
11962306a36Sopenharmony_ci			if (err == 0)
12062306a36Sopenharmony_ci				evsel->supported = true;
12162306a36Sopenharmony_ci
12262306a36Sopenharmony_ci			map_fd = bpf_map__fd(skel->maps.events);
12362306a36Sopenharmony_ci			perf_cpu_map__for_each_cpu(cpu, j, evsel->core.cpus) {
12462306a36Sopenharmony_ci				int fd = FD(evsel, j);
12562306a36Sopenharmony_ci				__u32 idx = evsel->core.idx * total_cpus + cpu.cpu;
12662306a36Sopenharmony_ci
12762306a36Sopenharmony_ci				bpf_map_update_elem(map_fd, &idx, &fd, BPF_ANY);
12862306a36Sopenharmony_ci			}
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci			evsel->cgrp = leader_cgrp;
13162306a36Sopenharmony_ci		}
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci		if (evsel->cgrp == cgrp)
13462306a36Sopenharmony_ci			continue;
13562306a36Sopenharmony_ci
13662306a36Sopenharmony_ci		cgrp = evsel->cgrp;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci		if (read_cgroup_id(cgrp) < 0) {
13962306a36Sopenharmony_ci			pr_err("Failed to get cgroup id\n");
14062306a36Sopenharmony_ci			err = -1;
14162306a36Sopenharmony_ci			goto out;
14262306a36Sopenharmony_ci		}
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci		map_fd = bpf_map__fd(skel->maps.cgrp_idx);
14562306a36Sopenharmony_ci		err = bpf_map_update_elem(map_fd, &cgrp->id, &i, BPF_ANY);
14662306a36Sopenharmony_ci		if (err < 0) {
14762306a36Sopenharmony_ci			pr_err("Failed to update cgroup index map\n");
14862306a36Sopenharmony_ci			goto out;
14962306a36Sopenharmony_ci		}
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci		i++;
15262306a36Sopenharmony_ci	}
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	/*
15562306a36Sopenharmony_ci	 * bperf uses BPF_PROG_TEST_RUN to get accurate reading. Check
15662306a36Sopenharmony_ci	 * whether the kernel support it
15762306a36Sopenharmony_ci	 */
15862306a36Sopenharmony_ci	prog_fd = bpf_program__fd(skel->progs.trigger_read);
15962306a36Sopenharmony_ci	err = bperf_trigger_reading(prog_fd, 0);
16062306a36Sopenharmony_ci	if (err) {
16162306a36Sopenharmony_ci		pr_warning("The kernel does not support test_run for raw_tp BPF programs.\n"
16262306a36Sopenharmony_ci			   "Therefore, --for-each-cgroup might show inaccurate readings\n");
16362306a36Sopenharmony_ci		err = 0;
16462306a36Sopenharmony_ci	}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_ciout:
16762306a36Sopenharmony_ci	return err;
16862306a36Sopenharmony_ci}
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_cistatic int bperf_cgrp__load(struct evsel *evsel,
17162306a36Sopenharmony_ci			    struct target *target __maybe_unused)
17262306a36Sopenharmony_ci{
17362306a36Sopenharmony_ci	static bool bperf_loaded = false;
17462306a36Sopenharmony_ci
17562306a36Sopenharmony_ci	evsel->bperf_leader_prog_fd = -1;
17662306a36Sopenharmony_ci	evsel->bperf_leader_link_fd = -1;
17762306a36Sopenharmony_ci
17862306a36Sopenharmony_ci	if (!bperf_loaded && bperf_load_program(evsel->evlist))
17962306a36Sopenharmony_ci		return -1;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	bperf_loaded = true;
18262306a36Sopenharmony_ci	/* just to bypass bpf_counter_skip() */
18362306a36Sopenharmony_ci	evsel->follower_skel = (struct bperf_follower_bpf *)skel;
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_ci	return 0;
18662306a36Sopenharmony_ci}
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_cistatic int bperf_cgrp__install_pe(struct evsel *evsel __maybe_unused,
18962306a36Sopenharmony_ci				  int cpu __maybe_unused, int fd __maybe_unused)
19062306a36Sopenharmony_ci{
19162306a36Sopenharmony_ci	/* nothing to do */
19262306a36Sopenharmony_ci	return 0;
19362306a36Sopenharmony_ci}
19462306a36Sopenharmony_ci
19562306a36Sopenharmony_ci/*
19662306a36Sopenharmony_ci * trigger the leader prog on each cpu, so the cgrp_reading map could get
19762306a36Sopenharmony_ci * the latest results.
19862306a36Sopenharmony_ci */
19962306a36Sopenharmony_cistatic int bperf_cgrp__sync_counters(struct evlist *evlist)
20062306a36Sopenharmony_ci{
20162306a36Sopenharmony_ci	struct perf_cpu cpu;
20262306a36Sopenharmony_ci	int idx;
20362306a36Sopenharmony_ci	int prog_fd = bpf_program__fd(skel->progs.trigger_read);
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	perf_cpu_map__for_each_cpu(cpu, idx, evlist->core.all_cpus)
20662306a36Sopenharmony_ci		bperf_trigger_reading(prog_fd, cpu.cpu);
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_ci	return 0;
20962306a36Sopenharmony_ci}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_cistatic int bperf_cgrp__enable(struct evsel *evsel)
21262306a36Sopenharmony_ci{
21362306a36Sopenharmony_ci	if (evsel->core.idx)
21462306a36Sopenharmony_ci		return 0;
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	bperf_cgrp__sync_counters(evsel->evlist);
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	skel->bss->enabled = 1;
21962306a36Sopenharmony_ci	return 0;
22062306a36Sopenharmony_ci}
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_cistatic int bperf_cgrp__disable(struct evsel *evsel)
22362306a36Sopenharmony_ci{
22462306a36Sopenharmony_ci	if (evsel->core.idx)
22562306a36Sopenharmony_ci		return 0;
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	bperf_cgrp__sync_counters(evsel->evlist);
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	skel->bss->enabled = 0;
23062306a36Sopenharmony_ci	return 0;
23162306a36Sopenharmony_ci}
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_cistatic int bperf_cgrp__read(struct evsel *evsel)
23462306a36Sopenharmony_ci{
23562306a36Sopenharmony_ci	struct evlist *evlist = evsel->evlist;
23662306a36Sopenharmony_ci	int total_cpus = cpu__max_cpu().cpu;
23762306a36Sopenharmony_ci	struct perf_counts_values *counts;
23862306a36Sopenharmony_ci	struct bpf_perf_event_value *values;
23962306a36Sopenharmony_ci	int reading_map_fd, err = 0;
24062306a36Sopenharmony_ci
24162306a36Sopenharmony_ci	if (evsel->core.idx)
24262306a36Sopenharmony_ci		return 0;
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	bperf_cgrp__sync_counters(evsel->evlist);
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci	values = calloc(total_cpus, sizeof(*values));
24762306a36Sopenharmony_ci	if (values == NULL)
24862306a36Sopenharmony_ci		return -ENOMEM;
24962306a36Sopenharmony_ci
25062306a36Sopenharmony_ci	reading_map_fd = bpf_map__fd(skel->maps.cgrp_readings);
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	evlist__for_each_entry(evlist, evsel) {
25362306a36Sopenharmony_ci		__u32 idx = evsel->core.idx;
25462306a36Sopenharmony_ci		int i;
25562306a36Sopenharmony_ci		struct perf_cpu cpu;
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci		err = bpf_map_lookup_elem(reading_map_fd, &idx, values);
25862306a36Sopenharmony_ci		if (err) {
25962306a36Sopenharmony_ci			pr_err("bpf map lookup failed: idx=%u, event=%s, cgrp=%s\n",
26062306a36Sopenharmony_ci			       idx, evsel__name(evsel), evsel->cgrp->name);
26162306a36Sopenharmony_ci			goto out;
26262306a36Sopenharmony_ci		}
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci		perf_cpu_map__for_each_cpu(cpu, i, evsel->core.cpus) {
26562306a36Sopenharmony_ci			counts = perf_counts(evsel->counts, i, 0);
26662306a36Sopenharmony_ci			counts->val = values[cpu.cpu].counter;
26762306a36Sopenharmony_ci			counts->ena = values[cpu.cpu].enabled;
26862306a36Sopenharmony_ci			counts->run = values[cpu.cpu].running;
26962306a36Sopenharmony_ci		}
27062306a36Sopenharmony_ci	}
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_ciout:
27362306a36Sopenharmony_ci	free(values);
27462306a36Sopenharmony_ci	return err;
27562306a36Sopenharmony_ci}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_cistatic int bperf_cgrp__destroy(struct evsel *evsel)
27862306a36Sopenharmony_ci{
27962306a36Sopenharmony_ci	if (evsel->core.idx)
28062306a36Sopenharmony_ci		return 0;
28162306a36Sopenharmony_ci
28262306a36Sopenharmony_ci	bperf_cgroup_bpf__destroy(skel);
28362306a36Sopenharmony_ci	evsel__delete(cgrp_switch);  // it'll destroy on_switch progs too
28462306a36Sopenharmony_ci
28562306a36Sopenharmony_ci	return 0;
28662306a36Sopenharmony_ci}
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_cistruct bpf_counter_ops bperf_cgrp_ops = {
28962306a36Sopenharmony_ci	.load       = bperf_cgrp__load,
29062306a36Sopenharmony_ci	.enable     = bperf_cgrp__enable,
29162306a36Sopenharmony_ci	.disable    = bperf_cgrp__disable,
29262306a36Sopenharmony_ci	.read       = bperf_cgrp__read,
29362306a36Sopenharmony_ci	.install_pe = bperf_cgrp__install_pe,
29462306a36Sopenharmony_ci	.destroy    = bperf_cgrp__destroy,
29562306a36Sopenharmony_ci};
296