// SPDX-License-Identifier: GPL-2.0
/*
 * Fast batching percpu counters.
 */

#include <linux/percpu_counter.h>
#include <linux/mutex.h>
#include <linux/init.h>
#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/debugobjects.h>

#ifdef CONFIG_HOTPLUG_CPU
static LIST_HEAD(percpu_counters);
static DEFINE_SPINLOCK(percpu_counters_lock);
#endif

#ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER

static const struct debug_obj_descr percpu_counter_debug_descr;

static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
{
	struct percpu_counter *fbc = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		percpu_counter_destroy(fbc);
		debug_object_free(fbc, &percpu_counter_debug_descr);
		return true;
	default:
		return false;
	}
}

static const struct debug_obj_descr percpu_counter_debug_descr = {
	.name		= "percpu_counter",
	.fixup_free	= percpu_counter_fixup_free,
};

static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
{
	debug_object_init(fbc, &percpu_counter_debug_descr);
	debug_object_activate(fbc, &percpu_counter_debug_descr);
}

static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
{
	debug_object_deactivate(fbc, &percpu_counter_debug_descr);
	debug_object_free(fbc, &percpu_counter_debug_descr);
}

#else	/* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */
static inline void debug_percpu_counter_activate(struct percpu_counter *fbc)
{ }
static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
{ }
#endif	/* CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER */

void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
{
	int cpu;
	unsigned long flags;

	raw_spin_lock_irqsave(&fbc->lock, flags);
	for_each_possible_cpu(cpu) {
		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
		*pcount = 0;
	}
	fbc->count = amount;
	raw_spin_unlock_irqrestore(&fbc->lock, flags);
}
EXPORT_SYMBOL(percpu_counter_set);

/*
 * local_irq_save() is needed to make the function irq safe:
 * - The slow path would be ok as protected by an irq-safe spinlock.
 * - this_cpu_add() would be ok as it is irq-safe by definition.
 * But:
 * The decision between the slow path and the fast path, and the actual
 * update, must be atomic as well.  Otherwise a call in process context
 * could check the current values and decide that the fast path can be
 * used.  If an interrupt then occurs before the this_cpu_add(), and the
 * interrupt updates this_cpu(*fbc->counters), the this_cpu_add() that
 * executes after the interrupt has completed can produce values larger
 * than "batch" or even overflow.
 */
void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
{
	s64 count;
	unsigned long flags;

	local_irq_save(flags);
	count = __this_cpu_read(*fbc->counters) + amount;
	if (abs(count) >= batch) {
		raw_spin_lock(&fbc->lock);
		fbc->count += count;
		__this_cpu_sub(*fbc->counters, count - amount);
		raw_spin_unlock(&fbc->lock);
	} else {
		this_cpu_add(*fbc->counters, amount);
	}
	local_irq_restore(flags);
}
EXPORT_SYMBOL(percpu_counter_add_batch);
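
/*
 * Illustrative usage sketch (not part of this file): counting events
 * from any context, assuming a counter "nr_events" that was initialized
 * elsewhere with percpu_counter_init().  The percpu_counter_add()
 * wrapper in <linux/percpu_counter.h> passes the global
 * percpu_counter_batch for you:
 *
 *	percpu_counter_add_batch(&nr_events, 1, percpu_counter_batch);
 *
 * Each CPU accumulates up to "batch" locally and only then folds into
 * fbc->count under fbc->lock, so the common case takes no lock.
 */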

/*
 * For a percpu_counter with a big batch, the deviation of its count can
 * be big, and there may be a requirement to reduce that deviation, e.g.
 * when the counter's batch is decreased at runtime for better accuracy;
 * this can be achieved by running this sync function on each CPU.
 */
void percpu_counter_sync(struct percpu_counter *fbc)
{
	unsigned long flags;
	s64 count;

	raw_spin_lock_irqsave(&fbc->lock, flags);
	count = __this_cpu_read(*fbc->counters);
	fbc->count += count;
	__this_cpu_sub(*fbc->counters, count);
	raw_spin_unlock_irqrestore(&fbc->lock, flags);
}
EXPORT_SYMBOL(percpu_counter_sync);
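
/*
 * Illustrative sketch (hypothetical helper, not from this file): after
 * shrinking a counter's batch at runtime, the residue parked in each
 * per-cpu slot can be folded back by running the sync on every CPU:
 *
 *	static void sync_fbc(void *info)
 *	{
 *		percpu_counter_sync(info);
 *	}
 *
 *	on_each_cpu(sync_fbc, &fbc, 1);
 *
 * "sync_fbc" and "fbc" are assumed names for this example.
 */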

/*
 * Add up all the per-cpu counts, return the result.  This is a more accurate
 * but much slower version of percpu_counter_read_positive().
 *
 * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums
 * from CPUs that are in the process of being taken offline.  Dying CPUs have
 * been removed from the online mask, but may not have had the hotplug dead
 * notifier called to fold the percpu count back into the global counter sum.
 * By including dying CPUs in the iteration mask, we avoid this race condition
 * so __percpu_counter_sum() just does the right thing when CPUs are being
 * taken offline.
 */
s64 __percpu_counter_sum(struct percpu_counter *fbc)
{
	s64 ret;
	int cpu;
	unsigned long flags;

	raw_spin_lock_irqsave(&fbc->lock, flags);
	ret = fbc->count;
	for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) {
		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
		ret += *pcount;
	}
	raw_spin_unlock_irqrestore(&fbc->lock, flags);
	return ret;
}
EXPORT_SYMBOL(__percpu_counter_sum);
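
/*
 * Illustrative trade-off sketch (assumed caller, not from this file):
 * percpu_counter_read() is O(1) and lockless but may deviate by up to
 * batch * num_online_cpus(); percpu_counter_sum() walks every CPU under
 * fbc->lock for the precise value:
 *
 *	s64 fast  = percpu_counter_read(&fbc);
 *	s64 exact = percpu_counter_sum(&fbc);
 */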
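
/*
 * All nr_counters per-cpu slots below are carved out of a single percpu
 * allocation: counter i points at offset i * counter_size.  On
 * allocation failure only fbc[0].counters is set to NULL, which is
 * exactly what percpu_counter_destroy_many() tests for.
 */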
int __percpu_counter_init_many(struct percpu_counter *fbc, s64 amount,
			       gfp_t gfp, u32 nr_counters,
			       struct lock_class_key *key)
{
	unsigned long flags __maybe_unused;
	size_t counter_size;
	s32 __percpu *counters;
	u32 i;

	counter_size = ALIGN(sizeof(*counters), __alignof__(*counters));
	counters = __alloc_percpu_gfp(nr_counters * counter_size,
				      __alignof__(*counters), gfp);
	if (!counters) {
		fbc[0].counters = NULL;
		return -ENOMEM;
	}

	for (i = 0; i < nr_counters; i++) {
		raw_spin_lock_init(&fbc[i].lock);
		lockdep_set_class(&fbc[i].lock, key);
#ifdef CONFIG_HOTPLUG_CPU
		INIT_LIST_HEAD(&fbc[i].list);
#endif
		fbc[i].count = amount;
		fbc[i].counters = (void *)counters + (i * counter_size);

		debug_percpu_counter_activate(&fbc[i]);
	}

#ifdef CONFIG_HOTPLUG_CPU
	spin_lock_irqsave(&percpu_counters_lock, flags);
	for (i = 0; i < nr_counters; i++)
		list_add(&fbc[i].list, &percpu_counters);
	spin_unlock_irqrestore(&percpu_counters_lock, flags);
#endif
	return 0;
}
EXPORT_SYMBOL(__percpu_counter_init_many);

void percpu_counter_destroy_many(struct percpu_counter *fbc, u32 nr_counters)
{
	unsigned long flags __maybe_unused;
	u32 i;

	if (WARN_ON_ONCE(!fbc))
		return;

	if (!fbc[0].counters)
		return;

	for (i = 0; i < nr_counters; i++)
		debug_percpu_counter_deactivate(&fbc[i]);

#ifdef CONFIG_HOTPLUG_CPU
	spin_lock_irqsave(&percpu_counters_lock, flags);
	for (i = 0; i < nr_counters; i++)
		list_del(&fbc[i].list);
	spin_unlock_irqrestore(&percpu_counters_lock, flags);
#endif

	free_percpu(fbc[0].counters);

	for (i = 0; i < nr_counters; i++)
		fbc[i].counters = NULL;
}
EXPORT_SYMBOL(percpu_counter_destroy_many);

int percpu_counter_batch __read_mostly = 32;
EXPORT_SYMBOL(percpu_counter_batch);

static int compute_batch_value(unsigned int cpu)
{
	int nr = num_online_cpus();

	percpu_counter_batch = max(32, nr*2);
	return 0;
}
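
/*
 * Worked example (illustrative): with 4 online CPUs the floor wins,
 * max(32, 4 * 2) = 32; with 64 CPUs the batch becomes
 * max(32, 64 * 2) = 128, so each CPU may then drift up to 128 from the
 * global count before folding into fbc->count.
 */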
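
/*
 * Hotplug "dead" callback: fold the departed CPU's residue into each
 * global count so nothing is stranded in a per-cpu slot that will not
 * be read again, and recompute the batch for the new CPU count.
 */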
static int percpu_counter_cpu_dead(unsigned int cpu)
{
#ifdef CONFIG_HOTPLUG_CPU
	struct percpu_counter *fbc;

	compute_batch_value(cpu);

	spin_lock_irq(&percpu_counters_lock);
	list_for_each_entry(fbc, &percpu_counters, list) {
		s32 *pcount;

		raw_spin_lock(&fbc->lock);
		pcount = per_cpu_ptr(fbc->counters, cpu);
		fbc->count += *pcount;
		*pcount = 0;
		raw_spin_unlock(&fbc->lock);
	}
	spin_unlock_irq(&percpu_counters_lock);
#endif
	return 0;
}

/*
 * Compare counter against given value.
 * Return 1 if greater, 0 if equal, and -1 if less.
 */
int __percpu_counter_compare(struct percpu_counter *fbc, s64 rhs, s32 batch)
{
	s64	count;

	count = percpu_counter_read(fbc);
	/* Check to see if rough count will be sufficient for comparison */
	if (abs(count - rhs) > (batch * num_online_cpus())) {
		if (count > rhs)
			return 1;
		else
			return -1;
	}
	/* Need to use precise count */
	count = percpu_counter_sum(fbc);
	if (count > rhs)
		return 1;
	else if (count < rhs)
		return -1;
	else
		return 0;
}
EXPORT_SYMBOL(__percpu_counter_compare);
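
/*
 * Illustrative caller sketch (hypothetical, not from this file): an
 * ENOSPC-style check that only pays for the precise sum when the
 * approximate count is within batch * num_online_cpus() of the limit.
 * "free_blocks" and "needed" are assumed names:
 *
 *	if (__percpu_counter_compare(&free_blocks, needed,
 *				     percpu_counter_batch) < 0)
 *		return -ENOSPC;
 */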

static int __init percpu_counter_startup(void)
{
	int ret;

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "lib/percpu_cnt:online",
				compute_batch_value, NULL);
	WARN_ON(ret < 0);
	ret = cpuhp_setup_state_nocalls(CPUHP_PERCPU_CNT_DEAD,
					"lib/percpu_cnt:dead", NULL,
					percpu_counter_cpu_dead);
	WARN_ON(ret < 0);
	return 0;
}
module_init(percpu_counter_startup);