162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * cpu_rmap.c: CPU affinity reverse-map support
462306a36Sopenharmony_ci * Copyright 2011 Solarflare Communications Inc.
562306a36Sopenharmony_ci */
662306a36Sopenharmony_ci
762306a36Sopenharmony_ci#include <linux/cpu_rmap.h>
862306a36Sopenharmony_ci#include <linux/interrupt.h>
962306a36Sopenharmony_ci#include <linux/export.h>
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci/*
1262306a36Sopenharmony_ci * These functions maintain a mapping from CPUs to some ordered set of
1362306a36Sopenharmony_ci * objects with CPU affinities.  This can be seen as a reverse-map of
1462306a36Sopenharmony_ci * CPU affinity.  However, we do not assume that the object affinities
1562306a36Sopenharmony_ci * cover all CPUs in the system.  For those CPUs not directly covered
1662306a36Sopenharmony_ci * by object affinities, we attempt to find a nearest object based on
1762306a36Sopenharmony_ci * CPU topology.
1862306a36Sopenharmony_ci */
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_ci/**
2162306a36Sopenharmony_ci * alloc_cpu_rmap - allocate CPU affinity reverse-map
2262306a36Sopenharmony_ci * @size: Number of objects to be mapped
2362306a36Sopenharmony_ci * @flags: Allocation flags e.g. %GFP_KERNEL
2462306a36Sopenharmony_ci */
2562306a36Sopenharmony_cistruct cpu_rmap *alloc_cpu_rmap(unsigned int size, gfp_t flags)
2662306a36Sopenharmony_ci{
2762306a36Sopenharmony_ci	struct cpu_rmap *rmap;
2862306a36Sopenharmony_ci	unsigned int cpu;
2962306a36Sopenharmony_ci	size_t obj_offset;
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci	/* This is a silly number of objects, and we use u16 indices. */
3262306a36Sopenharmony_ci	if (size > 0xffff)
3362306a36Sopenharmony_ci		return NULL;
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci	/* Offset of object pointer array from base structure */
3662306a36Sopenharmony_ci	obj_offset = ALIGN(offsetof(struct cpu_rmap, near[nr_cpu_ids]),
3762306a36Sopenharmony_ci			   sizeof(void *));
3862306a36Sopenharmony_ci
3962306a36Sopenharmony_ci	rmap = kzalloc(obj_offset + size * sizeof(rmap->obj[0]), flags);
4062306a36Sopenharmony_ci	if (!rmap)
4162306a36Sopenharmony_ci		return NULL;
4262306a36Sopenharmony_ci
4362306a36Sopenharmony_ci	kref_init(&rmap->refcount);
4462306a36Sopenharmony_ci	rmap->obj = (void **)((char *)rmap + obj_offset);
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_ci	/* Initially assign CPUs to objects on a rota, since we have
4762306a36Sopenharmony_ci	 * no idea where the objects are.  Use infinite distance, so
4862306a36Sopenharmony_ci	 * any object with known distance is preferable.  Include the
4962306a36Sopenharmony_ci	 * CPUs that are not present/online, since we definitely want
5062306a36Sopenharmony_ci	 * any newly-hotplugged CPUs to have some object assigned.
5162306a36Sopenharmony_ci	 */
5262306a36Sopenharmony_ci	for_each_possible_cpu(cpu) {
5362306a36Sopenharmony_ci		rmap->near[cpu].index = cpu % size;
5462306a36Sopenharmony_ci		rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
5562306a36Sopenharmony_ci	}
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	rmap->size = size;
5862306a36Sopenharmony_ci	return rmap;
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ciEXPORT_SYMBOL(alloc_cpu_rmap);
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci/**
6362306a36Sopenharmony_ci * cpu_rmap_release - internal reclaiming helper called from kref_put
6462306a36Sopenharmony_ci * @ref: kref to struct cpu_rmap
6562306a36Sopenharmony_ci */
6662306a36Sopenharmony_cistatic void cpu_rmap_release(struct kref *ref)
6762306a36Sopenharmony_ci{
6862306a36Sopenharmony_ci	struct cpu_rmap *rmap = container_of(ref, struct cpu_rmap, refcount);
6962306a36Sopenharmony_ci	kfree(rmap);
7062306a36Sopenharmony_ci}
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci/**
7362306a36Sopenharmony_ci * cpu_rmap_get - internal helper to get new ref on a cpu_rmap
7462306a36Sopenharmony_ci * @rmap: reverse-map allocated with alloc_cpu_rmap()
7562306a36Sopenharmony_ci */
7662306a36Sopenharmony_cistatic inline void cpu_rmap_get(struct cpu_rmap *rmap)
7762306a36Sopenharmony_ci{
7862306a36Sopenharmony_ci	kref_get(&rmap->refcount);
7962306a36Sopenharmony_ci}
8062306a36Sopenharmony_ci
8162306a36Sopenharmony_ci/**
8262306a36Sopenharmony_ci * cpu_rmap_put - release ref on a cpu_rmap
8362306a36Sopenharmony_ci * @rmap: reverse-map allocated with alloc_cpu_rmap()
8462306a36Sopenharmony_ci */
8562306a36Sopenharmony_ciint cpu_rmap_put(struct cpu_rmap *rmap)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	return kref_put(&rmap->refcount, cpu_rmap_release);
8862306a36Sopenharmony_ci}
8962306a36Sopenharmony_ciEXPORT_SYMBOL(cpu_rmap_put);
9062306a36Sopenharmony_ci
9162306a36Sopenharmony_ci/* Reevaluate nearest object for given CPU, comparing with the given
9262306a36Sopenharmony_ci * neighbours at the given distance.
9362306a36Sopenharmony_ci */
9462306a36Sopenharmony_cistatic bool cpu_rmap_copy_neigh(struct cpu_rmap *rmap, unsigned int cpu,
9562306a36Sopenharmony_ci				const struct cpumask *mask, u16 dist)
9662306a36Sopenharmony_ci{
9762306a36Sopenharmony_ci	int neigh;
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	for_each_cpu(neigh, mask) {
10062306a36Sopenharmony_ci		if (rmap->near[cpu].dist > dist &&
10162306a36Sopenharmony_ci		    rmap->near[neigh].dist <= dist) {
10262306a36Sopenharmony_ci			rmap->near[cpu].index = rmap->near[neigh].index;
10362306a36Sopenharmony_ci			rmap->near[cpu].dist = dist;
10462306a36Sopenharmony_ci			return true;
10562306a36Sopenharmony_ci		}
10662306a36Sopenharmony_ci	}
10762306a36Sopenharmony_ci	return false;
10862306a36Sopenharmony_ci}
10962306a36Sopenharmony_ci
#ifdef DEBUG
/* Log the nearest object index and distance recorded for every possible
 * CPU, tagged with @prefix so successive dumps can be told apart.
 */
static void debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
	unsigned int cpu;

	pr_info("cpu_rmap %p, %s:\n", rmap, prefix);

	for_each_possible_cpu(cpu) {
		unsigned int obj_idx = rmap->near[cpu].index;

		pr_info("cpu %d -> obj %u (distance %u)\n",
			cpu, obj_idx, rmap->near[cpu].dist);
	}
}
#else
/* Debugging disabled: empty stub so call sites need no #ifdef. */
static inline void
debug_print_rmap(const struct cpu_rmap *rmap, const char *prefix)
{
}
#endif
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_cistatic int get_free_index(struct cpu_rmap *rmap)
13262306a36Sopenharmony_ci{
13362306a36Sopenharmony_ci	int i;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	for (i = 0; i < rmap->size; i++)
13662306a36Sopenharmony_ci		if (!rmap->obj[i])
13762306a36Sopenharmony_ci			return i;
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ci	return -ENOSPC;
14062306a36Sopenharmony_ci}
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_ci/**
14362306a36Sopenharmony_ci * cpu_rmap_add - add object to a rmap
14462306a36Sopenharmony_ci * @rmap: CPU rmap allocated with alloc_cpu_rmap()
14562306a36Sopenharmony_ci * @obj: Object to add to rmap
14662306a36Sopenharmony_ci *
14762306a36Sopenharmony_ci * Return index of object or -ENOSPC if no free entry was found
14862306a36Sopenharmony_ci */
14962306a36Sopenharmony_ciint cpu_rmap_add(struct cpu_rmap *rmap, void *obj)
15062306a36Sopenharmony_ci{
15162306a36Sopenharmony_ci	int index = get_free_index(rmap);
15262306a36Sopenharmony_ci
15362306a36Sopenharmony_ci	if (index < 0)
15462306a36Sopenharmony_ci		return index;
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_ci	rmap->obj[index] = obj;
15762306a36Sopenharmony_ci	return index;
15862306a36Sopenharmony_ci}
15962306a36Sopenharmony_ciEXPORT_SYMBOL(cpu_rmap_add);
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci/**
16262306a36Sopenharmony_ci * cpu_rmap_update - update CPU rmap following a change of object affinity
16362306a36Sopenharmony_ci * @rmap: CPU rmap to update
16462306a36Sopenharmony_ci * @index: Index of object whose affinity changed
16562306a36Sopenharmony_ci * @affinity: New CPU affinity of object
16662306a36Sopenharmony_ci */
16762306a36Sopenharmony_ciint cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
16862306a36Sopenharmony_ci		    const struct cpumask *affinity)
16962306a36Sopenharmony_ci{
17062306a36Sopenharmony_ci	cpumask_var_t update_mask;
17162306a36Sopenharmony_ci	unsigned int cpu;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	if (unlikely(!zalloc_cpumask_var(&update_mask, GFP_KERNEL)))
17462306a36Sopenharmony_ci		return -ENOMEM;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	/* Invalidate distance for all CPUs for which this used to be
17762306a36Sopenharmony_ci	 * the nearest object.  Mark those CPUs for update.
17862306a36Sopenharmony_ci	 */
17962306a36Sopenharmony_ci	for_each_online_cpu(cpu) {
18062306a36Sopenharmony_ci		if (rmap->near[cpu].index == index) {
18162306a36Sopenharmony_ci			rmap->near[cpu].dist = CPU_RMAP_DIST_INF;
18262306a36Sopenharmony_ci			cpumask_set_cpu(cpu, update_mask);
18362306a36Sopenharmony_ci		}
18462306a36Sopenharmony_ci	}
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	debug_print_rmap(rmap, "after invalidating old distances");
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	/* Set distance to 0 for all CPUs in the new affinity mask.
18962306a36Sopenharmony_ci	 * Mark all CPUs within their NUMA nodes for update.
19062306a36Sopenharmony_ci	 */
19162306a36Sopenharmony_ci	for_each_cpu(cpu, affinity) {
19262306a36Sopenharmony_ci		rmap->near[cpu].index = index;
19362306a36Sopenharmony_ci		rmap->near[cpu].dist = 0;
19462306a36Sopenharmony_ci		cpumask_or(update_mask, update_mask,
19562306a36Sopenharmony_ci			   cpumask_of_node(cpu_to_node(cpu)));
19662306a36Sopenharmony_ci	}
19762306a36Sopenharmony_ci
19862306a36Sopenharmony_ci	debug_print_rmap(rmap, "after updating neighbours");
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci	/* Update distances based on topology */
20162306a36Sopenharmony_ci	for_each_cpu(cpu, update_mask) {
20262306a36Sopenharmony_ci		if (cpu_rmap_copy_neigh(rmap, cpu,
20362306a36Sopenharmony_ci					topology_sibling_cpumask(cpu), 1))
20462306a36Sopenharmony_ci			continue;
20562306a36Sopenharmony_ci		if (cpu_rmap_copy_neigh(rmap, cpu,
20662306a36Sopenharmony_ci					topology_core_cpumask(cpu), 2))
20762306a36Sopenharmony_ci			continue;
20862306a36Sopenharmony_ci		if (cpu_rmap_copy_neigh(rmap, cpu,
20962306a36Sopenharmony_ci					cpumask_of_node(cpu_to_node(cpu)), 3))
21062306a36Sopenharmony_ci			continue;
21162306a36Sopenharmony_ci		/* We could continue into NUMA node distances, but for now
21262306a36Sopenharmony_ci		 * we give up.
21362306a36Sopenharmony_ci		 */
21462306a36Sopenharmony_ci	}
21562306a36Sopenharmony_ci
21662306a36Sopenharmony_ci	debug_print_rmap(rmap, "after copying neighbours");
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	free_cpumask_var(update_mask);
21962306a36Sopenharmony_ci	return 0;
22062306a36Sopenharmony_ci}
22162306a36Sopenharmony_ciEXPORT_SYMBOL(cpu_rmap_update);
22262306a36Sopenharmony_ci
22362306a36Sopenharmony_ci/* Glue between IRQ affinity notifiers and CPU rmaps */
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_cistruct irq_glue {
22662306a36Sopenharmony_ci	struct irq_affinity_notify notify;
22762306a36Sopenharmony_ci	struct cpu_rmap *rmap;
22862306a36Sopenharmony_ci	u16 index;
22962306a36Sopenharmony_ci};
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci/**
23262306a36Sopenharmony_ci * free_irq_cpu_rmap - free a CPU affinity reverse-map used for IRQs
23362306a36Sopenharmony_ci * @rmap: Reverse-map allocated with alloc_irq_cpu_map(), or %NULL
23462306a36Sopenharmony_ci *
23562306a36Sopenharmony_ci * Must be called in process context, before freeing the IRQs.
23662306a36Sopenharmony_ci */
23762306a36Sopenharmony_civoid free_irq_cpu_rmap(struct cpu_rmap *rmap)
23862306a36Sopenharmony_ci{
23962306a36Sopenharmony_ci	struct irq_glue *glue;
24062306a36Sopenharmony_ci	u16 index;
24162306a36Sopenharmony_ci
24262306a36Sopenharmony_ci	if (!rmap)
24362306a36Sopenharmony_ci		return;
24462306a36Sopenharmony_ci
24562306a36Sopenharmony_ci	for (index = 0; index < rmap->size; index++) {
24662306a36Sopenharmony_ci		glue = rmap->obj[index];
24762306a36Sopenharmony_ci		if (glue)
24862306a36Sopenharmony_ci			irq_set_affinity_notifier(glue->notify.irq, NULL);
24962306a36Sopenharmony_ci	}
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	cpu_rmap_put(rmap);
25262306a36Sopenharmony_ci}
25362306a36Sopenharmony_ciEXPORT_SYMBOL(free_irq_cpu_rmap);
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci/**
25662306a36Sopenharmony_ci * irq_cpu_rmap_notify - callback for IRQ subsystem when IRQ affinity updated
25762306a36Sopenharmony_ci * @notify: struct irq_affinity_notify passed by irq/manage.c
25862306a36Sopenharmony_ci * @mask: cpu mask for new SMP affinity
25962306a36Sopenharmony_ci *
26062306a36Sopenharmony_ci * This is executed in workqueue context.
26162306a36Sopenharmony_ci */
26262306a36Sopenharmony_cistatic void
26362306a36Sopenharmony_ciirq_cpu_rmap_notify(struct irq_affinity_notify *notify, const cpumask_t *mask)
26462306a36Sopenharmony_ci{
26562306a36Sopenharmony_ci	struct irq_glue *glue =
26662306a36Sopenharmony_ci		container_of(notify, struct irq_glue, notify);
26762306a36Sopenharmony_ci	int rc;
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	rc = cpu_rmap_update(glue->rmap, glue->index, mask);
27062306a36Sopenharmony_ci	if (rc)
27162306a36Sopenharmony_ci		pr_warn("irq_cpu_rmap_notify: update failed: %d\n", rc);
27262306a36Sopenharmony_ci}
27362306a36Sopenharmony_ci
27462306a36Sopenharmony_ci/**
27562306a36Sopenharmony_ci * irq_cpu_rmap_release - reclaiming callback for IRQ subsystem
27662306a36Sopenharmony_ci * @ref: kref to struct irq_affinity_notify passed by irq/manage.c
27762306a36Sopenharmony_ci */
27862306a36Sopenharmony_cistatic void irq_cpu_rmap_release(struct kref *ref)
27962306a36Sopenharmony_ci{
28062306a36Sopenharmony_ci	struct irq_glue *glue =
28162306a36Sopenharmony_ci		container_of(ref, struct irq_glue, notify.kref);
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	glue->rmap->obj[glue->index] = NULL;
28462306a36Sopenharmony_ci	cpu_rmap_put(glue->rmap);
28562306a36Sopenharmony_ci	kfree(glue);
28662306a36Sopenharmony_ci}
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_ci/**
28962306a36Sopenharmony_ci * irq_cpu_rmap_remove - remove an IRQ from a CPU affinity reverse-map
29062306a36Sopenharmony_ci * @rmap: The reverse-map
29162306a36Sopenharmony_ci * @irq: The IRQ number
29262306a36Sopenharmony_ci */
29362306a36Sopenharmony_ciint irq_cpu_rmap_remove(struct cpu_rmap *rmap, int irq)
29462306a36Sopenharmony_ci{
29562306a36Sopenharmony_ci	return irq_set_affinity_notifier(irq, NULL);
29662306a36Sopenharmony_ci}
29762306a36Sopenharmony_ciEXPORT_SYMBOL(irq_cpu_rmap_remove);
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci/**
30062306a36Sopenharmony_ci * irq_cpu_rmap_add - add an IRQ to a CPU affinity reverse-map
30162306a36Sopenharmony_ci * @rmap: The reverse-map
30262306a36Sopenharmony_ci * @irq: The IRQ number
30362306a36Sopenharmony_ci *
30462306a36Sopenharmony_ci * This adds an IRQ affinity notifier that will update the reverse-map
30562306a36Sopenharmony_ci * automatically.
30662306a36Sopenharmony_ci *
30762306a36Sopenharmony_ci * Must be called in process context, after the IRQ is allocated but
30862306a36Sopenharmony_ci * before it is bound with request_irq().
30962306a36Sopenharmony_ci */
31062306a36Sopenharmony_ciint irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
31162306a36Sopenharmony_ci{
31262306a36Sopenharmony_ci	struct irq_glue *glue = kzalloc(sizeof(*glue), GFP_KERNEL);
31362306a36Sopenharmony_ci	int rc;
31462306a36Sopenharmony_ci
31562306a36Sopenharmony_ci	if (!glue)
31662306a36Sopenharmony_ci		return -ENOMEM;
31762306a36Sopenharmony_ci	glue->notify.notify = irq_cpu_rmap_notify;
31862306a36Sopenharmony_ci	glue->notify.release = irq_cpu_rmap_release;
31962306a36Sopenharmony_ci	glue->rmap = rmap;
32062306a36Sopenharmony_ci	cpu_rmap_get(rmap);
32162306a36Sopenharmony_ci	rc = cpu_rmap_add(rmap, glue);
32262306a36Sopenharmony_ci	if (rc < 0)
32362306a36Sopenharmony_ci		goto err_add;
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci	glue->index = rc;
32662306a36Sopenharmony_ci	rc = irq_set_affinity_notifier(irq, &glue->notify);
32762306a36Sopenharmony_ci	if (rc)
32862306a36Sopenharmony_ci		goto err_set;
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci	return rc;
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_cierr_set:
33362306a36Sopenharmony_ci	rmap->obj[glue->index] = NULL;
33462306a36Sopenharmony_cierr_add:
33562306a36Sopenharmony_ci	cpu_rmap_put(glue->rmap);
33662306a36Sopenharmony_ci	kfree(glue);
33762306a36Sopenharmony_ci	return rc;
33862306a36Sopenharmony_ci}
33962306a36Sopenharmony_ciEXPORT_SYMBOL(irq_cpu_rmap_add);
340