162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright(c) 2015 - 2020 Intel Corporation.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/topology.h>
762306a36Sopenharmony_ci#include <linux/cpumask.h>
862306a36Sopenharmony_ci#include <linux/interrupt.h>
962306a36Sopenharmony_ci#include <linux/numa.h>
1062306a36Sopenharmony_ci
1162306a36Sopenharmony_ci#include "hfi.h"
1262306a36Sopenharmony_ci#include "affinity.h"
1362306a36Sopenharmony_ci#include "sdma.h"
1462306a36Sopenharmony_ci#include "trace.h"
1562306a36Sopenharmony_ci
/*
 * Driver-wide affinity state.  .list holds one hfi1_affinity_node per
 * NUMA node that has had a device initialized; .lock protects the list
 * and the per-entry CPU sets.
 */
struct hfi1_affinity_node_list node_affinity = {
	.list = LIST_HEAD_INIT(node_affinity.list),
	.lock = __MUTEX_INITIALIZER(node_affinity.lock)
};
2062306a36Sopenharmony_ci
/*
 * Name of IRQ types, indexed by enum irq_type.
 * Must be kept in the same order as that enum.
 */
static const char * const irq_type_names[] = {
	"SDMA",
	"RCVCTXT",
	"NETDEVCTXT",
	"GENERAL",
	"OTHER",
};
2962306a36Sopenharmony_ci
/*
 * Per NUMA node count of HFI devices.  Allocated (num_possible_nodes
 * entries) in node_affinity_init() and freed in node_affinity_destroy_all().
 */
static unsigned int *hfi1_per_node_cntr;
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_cistatic inline void init_cpu_mask_set(struct cpu_mask_set *set)
3462306a36Sopenharmony_ci{
3562306a36Sopenharmony_ci	cpumask_clear(&set->mask);
3662306a36Sopenharmony_ci	cpumask_clear(&set->used);
3762306a36Sopenharmony_ci	set->gen = 0;
3862306a36Sopenharmony_ci}
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci/* Increment generation of CPU set if needed */
4162306a36Sopenharmony_cistatic void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
4262306a36Sopenharmony_ci{
4362306a36Sopenharmony_ci	if (cpumask_equal(&set->mask, &set->used)) {
4462306a36Sopenharmony_ci		/*
4562306a36Sopenharmony_ci		 * We've used up all the CPUs, bump up the generation
4662306a36Sopenharmony_ci		 * and reset the 'used' map
4762306a36Sopenharmony_ci		 */
4862306a36Sopenharmony_ci		set->gen++;
4962306a36Sopenharmony_ci		cpumask_clear(&set->used);
5062306a36Sopenharmony_ci	}
5162306a36Sopenharmony_ci}
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_cistatic void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
5462306a36Sopenharmony_ci{
5562306a36Sopenharmony_ci	if (cpumask_empty(&set->used) && set->gen) {
5662306a36Sopenharmony_ci		set->gen--;
5762306a36Sopenharmony_ci		cpumask_copy(&set->used, &set->mask);
5862306a36Sopenharmony_ci	}
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci/* Get the first CPU from the list of unused CPUs in a CPU set data structure */
6262306a36Sopenharmony_cistatic int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
6362306a36Sopenharmony_ci{
6462306a36Sopenharmony_ci	int cpu;
6562306a36Sopenharmony_ci
6662306a36Sopenharmony_ci	if (!diff || !set)
6762306a36Sopenharmony_ci		return -EINVAL;
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_ci	_cpu_mask_set_gen_inc(set);
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	/* Find out CPUs left in CPU mask */
7262306a36Sopenharmony_ci	cpumask_andnot(diff, &set->mask, &set->used);
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	cpu = cpumask_first(diff);
7562306a36Sopenharmony_ci	if (cpu >= nr_cpu_ids) /* empty */
7662306a36Sopenharmony_ci		cpu = -EINVAL;
7762306a36Sopenharmony_ci	else
7862306a36Sopenharmony_ci		cpumask_set_cpu(cpu, &set->used);
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	return cpu;
8162306a36Sopenharmony_ci}
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_cistatic void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
8462306a36Sopenharmony_ci{
8562306a36Sopenharmony_ci	if (!set)
8662306a36Sopenharmony_ci		return;
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_ci	cpumask_clear_cpu(cpu, &set->used);
8962306a36Sopenharmony_ci	_cpu_mask_set_gen_dec(set);
9062306a36Sopenharmony_ci}
9162306a36Sopenharmony_ci
/*
 * Initialize non-HT cpu cores mask.
 *
 * Builds node_affinity.real_cpu_mask: the online CPUs with hyper-thread
 * siblings removed, keeping one logical CPU per physical core.  Assumes
 * the sibling count of the first online CPU is representative of all
 * cores -- TODO confirm for asymmetric topologies.
 */
void init_real_cpu_mask(void)
{
	int possible, curr_cpu, i, ht;

	cpumask_clear(&node_affinity.real_cpu_mask);

	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask.  Do this in two steps below.
	 */
	possible = cpumask_weight(&node_affinity.real_cpu_mask);
	/* ht = number of logical CPUs per physical core (1 if no SMT) */
	ht = cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.real_cpu_mask)));
	/*
	 * Step 1.  Skip over the first N HT siblings and use them as the
	 * "real" cores.  Assumes that HT cores are not enumerated in
	 * succession (except in the single core case).
	 */
	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	/*
	 * Step 2.  Remove the remaining HT siblings.  Use cpumask_next() to
	 * skip any gaps.
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	}
}
12562306a36Sopenharmony_ci
/*
 * Initialize the driver-wide affinity state.
 *
 * Seeds the process CPU set from the online CPUs, records topology
 * counts, builds the non-HT "real" CPU mask, and counts HFI devices
 * per NUMA node by walking the PCI devices matching hfi1_pci_tbl.
 *
 * Return: 0 on success, -ENOMEM if the per-node counter array cannot
 * be allocated.  Bad BIOS NUMA information is not fatal; the counters
 * are then populated 1:1 (see the "out" path).
 */
int node_affinity_init(void)
{
	int node;
	struct pci_dev *dev = NULL;
	const struct pci_device_id *ids = hfi1_pci_tbl;

	cpumask_clear(&node_affinity.proc.used);
	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);

	node_affinity.proc.gen = 0;
	/* Logical CPUs per core, sampled from the first CPU in the mask */
	node_affinity.num_core_siblings =
				cpumask_weight(topology_sibling_cpumask(
					cpumask_first(&node_affinity.proc.mask)
					));
	node_affinity.num_possible_nodes = num_possible_nodes();
	node_affinity.num_online_nodes = num_online_nodes();
	node_affinity.num_online_cpus = num_online_cpus();

	/*
	 * The real cpu mask is part of the affinity struct but it has to be
	 * initialized early. It is needed to calculate the number of user
	 * contexts in set_up_context_variables().
	 */
	init_real_cpu_mask();

	hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
	if (!hfi1_per_node_cntr)
		return -ENOMEM;

	/* Count devices per node; pci_get_device() iterates with a held ref */
	while (ids->vendor) {
		dev = NULL;
		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
			node = pcibus_to_node(dev->bus);
			if (node < 0)
				goto out;

			hfi1_per_node_cntr[node]++;
		}
		ids++;
	}

	return 0;

out:
	/*
	 * Invalid PCI NUMA node information found, note it, and populate
	 * our database 1:1.
	 */
	pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
	pr_err("HFI: System BIOS may need to be upgraded\n");
	for (node = 0; node < node_affinity.num_possible_nodes; node++)
		hfi1_per_node_cntr[node] = 1;

	/* Drop the reference still held from the aborted pci_get_device() walk */
	pci_dev_put(dev);

	return 0;
}
18462306a36Sopenharmony_ci
18562306a36Sopenharmony_cistatic void node_affinity_destroy(struct hfi1_affinity_node *entry)
18662306a36Sopenharmony_ci{
18762306a36Sopenharmony_ci	free_percpu(entry->comp_vect_affinity);
18862306a36Sopenharmony_ci	kfree(entry);
18962306a36Sopenharmony_ci}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_civoid node_affinity_destroy_all(void)
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	struct list_head *pos, *q;
19462306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	mutex_lock(&node_affinity.lock);
19762306a36Sopenharmony_ci	list_for_each_safe(pos, q, &node_affinity.list) {
19862306a36Sopenharmony_ci		entry = list_entry(pos, struct hfi1_affinity_node,
19962306a36Sopenharmony_ci				   list);
20062306a36Sopenharmony_ci		list_del(pos);
20162306a36Sopenharmony_ci		node_affinity_destroy(entry);
20262306a36Sopenharmony_ci	}
20362306a36Sopenharmony_ci	mutex_unlock(&node_affinity.lock);
20462306a36Sopenharmony_ci	kfree(hfi1_per_node_cntr);
20562306a36Sopenharmony_ci}
20662306a36Sopenharmony_ci
20762306a36Sopenharmony_cistatic struct hfi1_affinity_node *node_affinity_allocate(int node)
20862306a36Sopenharmony_ci{
20962306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
21262306a36Sopenharmony_ci	if (!entry)
21362306a36Sopenharmony_ci		return NULL;
21462306a36Sopenharmony_ci	entry->node = node;
21562306a36Sopenharmony_ci	entry->comp_vect_affinity = alloc_percpu(u16);
21662306a36Sopenharmony_ci	INIT_LIST_HEAD(&entry->list);
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	return entry;
21962306a36Sopenharmony_ci}
22062306a36Sopenharmony_ci
/*
 * It appends an entry to the list.
 * It *must* be called with node_affinity.lock held.
 */
static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
{
	list_add_tail(&entry->list, &node_affinity.list);
}
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci/* It must be called with node_affinity.lock held */
23162306a36Sopenharmony_cistatic struct hfi1_affinity_node *node_affinity_lookup(int node)
23262306a36Sopenharmony_ci{
23362306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_ci	list_for_each_entry(entry, &node_affinity.list, list) {
23662306a36Sopenharmony_ci		if (entry->node == node)
23762306a36Sopenharmony_ci			return entry;
23862306a36Sopenharmony_ci	}
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	return NULL;
24162306a36Sopenharmony_ci}
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_cistatic int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
24462306a36Sopenharmony_ci				u16 __percpu *comp_vect_affinity)
24562306a36Sopenharmony_ci{
24662306a36Sopenharmony_ci	int curr_cpu;
24762306a36Sopenharmony_ci	u16 cntr;
24862306a36Sopenharmony_ci	u16 prev_cntr;
24962306a36Sopenharmony_ci	int ret_cpu;
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	if (!possible_cpumask) {
25262306a36Sopenharmony_ci		ret_cpu = -EINVAL;
25362306a36Sopenharmony_ci		goto fail;
25462306a36Sopenharmony_ci	}
25562306a36Sopenharmony_ci
25662306a36Sopenharmony_ci	if (!comp_vect_affinity) {
25762306a36Sopenharmony_ci		ret_cpu = -EINVAL;
25862306a36Sopenharmony_ci		goto fail;
25962306a36Sopenharmony_ci	}
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	ret_cpu = cpumask_first(possible_cpumask);
26262306a36Sopenharmony_ci	if (ret_cpu >= nr_cpu_ids) {
26362306a36Sopenharmony_ci		ret_cpu = -EINVAL;
26462306a36Sopenharmony_ci		goto fail;
26562306a36Sopenharmony_ci	}
26662306a36Sopenharmony_ci
26762306a36Sopenharmony_ci	prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
26862306a36Sopenharmony_ci	for_each_cpu(curr_cpu, possible_cpumask) {
26962306a36Sopenharmony_ci		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci		if (cntr < prev_cntr) {
27262306a36Sopenharmony_ci			ret_cpu = curr_cpu;
27362306a36Sopenharmony_ci			prev_cntr = cntr;
27462306a36Sopenharmony_ci		}
27562306a36Sopenharmony_ci	}
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci	*per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_cifail:
28062306a36Sopenharmony_ci	return ret_cpu;
28162306a36Sopenharmony_ci}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_cistatic int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
28462306a36Sopenharmony_ci				    u16 __percpu *comp_vect_affinity)
28562306a36Sopenharmony_ci{
28662306a36Sopenharmony_ci	int curr_cpu;
28762306a36Sopenharmony_ci	int max_cpu;
28862306a36Sopenharmony_ci	u16 cntr;
28962306a36Sopenharmony_ci	u16 prev_cntr;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	if (!possible_cpumask)
29262306a36Sopenharmony_ci		return -EINVAL;
29362306a36Sopenharmony_ci
29462306a36Sopenharmony_ci	if (!comp_vect_affinity)
29562306a36Sopenharmony_ci		return -EINVAL;
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	max_cpu = cpumask_first(possible_cpumask);
29862306a36Sopenharmony_ci	if (max_cpu >= nr_cpu_ids)
29962306a36Sopenharmony_ci		return -EINVAL;
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
30262306a36Sopenharmony_ci	for_each_cpu(curr_cpu, possible_cpumask) {
30362306a36Sopenharmony_ci		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_ci		if (cntr > prev_cntr) {
30662306a36Sopenharmony_ci			max_cpu = curr_cpu;
30762306a36Sopenharmony_ci			prev_cntr = cntr;
30862306a36Sopenharmony_ci		}
30962306a36Sopenharmony_ci	}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_ci	*per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	return max_cpu;
31462306a36Sopenharmony_ci}
31562306a36Sopenharmony_ci
31662306a36Sopenharmony_ci/*
31762306a36Sopenharmony_ci * Non-interrupt CPUs are used first, then interrupt CPUs.
31862306a36Sopenharmony_ci * Two already allocated cpu masks must be passed.
31962306a36Sopenharmony_ci */
32062306a36Sopenharmony_cistatic int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
32162306a36Sopenharmony_ci				  struct hfi1_affinity_node *entry,
32262306a36Sopenharmony_ci				  cpumask_var_t non_intr_cpus,
32362306a36Sopenharmony_ci				  cpumask_var_t available_cpus)
32462306a36Sopenharmony_ci	__must_hold(&node_affinity.lock)
32562306a36Sopenharmony_ci{
32662306a36Sopenharmony_ci	int cpu;
32762306a36Sopenharmony_ci	struct cpu_mask_set *set = dd->comp_vect;
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	lockdep_assert_held(&node_affinity.lock);
33062306a36Sopenharmony_ci	if (!non_intr_cpus) {
33162306a36Sopenharmony_ci		cpu = -1;
33262306a36Sopenharmony_ci		goto fail;
33362306a36Sopenharmony_ci	}
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci	if (!available_cpus) {
33662306a36Sopenharmony_ci		cpu = -1;
33762306a36Sopenharmony_ci		goto fail;
33862306a36Sopenharmony_ci	}
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	/* Available CPUs for pinning completion vectors */
34162306a36Sopenharmony_ci	_cpu_mask_set_gen_inc(set);
34262306a36Sopenharmony_ci	cpumask_andnot(available_cpus, &set->mask, &set->used);
34362306a36Sopenharmony_ci
34462306a36Sopenharmony_ci	/* Available CPUs without SDMA engine interrupts */
34562306a36Sopenharmony_ci	cpumask_andnot(non_intr_cpus, available_cpus,
34662306a36Sopenharmony_ci		       &entry->def_intr.used);
34762306a36Sopenharmony_ci
34862306a36Sopenharmony_ci	/* If there are non-interrupt CPUs available, use them first */
34962306a36Sopenharmony_ci	if (!cpumask_empty(non_intr_cpus))
35062306a36Sopenharmony_ci		cpu = cpumask_first(non_intr_cpus);
35162306a36Sopenharmony_ci	else /* Otherwise, use interrupt CPUs */
35262306a36Sopenharmony_ci		cpu = cpumask_first(available_cpus);
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	if (cpu >= nr_cpu_ids) { /* empty */
35562306a36Sopenharmony_ci		cpu = -1;
35662306a36Sopenharmony_ci		goto fail;
35762306a36Sopenharmony_ci	}
35862306a36Sopenharmony_ci	cpumask_set_cpu(cpu, &set->used);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_cifail:
36162306a36Sopenharmony_ci	return cpu;
36262306a36Sopenharmony_ci}
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_cistatic void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
36562306a36Sopenharmony_ci{
36662306a36Sopenharmony_ci	struct cpu_mask_set *set = dd->comp_vect;
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	if (cpu < 0)
36962306a36Sopenharmony_ci		return;
37062306a36Sopenharmony_ci
37162306a36Sopenharmony_ci	cpu_mask_set_put(set, cpu);
37262306a36Sopenharmony_ci}
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci/* _dev_comp_vect_mappings_destroy() is reentrant */
37562306a36Sopenharmony_cistatic void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
37662306a36Sopenharmony_ci{
37762306a36Sopenharmony_ci	int i, cpu;
37862306a36Sopenharmony_ci
37962306a36Sopenharmony_ci	if (!dd->comp_vect_mappings)
38062306a36Sopenharmony_ci		return;
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
38362306a36Sopenharmony_ci		cpu = dd->comp_vect_mappings[i];
38462306a36Sopenharmony_ci		_dev_comp_vect_cpu_put(dd, cpu);
38562306a36Sopenharmony_ci		dd->comp_vect_mappings[i] = -1;
38662306a36Sopenharmony_ci		hfi1_cdbg(AFFINITY,
38762306a36Sopenharmony_ci			  "[%s] Release CPU %d from completion vector %d",
38862306a36Sopenharmony_ci			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
38962306a36Sopenharmony_ci	}
39062306a36Sopenharmony_ci
39162306a36Sopenharmony_ci	kfree(dd->comp_vect_mappings);
39262306a36Sopenharmony_ci	dd->comp_vect_mappings = NULL;
39362306a36Sopenharmony_ci}
39462306a36Sopenharmony_ci
/*
 * This function creates the table for looking up CPUs for completion vectors.
 * num_comp_vectors needs to have been initilized before calling this function.
 *
 * Allocates dd->comp_vect_mappings (one CPU per possible completion
 * vector) and fills it via _dev_comp_vect_cpu_get().  On any failure
 * all reservations made so far are undone.
 *
 * Return: 0 on success, -ENOMEM on allocation failure, -EINVAL if a
 * CPU cannot be reserved.  Caller must hold node_affinity.lock.
 */
static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
					  struct hfi1_affinity_node *entry)
	__must_hold(&node_affinity.lock)
{
	int i, cpu, ret;
	cpumask_var_t non_intr_cpus;
	cpumask_var_t available_cpus;

	lockdep_assert_held(&node_affinity.lock);

	/* Scratch masks handed down to _dev_comp_vect_cpu_get() */
	if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
		return -ENOMEM;

	if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
		free_cpumask_var(non_intr_cpus);
		return -ENOMEM;
	}

	dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
					 sizeof(*dd->comp_vect_mappings),
					 GFP_KERNEL);
	if (!dd->comp_vect_mappings) {
		ret = -ENOMEM;
		goto fail;
	}
	/* Pre-mark every slot invalid so a partial fill can be unwound */
	for (i = 0; i < dd->comp_vect_possible_cpus; i++)
		dd->comp_vect_mappings[i] = -1;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
					     available_cpus);
		if (cpu < 0) {
			ret = -EINVAL;
			goto fail;
		}

		dd->comp_vect_mappings[i] = cpu;
		hfi1_cdbg(AFFINITY,
			  "[%s] Completion Vector %d -> CPU %d",
			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
	}

	free_cpumask_var(available_cpus);
	free_cpumask_var(non_intr_cpus);
	return 0;

fail:
	free_cpumask_var(available_cpus);
	free_cpumask_var(non_intr_cpus);
	/* Releases any CPUs already reserved above; reentrant */
	_dev_comp_vect_mappings_destroy(dd);

	return ret;
}
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ciint hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
45462306a36Sopenharmony_ci{
45562306a36Sopenharmony_ci	int ret;
45662306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
45762306a36Sopenharmony_ci
45862306a36Sopenharmony_ci	mutex_lock(&node_affinity.lock);
45962306a36Sopenharmony_ci	entry = node_affinity_lookup(dd->node);
46062306a36Sopenharmony_ci	if (!entry) {
46162306a36Sopenharmony_ci		ret = -EINVAL;
46262306a36Sopenharmony_ci		goto unlock;
46362306a36Sopenharmony_ci	}
46462306a36Sopenharmony_ci	ret = _dev_comp_vect_mappings_create(dd, entry);
46562306a36Sopenharmony_ciunlock:
46662306a36Sopenharmony_ci	mutex_unlock(&node_affinity.lock);
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	return ret;
46962306a36Sopenharmony_ci}
47062306a36Sopenharmony_ci
/* Tear down the device's completion-vector -> CPU mapping table */
void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
{
	_dev_comp_vect_mappings_destroy(dd);
}
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ciint hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
47762306a36Sopenharmony_ci{
47862306a36Sopenharmony_ci	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
47962306a36Sopenharmony_ci	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	if (!dd->comp_vect_mappings)
48262306a36Sopenharmony_ci		return -EINVAL;
48362306a36Sopenharmony_ci	if (comp_vect >= dd->comp_vect_possible_cpus)
48462306a36Sopenharmony_ci		return -EINVAL;
48562306a36Sopenharmony_ci
48662306a36Sopenharmony_ci	return dd->comp_vect_mappings[comp_vect];
48762306a36Sopenharmony_ci}
48862306a36Sopenharmony_ci
/*
 * It assumes dd->comp_vect_possible_cpus is available.
 *
 * Computes this device's share of the node's completion-vector CPUs
 * (node CPUs divided by the number of devices on the node, the first
 * device absorbing any remainder) and reserves that many CPUs into
 * dd->comp_vect->mask via the per-CPU reference counts.
 *
 * Return: 0 on success, or the negative error from
 * per_cpu_affinity_get() after rolling back prior reservations.
 * Caller must hold node_affinity.lock.
 */
static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
					struct hfi1_affinity_node *entry,
					bool first_dev_init)
	__must_hold(&node_affinity.lock)
{
	int i, j, curr_cpu;
	int possible_cpus_comp_vect = 0;
	struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;

	lockdep_assert_held(&node_affinity.lock);
	/*
	 * If there's only one CPU available for completion vectors, then
	 * there will only be one completion vector available. Othewise,
	 * the number of completion vector available will be the number of
	 * available CPUs divide it by the number of devices in the
	 * local NUMA node.
	 */
	if (cpumask_weight(&entry->comp_vect_mask) == 1) {
		possible_cpus_comp_vect = 1;
		dd_dev_warn(dd,
			    "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
	} else {
		possible_cpus_comp_vect +=
			cpumask_weight(&entry->comp_vect_mask) /
				       hfi1_per_node_cntr[dd->node];

		/*
		 * If the completion vector CPUs available doesn't divide
		 * evenly among devices, then the first device device to be
		 * initialized gets an extra CPU.
		 */
		if (first_dev_init &&
		    cpumask_weight(&entry->comp_vect_mask) %
		    hfi1_per_node_cntr[dd->node] != 0)
			possible_cpus_comp_vect++;
	}

	dd->comp_vect_possible_cpus = possible_cpus_comp_vect;

	/* Reserving CPUs for device completion vector */
	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
						entry->comp_vect_affinity);
		if (curr_cpu < 0)
			goto fail;

		cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
	}

	hfi1_cdbg(AFFINITY,
		  "[%s] Completion vector affinity CPU set(s) %*pbl",
		  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
		  cpumask_pr_args(dev_comp_vect_mask));

	return 0;

fail:
	/* Undo the i reservations that succeeded before the failure */
	for (j = 0; j < i; j++)
		per_cpu_affinity_put_max(&entry->comp_vect_mask,
					 entry->comp_vect_affinity);

	return curr_cpu;
}
55562306a36Sopenharmony_ci
55662306a36Sopenharmony_ci/*
55762306a36Sopenharmony_ci * It assumes dd->comp_vect_possible_cpus is available.
55862306a36Sopenharmony_ci */
55962306a36Sopenharmony_cistatic void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
56062306a36Sopenharmony_ci					     struct hfi1_affinity_node *entry)
56162306a36Sopenharmony_ci	__must_hold(&node_affinity.lock)
56262306a36Sopenharmony_ci{
56362306a36Sopenharmony_ci	int i, cpu;
56462306a36Sopenharmony_ci
56562306a36Sopenharmony_ci	lockdep_assert_held(&node_affinity.lock);
56662306a36Sopenharmony_ci	if (!dd->comp_vect_possible_cpus)
56762306a36Sopenharmony_ci		return;
56862306a36Sopenharmony_ci
56962306a36Sopenharmony_ci	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
57062306a36Sopenharmony_ci		cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
57162306a36Sopenharmony_ci					       entry->comp_vect_affinity);
57262306a36Sopenharmony_ci		/* Clearing CPU in device completion vector cpu mask */
57362306a36Sopenharmony_ci		if (cpu >= 0)
57462306a36Sopenharmony_ci			cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
57562306a36Sopenharmony_ci	}
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci	dd->comp_vect_possible_cpus = 0;
57862306a36Sopenharmony_ci}
57962306a36Sopenharmony_ci
58062306a36Sopenharmony_ci/*
58162306a36Sopenharmony_ci * Interrupt affinity.
58262306a36Sopenharmony_ci *
58362306a36Sopenharmony_ci * non-rcv avail gets a default mask that
58462306a36Sopenharmony_ci * starts as possible cpus with threads reset
58562306a36Sopenharmony_ci * and each rcv avail reset.
58662306a36Sopenharmony_ci *
58762306a36Sopenharmony_ci * rcv avail gets node relative 1 wrapping back
58862306a36Sopenharmony_ci * to the node relative 1 as necessary.
58962306a36Sopenharmony_ci *
59062306a36Sopenharmony_ci */
59162306a36Sopenharmony_ciint hfi1_dev_affinity_init(struct hfi1_devdata *dd)
59262306a36Sopenharmony_ci{
59362306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
59462306a36Sopenharmony_ci	const struct cpumask *local_mask;
59562306a36Sopenharmony_ci	int curr_cpu, possible, i, ret;
59662306a36Sopenharmony_ci	bool new_entry = false;
59762306a36Sopenharmony_ci
59862306a36Sopenharmony_ci	local_mask = cpumask_of_node(dd->node);
59962306a36Sopenharmony_ci	if (cpumask_first(local_mask) >= nr_cpu_ids)
60062306a36Sopenharmony_ci		local_mask = topology_core_cpumask(0);
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	mutex_lock(&node_affinity.lock);
60362306a36Sopenharmony_ci	entry = node_affinity_lookup(dd->node);
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci	/*
60662306a36Sopenharmony_ci	 * If this is the first time this NUMA node's affinity is used,
60762306a36Sopenharmony_ci	 * create an entry in the global affinity structure and initialize it.
60862306a36Sopenharmony_ci	 */
60962306a36Sopenharmony_ci	if (!entry) {
61062306a36Sopenharmony_ci		entry = node_affinity_allocate(dd->node);
61162306a36Sopenharmony_ci		if (!entry) {
61262306a36Sopenharmony_ci			dd_dev_err(dd,
61362306a36Sopenharmony_ci				   "Unable to allocate global affinity node\n");
61462306a36Sopenharmony_ci			ret = -ENOMEM;
61562306a36Sopenharmony_ci			goto fail;
61662306a36Sopenharmony_ci		}
61762306a36Sopenharmony_ci		new_entry = true;
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci		init_cpu_mask_set(&entry->def_intr);
62062306a36Sopenharmony_ci		init_cpu_mask_set(&entry->rcv_intr);
62162306a36Sopenharmony_ci		cpumask_clear(&entry->comp_vect_mask);
62262306a36Sopenharmony_ci		cpumask_clear(&entry->general_intr_mask);
62362306a36Sopenharmony_ci		/* Use the "real" cpu mask of this node as the default */
62462306a36Sopenharmony_ci		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
62562306a36Sopenharmony_ci			    local_mask);
62662306a36Sopenharmony_ci
62762306a36Sopenharmony_ci		/* fill in the receive list */
62862306a36Sopenharmony_ci		possible = cpumask_weight(&entry->def_intr.mask);
62962306a36Sopenharmony_ci		curr_cpu = cpumask_first(&entry->def_intr.mask);
63062306a36Sopenharmony_ci
63162306a36Sopenharmony_ci		if (possible == 1) {
63262306a36Sopenharmony_ci			/* only one CPU, everyone will use it */
63362306a36Sopenharmony_ci			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
63462306a36Sopenharmony_ci			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
63562306a36Sopenharmony_ci		} else {
63662306a36Sopenharmony_ci			/*
63762306a36Sopenharmony_ci			 * The general/control context will be the first CPU in
63862306a36Sopenharmony_ci			 * the default list, so it is removed from the default
63962306a36Sopenharmony_ci			 * list and added to the general interrupt list.
64062306a36Sopenharmony_ci			 */
64162306a36Sopenharmony_ci			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
64262306a36Sopenharmony_ci			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
64362306a36Sopenharmony_ci			curr_cpu = cpumask_next(curr_cpu,
64462306a36Sopenharmony_ci						&entry->def_intr.mask);
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci			/*
64762306a36Sopenharmony_ci			 * Remove the remaining kernel receive queues from
64862306a36Sopenharmony_ci			 * the default list and add them to the receive list.
64962306a36Sopenharmony_ci			 */
65062306a36Sopenharmony_ci			for (i = 0;
65162306a36Sopenharmony_ci			     i < (dd->n_krcv_queues - 1) *
65262306a36Sopenharmony_ci				  hfi1_per_node_cntr[dd->node];
65362306a36Sopenharmony_ci			     i++) {
65462306a36Sopenharmony_ci				cpumask_clear_cpu(curr_cpu,
65562306a36Sopenharmony_ci						  &entry->def_intr.mask);
65662306a36Sopenharmony_ci				cpumask_set_cpu(curr_cpu,
65762306a36Sopenharmony_ci						&entry->rcv_intr.mask);
65862306a36Sopenharmony_ci				curr_cpu = cpumask_next(curr_cpu,
65962306a36Sopenharmony_ci							&entry->def_intr.mask);
66062306a36Sopenharmony_ci				if (curr_cpu >= nr_cpu_ids)
66162306a36Sopenharmony_ci					break;
66262306a36Sopenharmony_ci			}
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_ci			/*
66562306a36Sopenharmony_ci			 * If there ends up being 0 CPU cores leftover for SDMA
66662306a36Sopenharmony_ci			 * engines, use the same CPU cores as general/control
66762306a36Sopenharmony_ci			 * context.
66862306a36Sopenharmony_ci			 */
66962306a36Sopenharmony_ci			if (cpumask_empty(&entry->def_intr.mask))
67062306a36Sopenharmony_ci				cpumask_copy(&entry->def_intr.mask,
67162306a36Sopenharmony_ci					     &entry->general_intr_mask);
67262306a36Sopenharmony_ci		}
67362306a36Sopenharmony_ci
67462306a36Sopenharmony_ci		/* Determine completion vector CPUs for the entire node */
67562306a36Sopenharmony_ci		cpumask_and(&entry->comp_vect_mask,
67662306a36Sopenharmony_ci			    &node_affinity.real_cpu_mask, local_mask);
67762306a36Sopenharmony_ci		cpumask_andnot(&entry->comp_vect_mask,
67862306a36Sopenharmony_ci			       &entry->comp_vect_mask,
67962306a36Sopenharmony_ci			       &entry->rcv_intr.mask);
68062306a36Sopenharmony_ci		cpumask_andnot(&entry->comp_vect_mask,
68162306a36Sopenharmony_ci			       &entry->comp_vect_mask,
68262306a36Sopenharmony_ci			       &entry->general_intr_mask);
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci		/*
68562306a36Sopenharmony_ci		 * If there ends up being 0 CPU cores leftover for completion
68662306a36Sopenharmony_ci		 * vectors, use the same CPU core as the general/control
68762306a36Sopenharmony_ci		 * context.
68862306a36Sopenharmony_ci		 */
68962306a36Sopenharmony_ci		if (cpumask_empty(&entry->comp_vect_mask))
69062306a36Sopenharmony_ci			cpumask_copy(&entry->comp_vect_mask,
69162306a36Sopenharmony_ci				     &entry->general_intr_mask);
69262306a36Sopenharmony_ci	}
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci	ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
69562306a36Sopenharmony_ci	if (ret < 0)
69662306a36Sopenharmony_ci		goto fail;
69762306a36Sopenharmony_ci
69862306a36Sopenharmony_ci	if (new_entry)
69962306a36Sopenharmony_ci		node_affinity_add_tail(entry);
70062306a36Sopenharmony_ci
70162306a36Sopenharmony_ci	dd->affinity_entry = entry;
70262306a36Sopenharmony_ci	mutex_unlock(&node_affinity.lock);
70362306a36Sopenharmony_ci
70462306a36Sopenharmony_ci	return 0;
70562306a36Sopenharmony_ci
70662306a36Sopenharmony_cifail:
70762306a36Sopenharmony_ci	if (new_entry)
70862306a36Sopenharmony_ci		node_affinity_destroy(entry);
70962306a36Sopenharmony_ci	mutex_unlock(&node_affinity.lock);
71062306a36Sopenharmony_ci	return ret;
71162306a36Sopenharmony_ci}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_civoid hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
71462306a36Sopenharmony_ci{
71562306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
71662306a36Sopenharmony_ci
71762306a36Sopenharmony_ci	mutex_lock(&node_affinity.lock);
71862306a36Sopenharmony_ci	if (!dd->affinity_entry)
71962306a36Sopenharmony_ci		goto unlock;
72062306a36Sopenharmony_ci	entry = node_affinity_lookup(dd->node);
72162306a36Sopenharmony_ci	if (!entry)
72262306a36Sopenharmony_ci		goto unlock;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	/*
72562306a36Sopenharmony_ci	 * Free device completion vector CPUs to be used by future
72662306a36Sopenharmony_ci	 * completion vectors
72762306a36Sopenharmony_ci	 */
72862306a36Sopenharmony_ci	_dev_comp_vect_cpu_mask_clean_up(dd, entry);
72962306a36Sopenharmony_ciunlock:
73062306a36Sopenharmony_ci	dd->affinity_entry = NULL;
73162306a36Sopenharmony_ci	mutex_unlock(&node_affinity.lock);
73262306a36Sopenharmony_ci}
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci/*
73562306a36Sopenharmony_ci * Function updates the irq affinity hint for msix after it has been changed
73662306a36Sopenharmony_ci * by the user using the /proc/irq interface. This function only accepts
73762306a36Sopenharmony_ci * one cpu in the mask.
73862306a36Sopenharmony_ci */
73962306a36Sopenharmony_cistatic void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
74062306a36Sopenharmony_ci{
74162306a36Sopenharmony_ci	struct sdma_engine *sde = msix->arg;
74262306a36Sopenharmony_ci	struct hfi1_devdata *dd = sde->dd;
74362306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
74462306a36Sopenharmony_ci	struct cpu_mask_set *set;
74562306a36Sopenharmony_ci	int i, old_cpu;
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_ci	if (cpu > num_online_cpus() || cpu == sde->cpu)
74862306a36Sopenharmony_ci		return;
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ci	mutex_lock(&node_affinity.lock);
75162306a36Sopenharmony_ci	entry = node_affinity_lookup(dd->node);
75262306a36Sopenharmony_ci	if (!entry)
75362306a36Sopenharmony_ci		goto unlock;
75462306a36Sopenharmony_ci
75562306a36Sopenharmony_ci	old_cpu = sde->cpu;
75662306a36Sopenharmony_ci	sde->cpu = cpu;
75762306a36Sopenharmony_ci	cpumask_clear(&msix->mask);
75862306a36Sopenharmony_ci	cpumask_set_cpu(cpu, &msix->mask);
75962306a36Sopenharmony_ci	dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
76062306a36Sopenharmony_ci		   msix->irq, irq_type_names[msix->type],
76162306a36Sopenharmony_ci		   sde->this_idx, cpu);
76262306a36Sopenharmony_ci	irq_set_affinity_hint(msix->irq, &msix->mask);
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci	/*
76562306a36Sopenharmony_ci	 * Set the new cpu in the hfi1_affinity_node and clean
76662306a36Sopenharmony_ci	 * the old cpu if it is not used by any other IRQ
76762306a36Sopenharmony_ci	 */
76862306a36Sopenharmony_ci	set = &entry->def_intr;
76962306a36Sopenharmony_ci	cpumask_set_cpu(cpu, &set->mask);
77062306a36Sopenharmony_ci	cpumask_set_cpu(cpu, &set->used);
77162306a36Sopenharmony_ci	for (i = 0; i < dd->msix_info.max_requested; i++) {
77262306a36Sopenharmony_ci		struct hfi1_msix_entry *other_msix;
77362306a36Sopenharmony_ci
77462306a36Sopenharmony_ci		other_msix = &dd->msix_info.msix_entries[i];
77562306a36Sopenharmony_ci		if (other_msix->type != IRQ_SDMA || other_msix == msix)
77662306a36Sopenharmony_ci			continue;
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
77962306a36Sopenharmony_ci			goto unlock;
78062306a36Sopenharmony_ci	}
78162306a36Sopenharmony_ci	cpumask_clear_cpu(old_cpu, &set->mask);
78262306a36Sopenharmony_ci	cpumask_clear_cpu(old_cpu, &set->used);
78362306a36Sopenharmony_ciunlock:
78462306a36Sopenharmony_ci	mutex_unlock(&node_affinity.lock);
78562306a36Sopenharmony_ci}
78662306a36Sopenharmony_ci
78762306a36Sopenharmony_cistatic void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
78862306a36Sopenharmony_ci				     const cpumask_t *mask)
78962306a36Sopenharmony_ci{
79062306a36Sopenharmony_ci	int cpu = cpumask_first(mask);
79162306a36Sopenharmony_ci	struct hfi1_msix_entry *msix = container_of(notify,
79262306a36Sopenharmony_ci						    struct hfi1_msix_entry,
79362306a36Sopenharmony_ci						    notify);
79462306a36Sopenharmony_ci
79562306a36Sopenharmony_ci	/* Only one CPU configuration supported currently */
79662306a36Sopenharmony_ci	hfi1_update_sdma_affinity(msix, cpu);
79762306a36Sopenharmony_ci}
79862306a36Sopenharmony_ci
79962306a36Sopenharmony_cistatic void hfi1_irq_notifier_release(struct kref *ref)
80062306a36Sopenharmony_ci{
80162306a36Sopenharmony_ci	/*
80262306a36Sopenharmony_ci	 * This is required by affinity notifier. We don't have anything to
80362306a36Sopenharmony_ci	 * free here.
80462306a36Sopenharmony_ci	 */
80562306a36Sopenharmony_ci}
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_cistatic void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
80862306a36Sopenharmony_ci{
80962306a36Sopenharmony_ci	struct irq_affinity_notify *notify = &msix->notify;
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	notify->irq = msix->irq;
81262306a36Sopenharmony_ci	notify->notify = hfi1_irq_notifier_notify;
81362306a36Sopenharmony_ci	notify->release = hfi1_irq_notifier_release;
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	if (irq_set_affinity_notifier(notify->irq, notify))
81662306a36Sopenharmony_ci		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
81762306a36Sopenharmony_ci		       notify->irq);
81862306a36Sopenharmony_ci}
81962306a36Sopenharmony_ci
82062306a36Sopenharmony_cistatic void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
82162306a36Sopenharmony_ci{
82262306a36Sopenharmony_ci	struct irq_affinity_notify *notify = &msix->notify;
82362306a36Sopenharmony_ci
82462306a36Sopenharmony_ci	if (irq_set_affinity_notifier(notify->irq, NULL))
82562306a36Sopenharmony_ci		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
82662306a36Sopenharmony_ci		       notify->irq);
82762306a36Sopenharmony_ci}
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci/*
83062306a36Sopenharmony_ci * Function sets the irq affinity for msix.
83162306a36Sopenharmony_ci * It *must* be called with node_affinity.lock held.
83262306a36Sopenharmony_ci */
83362306a36Sopenharmony_cistatic int get_irq_affinity(struct hfi1_devdata *dd,
83462306a36Sopenharmony_ci			    struct hfi1_msix_entry *msix)
83562306a36Sopenharmony_ci{
83662306a36Sopenharmony_ci	cpumask_var_t diff;
83762306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
83862306a36Sopenharmony_ci	struct cpu_mask_set *set = NULL;
83962306a36Sopenharmony_ci	struct sdma_engine *sde = NULL;
84062306a36Sopenharmony_ci	struct hfi1_ctxtdata *rcd = NULL;
84162306a36Sopenharmony_ci	char extra[64];
84262306a36Sopenharmony_ci	int cpu = -1;
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci	extra[0] = '\0';
84562306a36Sopenharmony_ci	cpumask_clear(&msix->mask);
84662306a36Sopenharmony_ci
84762306a36Sopenharmony_ci	entry = node_affinity_lookup(dd->node);
84862306a36Sopenharmony_ci
84962306a36Sopenharmony_ci	switch (msix->type) {
85062306a36Sopenharmony_ci	case IRQ_SDMA:
85162306a36Sopenharmony_ci		sde = (struct sdma_engine *)msix->arg;
85262306a36Sopenharmony_ci		scnprintf(extra, 64, "engine %u", sde->this_idx);
85362306a36Sopenharmony_ci		set = &entry->def_intr;
85462306a36Sopenharmony_ci		break;
85562306a36Sopenharmony_ci	case IRQ_GENERAL:
85662306a36Sopenharmony_ci		cpu = cpumask_first(&entry->general_intr_mask);
85762306a36Sopenharmony_ci		break;
85862306a36Sopenharmony_ci	case IRQ_RCVCTXT:
85962306a36Sopenharmony_ci		rcd = (struct hfi1_ctxtdata *)msix->arg;
86062306a36Sopenharmony_ci		if (rcd->ctxt == HFI1_CTRL_CTXT)
86162306a36Sopenharmony_ci			cpu = cpumask_first(&entry->general_intr_mask);
86262306a36Sopenharmony_ci		else
86362306a36Sopenharmony_ci			set = &entry->rcv_intr;
86462306a36Sopenharmony_ci		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
86562306a36Sopenharmony_ci		break;
86662306a36Sopenharmony_ci	case IRQ_NETDEVCTXT:
86762306a36Sopenharmony_ci		rcd = (struct hfi1_ctxtdata *)msix->arg;
86862306a36Sopenharmony_ci		set = &entry->def_intr;
86962306a36Sopenharmony_ci		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
87062306a36Sopenharmony_ci		break;
87162306a36Sopenharmony_ci	default:
87262306a36Sopenharmony_ci		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
87362306a36Sopenharmony_ci		return -EINVAL;
87462306a36Sopenharmony_ci	}
87562306a36Sopenharmony_ci
87662306a36Sopenharmony_ci	/*
87762306a36Sopenharmony_ci	 * The general and control contexts are placed on a particular
87862306a36Sopenharmony_ci	 * CPU, which is set above. Skip accounting for it. Everything else
87962306a36Sopenharmony_ci	 * finds its CPU here.
88062306a36Sopenharmony_ci	 */
88162306a36Sopenharmony_ci	if (cpu == -1 && set) {
88262306a36Sopenharmony_ci		if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
88362306a36Sopenharmony_ci			return -ENOMEM;
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci		cpu = cpu_mask_set_get_first(set, diff);
88662306a36Sopenharmony_ci		if (cpu < 0) {
88762306a36Sopenharmony_ci			free_cpumask_var(diff);
88862306a36Sopenharmony_ci			dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
88962306a36Sopenharmony_ci			return cpu;
89062306a36Sopenharmony_ci		}
89162306a36Sopenharmony_ci
89262306a36Sopenharmony_ci		free_cpumask_var(diff);
89362306a36Sopenharmony_ci	}
89462306a36Sopenharmony_ci
89562306a36Sopenharmony_ci	cpumask_set_cpu(cpu, &msix->mask);
89662306a36Sopenharmony_ci	dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
89762306a36Sopenharmony_ci		    msix->irq, irq_type_names[msix->type],
89862306a36Sopenharmony_ci		    extra, cpu);
89962306a36Sopenharmony_ci	irq_set_affinity_hint(msix->irq, &msix->mask);
90062306a36Sopenharmony_ci
90162306a36Sopenharmony_ci	if (msix->type == IRQ_SDMA) {
90262306a36Sopenharmony_ci		sde->cpu = cpu;
90362306a36Sopenharmony_ci		hfi1_setup_sdma_notifier(msix);
90462306a36Sopenharmony_ci	}
90562306a36Sopenharmony_ci
90662306a36Sopenharmony_ci	return 0;
90762306a36Sopenharmony_ci}
90862306a36Sopenharmony_ci
90962306a36Sopenharmony_ciint hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
91062306a36Sopenharmony_ci{
91162306a36Sopenharmony_ci	int ret;
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_ci	mutex_lock(&node_affinity.lock);
91462306a36Sopenharmony_ci	ret = get_irq_affinity(dd, msix);
91562306a36Sopenharmony_ci	mutex_unlock(&node_affinity.lock);
91662306a36Sopenharmony_ci	return ret;
91762306a36Sopenharmony_ci}
91862306a36Sopenharmony_ci
91962306a36Sopenharmony_civoid hfi1_put_irq_affinity(struct hfi1_devdata *dd,
92062306a36Sopenharmony_ci			   struct hfi1_msix_entry *msix)
92162306a36Sopenharmony_ci{
92262306a36Sopenharmony_ci	struct cpu_mask_set *set = NULL;
92362306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
92462306a36Sopenharmony_ci
92562306a36Sopenharmony_ci	mutex_lock(&node_affinity.lock);
92662306a36Sopenharmony_ci	entry = node_affinity_lookup(dd->node);
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	switch (msix->type) {
92962306a36Sopenharmony_ci	case IRQ_SDMA:
93062306a36Sopenharmony_ci		set = &entry->def_intr;
93162306a36Sopenharmony_ci		hfi1_cleanup_sdma_notifier(msix);
93262306a36Sopenharmony_ci		break;
93362306a36Sopenharmony_ci	case IRQ_GENERAL:
93462306a36Sopenharmony_ci		/* Don't do accounting for general contexts */
93562306a36Sopenharmony_ci		break;
93662306a36Sopenharmony_ci	case IRQ_RCVCTXT: {
93762306a36Sopenharmony_ci		struct hfi1_ctxtdata *rcd = msix->arg;
93862306a36Sopenharmony_ci
93962306a36Sopenharmony_ci		/* Don't do accounting for control contexts */
94062306a36Sopenharmony_ci		if (rcd->ctxt != HFI1_CTRL_CTXT)
94162306a36Sopenharmony_ci			set = &entry->rcv_intr;
94262306a36Sopenharmony_ci		break;
94362306a36Sopenharmony_ci	}
94462306a36Sopenharmony_ci	case IRQ_NETDEVCTXT:
94562306a36Sopenharmony_ci		set = &entry->def_intr;
94662306a36Sopenharmony_ci		break;
94762306a36Sopenharmony_ci	default:
94862306a36Sopenharmony_ci		mutex_unlock(&node_affinity.lock);
94962306a36Sopenharmony_ci		return;
95062306a36Sopenharmony_ci	}
95162306a36Sopenharmony_ci
95262306a36Sopenharmony_ci	if (set) {
95362306a36Sopenharmony_ci		cpumask_andnot(&set->used, &set->used, &msix->mask);
95462306a36Sopenharmony_ci		_cpu_mask_set_gen_dec(set);
95562306a36Sopenharmony_ci	}
95662306a36Sopenharmony_ci
95762306a36Sopenharmony_ci	irq_set_affinity_hint(msix->irq, NULL);
95862306a36Sopenharmony_ci	cpumask_clear(&msix->mask);
95962306a36Sopenharmony_ci	mutex_unlock(&node_affinity.lock);
96062306a36Sopenharmony_ci}
96162306a36Sopenharmony_ci
96262306a36Sopenharmony_ci/* This should be called with node_affinity.lock held */
96362306a36Sopenharmony_cistatic void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
96462306a36Sopenharmony_ci				struct hfi1_affinity_node_list *affinity)
96562306a36Sopenharmony_ci{
96662306a36Sopenharmony_ci	int possible, curr_cpu, i;
96762306a36Sopenharmony_ci	uint num_cores_per_socket = node_affinity.num_online_cpus /
96862306a36Sopenharmony_ci					affinity->num_core_siblings /
96962306a36Sopenharmony_ci						node_affinity.num_online_nodes;
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
97262306a36Sopenharmony_ci	if (affinity->num_core_siblings > 0) {
97362306a36Sopenharmony_ci		/* Removing other siblings not needed for now */
97462306a36Sopenharmony_ci		possible = cpumask_weight(hw_thread_mask);
97562306a36Sopenharmony_ci		curr_cpu = cpumask_first(hw_thread_mask);
97662306a36Sopenharmony_ci		for (i = 0;
97762306a36Sopenharmony_ci		     i < num_cores_per_socket * node_affinity.num_online_nodes;
97862306a36Sopenharmony_ci		     i++)
97962306a36Sopenharmony_ci			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
98062306a36Sopenharmony_ci
98162306a36Sopenharmony_ci		for (; i < possible; i++) {
98262306a36Sopenharmony_ci			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
98362306a36Sopenharmony_ci			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
98462306a36Sopenharmony_ci		}
98562306a36Sopenharmony_ci
98662306a36Sopenharmony_ci		/* Identifying correct HW threads within physical cores */
98762306a36Sopenharmony_ci		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
98862306a36Sopenharmony_ci				   num_cores_per_socket *
98962306a36Sopenharmony_ci				   node_affinity.num_online_nodes *
99062306a36Sopenharmony_ci				   hw_thread_no);
99162306a36Sopenharmony_ci	}
99262306a36Sopenharmony_ci}
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ciint hfi1_get_proc_affinity(int node)
99562306a36Sopenharmony_ci{
99662306a36Sopenharmony_ci	int cpu = -1, ret, i;
99762306a36Sopenharmony_ci	struct hfi1_affinity_node *entry;
99862306a36Sopenharmony_ci	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
99962306a36Sopenharmony_ci	const struct cpumask *node_mask,
100062306a36Sopenharmony_ci		*proc_mask = current->cpus_ptr;
100162306a36Sopenharmony_ci	struct hfi1_affinity_node_list *affinity = &node_affinity;
100262306a36Sopenharmony_ci	struct cpu_mask_set *set = &affinity->proc;
100362306a36Sopenharmony_ci
100462306a36Sopenharmony_ci	/*
100562306a36Sopenharmony_ci	 * check whether process/context affinity has already
100662306a36Sopenharmony_ci	 * been set
100762306a36Sopenharmony_ci	 */
100862306a36Sopenharmony_ci	if (current->nr_cpus_allowed == 1) {
100962306a36Sopenharmony_ci		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
101062306a36Sopenharmony_ci			  current->pid, current->comm,
101162306a36Sopenharmony_ci			  cpumask_pr_args(proc_mask));
101262306a36Sopenharmony_ci		/*
101362306a36Sopenharmony_ci		 * Mark the pre-set CPU as used. This is atomic so we don't
101462306a36Sopenharmony_ci		 * need the lock
101562306a36Sopenharmony_ci		 */
101662306a36Sopenharmony_ci		cpu = cpumask_first(proc_mask);
101762306a36Sopenharmony_ci		cpumask_set_cpu(cpu, &set->used);
101862306a36Sopenharmony_ci		goto done;
101962306a36Sopenharmony_ci	} else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
102062306a36Sopenharmony_ci		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
102162306a36Sopenharmony_ci			  current->pid, current->comm,
102262306a36Sopenharmony_ci			  cpumask_pr_args(proc_mask));
102362306a36Sopenharmony_ci		goto done;
102462306a36Sopenharmony_ci	}
102562306a36Sopenharmony_ci
102662306a36Sopenharmony_ci	/*
102762306a36Sopenharmony_ci	 * The process does not have a preset CPU affinity so find one to
102862306a36Sopenharmony_ci	 * recommend using the following algorithm:
102962306a36Sopenharmony_ci	 *
103062306a36Sopenharmony_ci	 * For each user process that is opening a context on HFI Y:
103162306a36Sopenharmony_ci	 *  a) If all cores are filled, reinitialize the bitmask
103262306a36Sopenharmony_ci	 *  b) Fill real cores first, then HT cores (First set of HT
103362306a36Sopenharmony_ci	 *     cores on all physical cores, then second set of HT core,
103462306a36Sopenharmony_ci	 *     and, so on) in the following order:
103562306a36Sopenharmony_ci	 *
103662306a36Sopenharmony_ci	 *     1. Same NUMA node as HFI Y and not running an IRQ
103762306a36Sopenharmony_ci	 *        handler
103862306a36Sopenharmony_ci	 *     2. Same NUMA node as HFI Y and running an IRQ handler
103962306a36Sopenharmony_ci	 *     3. Different NUMA node to HFI Y and not running an IRQ
104062306a36Sopenharmony_ci	 *        handler
104162306a36Sopenharmony_ci	 *     4. Different NUMA node to HFI Y and running an IRQ
104262306a36Sopenharmony_ci	 *        handler
104362306a36Sopenharmony_ci	 *  c) Mark core as filled in the bitmask. As user processes are
104462306a36Sopenharmony_ci	 *     done, clear cores from the bitmask.
104562306a36Sopenharmony_ci	 */
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
104862306a36Sopenharmony_ci	if (!ret)
104962306a36Sopenharmony_ci		goto done;
105062306a36Sopenharmony_ci	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
105162306a36Sopenharmony_ci	if (!ret)
105262306a36Sopenharmony_ci		goto free_diff;
105362306a36Sopenharmony_ci	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
105462306a36Sopenharmony_ci	if (!ret)
105562306a36Sopenharmony_ci		goto free_hw_thread_mask;
105662306a36Sopenharmony_ci	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
105762306a36Sopenharmony_ci	if (!ret)
105862306a36Sopenharmony_ci		goto free_available_mask;
105962306a36Sopenharmony_ci
106062306a36Sopenharmony_ci	mutex_lock(&affinity->lock);
106162306a36Sopenharmony_ci	/*
106262306a36Sopenharmony_ci	 * If we've used all available HW threads, clear the mask and start
106362306a36Sopenharmony_ci	 * overloading.
106462306a36Sopenharmony_ci	 */
106562306a36Sopenharmony_ci	_cpu_mask_set_gen_inc(set);
106662306a36Sopenharmony_ci
106762306a36Sopenharmony_ci	/*
106862306a36Sopenharmony_ci	 * If NUMA node has CPUs used by interrupt handlers, include them in the
106962306a36Sopenharmony_ci	 * interrupt handler mask.
107062306a36Sopenharmony_ci	 */
107162306a36Sopenharmony_ci	entry = node_affinity_lookup(node);
107262306a36Sopenharmony_ci	if (entry) {
107362306a36Sopenharmony_ci		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
107462306a36Sopenharmony_ci					  &entry->def_intr.mask :
107562306a36Sopenharmony_ci					  &entry->def_intr.used));
107662306a36Sopenharmony_ci		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
107762306a36Sopenharmony_ci						    &entry->rcv_intr.mask :
107862306a36Sopenharmony_ci						    &entry->rcv_intr.used));
107962306a36Sopenharmony_ci		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
108062306a36Sopenharmony_ci	}
108162306a36Sopenharmony_ci	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
108262306a36Sopenharmony_ci		  cpumask_pr_args(intrs_mask));
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	cpumask_copy(hw_thread_mask, &set->mask);
108562306a36Sopenharmony_ci
108662306a36Sopenharmony_ci	/*
108762306a36Sopenharmony_ci	 * If HT cores are enabled, identify which HW threads within the
108862306a36Sopenharmony_ci	 * physical cores should be used.
108962306a36Sopenharmony_ci	 */
109062306a36Sopenharmony_ci	if (affinity->num_core_siblings > 0) {
109162306a36Sopenharmony_ci		for (i = 0; i < affinity->num_core_siblings; i++) {
109262306a36Sopenharmony_ci			find_hw_thread_mask(i, hw_thread_mask, affinity);
109362306a36Sopenharmony_ci
109462306a36Sopenharmony_ci			/*
109562306a36Sopenharmony_ci			 * If there's at least one available core for this HW
109662306a36Sopenharmony_ci			 * thread number, stop looking for a core.
109762306a36Sopenharmony_ci			 *
109862306a36Sopenharmony_ci			 * diff will always be not empty at least once in this
109962306a36Sopenharmony_ci			 * loop as the used mask gets reset when
110062306a36Sopenharmony_ci			 * (set->mask == set->used) before this loop.
110162306a36Sopenharmony_ci			 */
110262306a36Sopenharmony_ci			cpumask_andnot(diff, hw_thread_mask, &set->used);
110362306a36Sopenharmony_ci			if (!cpumask_empty(diff))
110462306a36Sopenharmony_ci				break;
110562306a36Sopenharmony_ci		}
110662306a36Sopenharmony_ci	}
110762306a36Sopenharmony_ci	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
110862306a36Sopenharmony_ci		  cpumask_pr_args(hw_thread_mask));
110962306a36Sopenharmony_ci
111062306a36Sopenharmony_ci	node_mask = cpumask_of_node(node);
111162306a36Sopenharmony_ci	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
111262306a36Sopenharmony_ci		  cpumask_pr_args(node_mask));
111362306a36Sopenharmony_ci
111462306a36Sopenharmony_ci	/* Get cpumask of available CPUs on preferred NUMA */
111562306a36Sopenharmony_ci	cpumask_and(available_mask, hw_thread_mask, node_mask);
111662306a36Sopenharmony_ci	cpumask_andnot(available_mask, available_mask, &set->used);
111762306a36Sopenharmony_ci	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
111862306a36Sopenharmony_ci		  cpumask_pr_args(available_mask));
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_ci	/*
112162306a36Sopenharmony_ci	 * At first, we don't want to place processes on the same
112262306a36Sopenharmony_ci	 * CPUs as interrupt handlers. Then, CPUs running interrupt
112362306a36Sopenharmony_ci	 * handlers are used.
112462306a36Sopenharmony_ci	 *
112562306a36Sopenharmony_ci	 * 1) If diff is not empty, then there are CPUs not running
112662306a36Sopenharmony_ci	 *    non-interrupt handlers available, so diff gets copied
112762306a36Sopenharmony_ci	 *    over to available_mask.
112862306a36Sopenharmony_ci	 * 2) If diff is empty, then all CPUs not running interrupt
112962306a36Sopenharmony_ci	 *    handlers are taken, so available_mask contains all
113062306a36Sopenharmony_ci	 *    available CPUs running interrupt handlers.
113162306a36Sopenharmony_ci	 * 3) If available_mask is empty, then all CPUs on the
113262306a36Sopenharmony_ci	 *    preferred NUMA node are taken, so other NUMA nodes are
113362306a36Sopenharmony_ci	 *    used for process assignments using the same method as
113462306a36Sopenharmony_ci	 *    the preferred NUMA node.
113562306a36Sopenharmony_ci	 */
113662306a36Sopenharmony_ci	cpumask_andnot(diff, available_mask, intrs_mask);
113762306a36Sopenharmony_ci	if (!cpumask_empty(diff))
113862306a36Sopenharmony_ci		cpumask_copy(available_mask, diff);
113962306a36Sopenharmony_ci
114062306a36Sopenharmony_ci	/* If we don't have CPUs on the preferred node, use other NUMA nodes */
114162306a36Sopenharmony_ci	if (cpumask_empty(available_mask)) {
114262306a36Sopenharmony_ci		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
114362306a36Sopenharmony_ci		/* Excluding preferred NUMA cores */
114462306a36Sopenharmony_ci		cpumask_andnot(available_mask, available_mask, node_mask);
114562306a36Sopenharmony_ci		hfi1_cdbg(PROC,
114662306a36Sopenharmony_ci			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
114762306a36Sopenharmony_ci			  cpumask_pr_args(available_mask));
114862306a36Sopenharmony_ci
114962306a36Sopenharmony_ci		/*
115062306a36Sopenharmony_ci		 * At first, we don't want to place processes on the same
115162306a36Sopenharmony_ci		 * CPUs as interrupt handlers.
115262306a36Sopenharmony_ci		 */
115362306a36Sopenharmony_ci		cpumask_andnot(diff, available_mask, intrs_mask);
115462306a36Sopenharmony_ci		if (!cpumask_empty(diff))
115562306a36Sopenharmony_ci			cpumask_copy(available_mask, diff);
115662306a36Sopenharmony_ci	}
115762306a36Sopenharmony_ci	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
115862306a36Sopenharmony_ci		  cpumask_pr_args(available_mask));
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci	cpu = cpumask_first(available_mask);
116162306a36Sopenharmony_ci	if (cpu >= nr_cpu_ids) /* empty */
116262306a36Sopenharmony_ci		cpu = -1;
116362306a36Sopenharmony_ci	else
116462306a36Sopenharmony_ci		cpumask_set_cpu(cpu, &set->used);
116562306a36Sopenharmony_ci
116662306a36Sopenharmony_ci	mutex_unlock(&affinity->lock);
116762306a36Sopenharmony_ci	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);
116862306a36Sopenharmony_ci
116962306a36Sopenharmony_ci	free_cpumask_var(intrs_mask);
117062306a36Sopenharmony_cifree_available_mask:
117162306a36Sopenharmony_ci	free_cpumask_var(available_mask);
117262306a36Sopenharmony_cifree_hw_thread_mask:
117362306a36Sopenharmony_ci	free_cpumask_var(hw_thread_mask);
117462306a36Sopenharmony_cifree_diff:
117562306a36Sopenharmony_ci	free_cpumask_var(diff);
117662306a36Sopenharmony_cidone:
117762306a36Sopenharmony_ci	return cpu;
117862306a36Sopenharmony_ci}
117962306a36Sopenharmony_ci
118062306a36Sopenharmony_civoid hfi1_put_proc_affinity(int cpu)
118162306a36Sopenharmony_ci{
118262306a36Sopenharmony_ci	struct hfi1_affinity_node_list *affinity = &node_affinity;
118362306a36Sopenharmony_ci	struct cpu_mask_set *set = &affinity->proc;
118462306a36Sopenharmony_ci
118562306a36Sopenharmony_ci	if (cpu < 0)
118662306a36Sopenharmony_ci		return;
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_ci	mutex_lock(&affinity->lock);
118962306a36Sopenharmony_ci	cpu_mask_set_put(set, cpu);
119062306a36Sopenharmony_ci	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
119162306a36Sopenharmony_ci	mutex_unlock(&affinity->lock);
119262306a36Sopenharmony_ci}
1193