162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Copyright(c) 2015 - 2020 Intel Corporation. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/topology.h> 762306a36Sopenharmony_ci#include <linux/cpumask.h> 862306a36Sopenharmony_ci#include <linux/interrupt.h> 962306a36Sopenharmony_ci#include <linux/numa.h> 1062306a36Sopenharmony_ci 1162306a36Sopenharmony_ci#include "hfi.h" 1262306a36Sopenharmony_ci#include "affinity.h" 1362306a36Sopenharmony_ci#include "sdma.h" 1462306a36Sopenharmony_ci#include "trace.h" 1562306a36Sopenharmony_ci 1662306a36Sopenharmony_cistruct hfi1_affinity_node_list node_affinity = { 1762306a36Sopenharmony_ci .list = LIST_HEAD_INIT(node_affinity.list), 1862306a36Sopenharmony_ci .lock = __MUTEX_INITIALIZER(node_affinity.lock) 1962306a36Sopenharmony_ci}; 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci/* Name of IRQ types, indexed by enum irq_type */ 2262306a36Sopenharmony_cistatic const char * const irq_type_names[] = { 2362306a36Sopenharmony_ci "SDMA", 2462306a36Sopenharmony_ci "RCVCTXT", 2562306a36Sopenharmony_ci "NETDEVCTXT", 2662306a36Sopenharmony_ci "GENERAL", 2762306a36Sopenharmony_ci "OTHER", 2862306a36Sopenharmony_ci}; 2962306a36Sopenharmony_ci 3062306a36Sopenharmony_ci/* Per NUMA node count of HFI devices */ 3162306a36Sopenharmony_cistatic unsigned int *hfi1_per_node_cntr; 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_cistatic inline void init_cpu_mask_set(struct cpu_mask_set *set) 3462306a36Sopenharmony_ci{ 3562306a36Sopenharmony_ci cpumask_clear(&set->mask); 3662306a36Sopenharmony_ci cpumask_clear(&set->used); 3762306a36Sopenharmony_ci set->gen = 0; 3862306a36Sopenharmony_ci} 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_ci/* Increment generation of CPU set if needed */ 4162306a36Sopenharmony_cistatic void _cpu_mask_set_gen_inc(struct cpu_mask_set *set) 4262306a36Sopenharmony_ci{ 4362306a36Sopenharmony_ci 
if (cpumask_equal(&set->mask, &set->used)) { 4462306a36Sopenharmony_ci /* 4562306a36Sopenharmony_ci * We've used up all the CPUs, bump up the generation 4662306a36Sopenharmony_ci * and reset the 'used' map 4762306a36Sopenharmony_ci */ 4862306a36Sopenharmony_ci set->gen++; 4962306a36Sopenharmony_ci cpumask_clear(&set->used); 5062306a36Sopenharmony_ci } 5162306a36Sopenharmony_ci} 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_cistatic void _cpu_mask_set_gen_dec(struct cpu_mask_set *set) 5462306a36Sopenharmony_ci{ 5562306a36Sopenharmony_ci if (cpumask_empty(&set->used) && set->gen) { 5662306a36Sopenharmony_ci set->gen--; 5762306a36Sopenharmony_ci cpumask_copy(&set->used, &set->mask); 5862306a36Sopenharmony_ci } 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_ci/* Get the first CPU from the list of unused CPUs in a CPU set data structure */ 6262306a36Sopenharmony_cistatic int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff) 6362306a36Sopenharmony_ci{ 6462306a36Sopenharmony_ci int cpu; 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_ci if (!diff || !set) 6762306a36Sopenharmony_ci return -EINVAL; 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_ci _cpu_mask_set_gen_inc(set); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci /* Find out CPUs left in CPU mask */ 7262306a36Sopenharmony_ci cpumask_andnot(diff, &set->mask, &set->used); 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_ci cpu = cpumask_first(diff); 7562306a36Sopenharmony_ci if (cpu >= nr_cpu_ids) /* empty */ 7662306a36Sopenharmony_ci cpu = -EINVAL; 7762306a36Sopenharmony_ci else 7862306a36Sopenharmony_ci cpumask_set_cpu(cpu, &set->used); 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci return cpu; 8162306a36Sopenharmony_ci} 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_cistatic void cpu_mask_set_put(struct cpu_mask_set *set, int cpu) 8462306a36Sopenharmony_ci{ 8562306a36Sopenharmony_ci if (!set) 8662306a36Sopenharmony_ci return; 
8762306a36Sopenharmony_ci 8862306a36Sopenharmony_ci cpumask_clear_cpu(cpu, &set->used); 8962306a36Sopenharmony_ci _cpu_mask_set_gen_dec(set); 9062306a36Sopenharmony_ci} 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci/* Initialize non-HT cpu cores mask */ 9362306a36Sopenharmony_civoid init_real_cpu_mask(void) 9462306a36Sopenharmony_ci{ 9562306a36Sopenharmony_ci int possible, curr_cpu, i, ht; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci cpumask_clear(&node_affinity.real_cpu_mask); 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci /* Start with cpu online mask as the real cpu mask */ 10062306a36Sopenharmony_ci cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask); 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ci /* 10362306a36Sopenharmony_ci * Remove HT cores from the real cpu mask. Do this in two steps below. 10462306a36Sopenharmony_ci */ 10562306a36Sopenharmony_ci possible = cpumask_weight(&node_affinity.real_cpu_mask); 10662306a36Sopenharmony_ci ht = cpumask_weight(topology_sibling_cpumask( 10762306a36Sopenharmony_ci cpumask_first(&node_affinity.real_cpu_mask))); 10862306a36Sopenharmony_ci /* 10962306a36Sopenharmony_ci * Step 1. Skip over the first N HT siblings and use them as the 11062306a36Sopenharmony_ci * "real" cores. Assumes that HT cores are not enumerated in 11162306a36Sopenharmony_ci * succession (except in the single core case). 11262306a36Sopenharmony_ci */ 11362306a36Sopenharmony_ci curr_cpu = cpumask_first(&node_affinity.real_cpu_mask); 11462306a36Sopenharmony_ci for (i = 0; i < possible / ht; i++) 11562306a36Sopenharmony_ci curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask); 11662306a36Sopenharmony_ci /* 11762306a36Sopenharmony_ci * Step 2. Remove the remaining HT siblings. Use cpumask_next() to 11862306a36Sopenharmony_ci * skip any gaps. 
11962306a36Sopenharmony_ci */ 12062306a36Sopenharmony_ci for (; i < possible; i++) { 12162306a36Sopenharmony_ci cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask); 12262306a36Sopenharmony_ci curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask); 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_ciint node_affinity_init(void) 12762306a36Sopenharmony_ci{ 12862306a36Sopenharmony_ci int node; 12962306a36Sopenharmony_ci struct pci_dev *dev = NULL; 13062306a36Sopenharmony_ci const struct pci_device_id *ids = hfi1_pci_tbl; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci cpumask_clear(&node_affinity.proc.used); 13362306a36Sopenharmony_ci cpumask_copy(&node_affinity.proc.mask, cpu_online_mask); 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci node_affinity.proc.gen = 0; 13662306a36Sopenharmony_ci node_affinity.num_core_siblings = 13762306a36Sopenharmony_ci cpumask_weight(topology_sibling_cpumask( 13862306a36Sopenharmony_ci cpumask_first(&node_affinity.proc.mask) 13962306a36Sopenharmony_ci )); 14062306a36Sopenharmony_ci node_affinity.num_possible_nodes = num_possible_nodes(); 14162306a36Sopenharmony_ci node_affinity.num_online_nodes = num_online_nodes(); 14262306a36Sopenharmony_ci node_affinity.num_online_cpus = num_online_cpus(); 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci /* 14562306a36Sopenharmony_ci * The real cpu mask is part of the affinity struct but it has to be 14662306a36Sopenharmony_ci * initialized early. It is needed to calculate the number of user 14762306a36Sopenharmony_ci * contexts in set_up_context_variables(). 
14862306a36Sopenharmony_ci */ 14962306a36Sopenharmony_ci init_real_cpu_mask(); 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes, 15262306a36Sopenharmony_ci sizeof(*hfi1_per_node_cntr), GFP_KERNEL); 15362306a36Sopenharmony_ci if (!hfi1_per_node_cntr) 15462306a36Sopenharmony_ci return -ENOMEM; 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci while (ids->vendor) { 15762306a36Sopenharmony_ci dev = NULL; 15862306a36Sopenharmony_ci while ((dev = pci_get_device(ids->vendor, ids->device, dev))) { 15962306a36Sopenharmony_ci node = pcibus_to_node(dev->bus); 16062306a36Sopenharmony_ci if (node < 0) 16162306a36Sopenharmony_ci goto out; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci hfi1_per_node_cntr[node]++; 16462306a36Sopenharmony_ci } 16562306a36Sopenharmony_ci ids++; 16662306a36Sopenharmony_ci } 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci return 0; 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ciout: 17162306a36Sopenharmony_ci /* 17262306a36Sopenharmony_ci * Invalid PCI NUMA node information found, note it, and populate 17362306a36Sopenharmony_ci * our database 1:1. 17462306a36Sopenharmony_ci */ 17562306a36Sopenharmony_ci pr_err("HFI: Invalid PCI NUMA node. 
Performance may be affected\n"); 17662306a36Sopenharmony_ci pr_err("HFI: System BIOS may need to be upgraded\n"); 17762306a36Sopenharmony_ci for (node = 0; node < node_affinity.num_possible_nodes; node++) 17862306a36Sopenharmony_ci hfi1_per_node_cntr[node] = 1; 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_ci pci_dev_put(dev); 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci return 0; 18362306a36Sopenharmony_ci} 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_cistatic void node_affinity_destroy(struct hfi1_affinity_node *entry) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci free_percpu(entry->comp_vect_affinity); 18862306a36Sopenharmony_ci kfree(entry); 18962306a36Sopenharmony_ci} 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_civoid node_affinity_destroy_all(void) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci struct list_head *pos, *q; 19462306a36Sopenharmony_ci struct hfi1_affinity_node *entry; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci mutex_lock(&node_affinity.lock); 19762306a36Sopenharmony_ci list_for_each_safe(pos, q, &node_affinity.list) { 19862306a36Sopenharmony_ci entry = list_entry(pos, struct hfi1_affinity_node, 19962306a36Sopenharmony_ci list); 20062306a36Sopenharmony_ci list_del(pos); 20162306a36Sopenharmony_ci node_affinity_destroy(entry); 20262306a36Sopenharmony_ci } 20362306a36Sopenharmony_ci mutex_unlock(&node_affinity.lock); 20462306a36Sopenharmony_ci kfree(hfi1_per_node_cntr); 20562306a36Sopenharmony_ci} 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_cistatic struct hfi1_affinity_node *node_affinity_allocate(int node) 20862306a36Sopenharmony_ci{ 20962306a36Sopenharmony_ci struct hfi1_affinity_node *entry; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci entry = kzalloc(sizeof(*entry), GFP_KERNEL); 21262306a36Sopenharmony_ci if (!entry) 21362306a36Sopenharmony_ci return NULL; 21462306a36Sopenharmony_ci entry->node = node; 21562306a36Sopenharmony_ci entry->comp_vect_affinity = alloc_percpu(u16); 
21662306a36Sopenharmony_ci INIT_LIST_HEAD(&entry->list); 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci return entry; 21962306a36Sopenharmony_ci} 22062306a36Sopenharmony_ci 22162306a36Sopenharmony_ci/* 22262306a36Sopenharmony_ci * It appends an entry to the list. 22362306a36Sopenharmony_ci * It *must* be called with node_affinity.lock held. 22462306a36Sopenharmony_ci */ 22562306a36Sopenharmony_cistatic void node_affinity_add_tail(struct hfi1_affinity_node *entry) 22662306a36Sopenharmony_ci{ 22762306a36Sopenharmony_ci list_add_tail(&entry->list, &node_affinity.list); 22862306a36Sopenharmony_ci} 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci/* It must be called with node_affinity.lock held */ 23162306a36Sopenharmony_cistatic struct hfi1_affinity_node *node_affinity_lookup(int node) 23262306a36Sopenharmony_ci{ 23362306a36Sopenharmony_ci struct hfi1_affinity_node *entry; 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_ci list_for_each_entry(entry, &node_affinity.list, list) { 23662306a36Sopenharmony_ci if (entry->node == node) 23762306a36Sopenharmony_ci return entry; 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci return NULL; 24162306a36Sopenharmony_ci} 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_cistatic int per_cpu_affinity_get(cpumask_var_t possible_cpumask, 24462306a36Sopenharmony_ci u16 __percpu *comp_vect_affinity) 24562306a36Sopenharmony_ci{ 24662306a36Sopenharmony_ci int curr_cpu; 24762306a36Sopenharmony_ci u16 cntr; 24862306a36Sopenharmony_ci u16 prev_cntr; 24962306a36Sopenharmony_ci int ret_cpu; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci if (!possible_cpumask) { 25262306a36Sopenharmony_ci ret_cpu = -EINVAL; 25362306a36Sopenharmony_ci goto fail; 25462306a36Sopenharmony_ci } 25562306a36Sopenharmony_ci 25662306a36Sopenharmony_ci if (!comp_vect_affinity) { 25762306a36Sopenharmony_ci ret_cpu = -EINVAL; 25862306a36Sopenharmony_ci goto fail; 25962306a36Sopenharmony_ci } 26062306a36Sopenharmony_ci 
26162306a36Sopenharmony_ci ret_cpu = cpumask_first(possible_cpumask); 26262306a36Sopenharmony_ci if (ret_cpu >= nr_cpu_ids) { 26362306a36Sopenharmony_ci ret_cpu = -EINVAL; 26462306a36Sopenharmony_ci goto fail; 26562306a36Sopenharmony_ci } 26662306a36Sopenharmony_ci 26762306a36Sopenharmony_ci prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu); 26862306a36Sopenharmony_ci for_each_cpu(curr_cpu, possible_cpumask) { 26962306a36Sopenharmony_ci cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu); 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci if (cntr < prev_cntr) { 27262306a36Sopenharmony_ci ret_cpu = curr_cpu; 27362306a36Sopenharmony_ci prev_cntr = cntr; 27462306a36Sopenharmony_ci } 27562306a36Sopenharmony_ci } 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci *per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1; 27862306a36Sopenharmony_ci 27962306a36Sopenharmony_cifail: 28062306a36Sopenharmony_ci return ret_cpu; 28162306a36Sopenharmony_ci} 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_cistatic int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask, 28462306a36Sopenharmony_ci u16 __percpu *comp_vect_affinity) 28562306a36Sopenharmony_ci{ 28662306a36Sopenharmony_ci int curr_cpu; 28762306a36Sopenharmony_ci int max_cpu; 28862306a36Sopenharmony_ci u16 cntr; 28962306a36Sopenharmony_ci u16 prev_cntr; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci if (!possible_cpumask) 29262306a36Sopenharmony_ci return -EINVAL; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci if (!comp_vect_affinity) 29562306a36Sopenharmony_ci return -EINVAL; 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci max_cpu = cpumask_first(possible_cpumask); 29862306a36Sopenharmony_ci if (max_cpu >= nr_cpu_ids) 29962306a36Sopenharmony_ci return -EINVAL; 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu); 30262306a36Sopenharmony_ci for_each_cpu(curr_cpu, possible_cpumask) { 30362306a36Sopenharmony_ci cntr = 
*per_cpu_ptr(comp_vect_affinity, curr_cpu); 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_ci if (cntr > prev_cntr) { 30662306a36Sopenharmony_ci max_cpu = curr_cpu; 30762306a36Sopenharmony_ci prev_cntr = cntr; 30862306a36Sopenharmony_ci } 30962306a36Sopenharmony_ci } 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_ci *per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1; 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci return max_cpu; 31462306a36Sopenharmony_ci} 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci/* 31762306a36Sopenharmony_ci * Non-interrupt CPUs are used first, then interrupt CPUs. 31862306a36Sopenharmony_ci * Two already allocated cpu masks must be passed. 31962306a36Sopenharmony_ci */ 32062306a36Sopenharmony_cistatic int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd, 32162306a36Sopenharmony_ci struct hfi1_affinity_node *entry, 32262306a36Sopenharmony_ci cpumask_var_t non_intr_cpus, 32362306a36Sopenharmony_ci cpumask_var_t available_cpus) 32462306a36Sopenharmony_ci __must_hold(&node_affinity.lock) 32562306a36Sopenharmony_ci{ 32662306a36Sopenharmony_ci int cpu; 32762306a36Sopenharmony_ci struct cpu_mask_set *set = dd->comp_vect; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci lockdep_assert_held(&node_affinity.lock); 33062306a36Sopenharmony_ci if (!non_intr_cpus) { 33162306a36Sopenharmony_ci cpu = -1; 33262306a36Sopenharmony_ci goto fail; 33362306a36Sopenharmony_ci } 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci if (!available_cpus) { 33662306a36Sopenharmony_ci cpu = -1; 33762306a36Sopenharmony_ci goto fail; 33862306a36Sopenharmony_ci } 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci /* Available CPUs for pinning completion vectors */ 34162306a36Sopenharmony_ci _cpu_mask_set_gen_inc(set); 34262306a36Sopenharmony_ci cpumask_andnot(available_cpus, &set->mask, &set->used); 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_ci /* Available CPUs without SDMA engine interrupts */ 34562306a36Sopenharmony_ci 
cpumask_andnot(non_intr_cpus, available_cpus, 34662306a36Sopenharmony_ci &entry->def_intr.used); 34762306a36Sopenharmony_ci 34862306a36Sopenharmony_ci /* If there are non-interrupt CPUs available, use them first */ 34962306a36Sopenharmony_ci if (!cpumask_empty(non_intr_cpus)) 35062306a36Sopenharmony_ci cpu = cpumask_first(non_intr_cpus); 35162306a36Sopenharmony_ci else /* Otherwise, use interrupt CPUs */ 35262306a36Sopenharmony_ci cpu = cpumask_first(available_cpus); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (cpu >= nr_cpu_ids) { /* empty */ 35562306a36Sopenharmony_ci cpu = -1; 35662306a36Sopenharmony_ci goto fail; 35762306a36Sopenharmony_ci } 35862306a36Sopenharmony_ci cpumask_set_cpu(cpu, &set->used); 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_cifail: 36162306a36Sopenharmony_ci return cpu; 36262306a36Sopenharmony_ci} 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_cistatic void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu) 36562306a36Sopenharmony_ci{ 36662306a36Sopenharmony_ci struct cpu_mask_set *set = dd->comp_vect; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci if (cpu < 0) 36962306a36Sopenharmony_ci return; 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci cpu_mask_set_put(set, cpu); 37262306a36Sopenharmony_ci} 37362306a36Sopenharmony_ci 37462306a36Sopenharmony_ci/* _dev_comp_vect_mappings_destroy() is reentrant */ 37562306a36Sopenharmony_cistatic void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd) 37662306a36Sopenharmony_ci{ 37762306a36Sopenharmony_ci int i, cpu; 37862306a36Sopenharmony_ci 37962306a36Sopenharmony_ci if (!dd->comp_vect_mappings) 38062306a36Sopenharmony_ci return; 38162306a36Sopenharmony_ci 38262306a36Sopenharmony_ci for (i = 0; i < dd->comp_vect_possible_cpus; i++) { 38362306a36Sopenharmony_ci cpu = dd->comp_vect_mappings[i]; 38462306a36Sopenharmony_ci _dev_comp_vect_cpu_put(dd, cpu); 38562306a36Sopenharmony_ci dd->comp_vect_mappings[i] = -1; 38662306a36Sopenharmony_ci 
hfi1_cdbg(AFFINITY, 38762306a36Sopenharmony_ci "[%s] Release CPU %d from completion vector %d", 38862306a36Sopenharmony_ci rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i); 38962306a36Sopenharmony_ci } 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci kfree(dd->comp_vect_mappings); 39262306a36Sopenharmony_ci dd->comp_vect_mappings = NULL; 39362306a36Sopenharmony_ci} 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci/* 39662306a36Sopenharmony_ci * This function creates the table for looking up CPUs for completion vectors. 39762306a36Sopenharmony_ci * num_comp_vectors needs to have been initilized before calling this function. 39862306a36Sopenharmony_ci */ 39962306a36Sopenharmony_cistatic int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd, 40062306a36Sopenharmony_ci struct hfi1_affinity_node *entry) 40162306a36Sopenharmony_ci __must_hold(&node_affinity.lock) 40262306a36Sopenharmony_ci{ 40362306a36Sopenharmony_ci int i, cpu, ret; 40462306a36Sopenharmony_ci cpumask_var_t non_intr_cpus; 40562306a36Sopenharmony_ci cpumask_var_t available_cpus; 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci lockdep_assert_held(&node_affinity.lock); 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL)) 41062306a36Sopenharmony_ci return -ENOMEM; 41162306a36Sopenharmony_ci 41262306a36Sopenharmony_ci if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) { 41362306a36Sopenharmony_ci free_cpumask_var(non_intr_cpus); 41462306a36Sopenharmony_ci return -ENOMEM; 41562306a36Sopenharmony_ci } 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus, 41862306a36Sopenharmony_ci sizeof(*dd->comp_vect_mappings), 41962306a36Sopenharmony_ci GFP_KERNEL); 42062306a36Sopenharmony_ci if (!dd->comp_vect_mappings) { 42162306a36Sopenharmony_ci ret = -ENOMEM; 42262306a36Sopenharmony_ci goto fail; 42362306a36Sopenharmony_ci } 42462306a36Sopenharmony_ci for (i = 0; i < 
dd->comp_vect_possible_cpus; i++) 42562306a36Sopenharmony_ci dd->comp_vect_mappings[i] = -1; 42662306a36Sopenharmony_ci 42762306a36Sopenharmony_ci for (i = 0; i < dd->comp_vect_possible_cpus; i++) { 42862306a36Sopenharmony_ci cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus, 42962306a36Sopenharmony_ci available_cpus); 43062306a36Sopenharmony_ci if (cpu < 0) { 43162306a36Sopenharmony_ci ret = -EINVAL; 43262306a36Sopenharmony_ci goto fail; 43362306a36Sopenharmony_ci } 43462306a36Sopenharmony_ci 43562306a36Sopenharmony_ci dd->comp_vect_mappings[i] = cpu; 43662306a36Sopenharmony_ci hfi1_cdbg(AFFINITY, 43762306a36Sopenharmony_ci "[%s] Completion Vector %d -> CPU %d", 43862306a36Sopenharmony_ci rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu); 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci free_cpumask_var(available_cpus); 44262306a36Sopenharmony_ci free_cpumask_var(non_intr_cpus); 44362306a36Sopenharmony_ci return 0; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_cifail: 44662306a36Sopenharmony_ci free_cpumask_var(available_cpus); 44762306a36Sopenharmony_ci free_cpumask_var(non_intr_cpus); 44862306a36Sopenharmony_ci _dev_comp_vect_mappings_destroy(dd); 44962306a36Sopenharmony_ci 45062306a36Sopenharmony_ci return ret; 45162306a36Sopenharmony_ci} 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ciint hfi1_comp_vectors_set_up(struct hfi1_devdata *dd) 45462306a36Sopenharmony_ci{ 45562306a36Sopenharmony_ci int ret; 45662306a36Sopenharmony_ci struct hfi1_affinity_node *entry; 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_ci mutex_lock(&node_affinity.lock); 45962306a36Sopenharmony_ci entry = node_affinity_lookup(dd->node); 46062306a36Sopenharmony_ci if (!entry) { 46162306a36Sopenharmony_ci ret = -EINVAL; 46262306a36Sopenharmony_ci goto unlock; 46362306a36Sopenharmony_ci } 46462306a36Sopenharmony_ci ret = _dev_comp_vect_mappings_create(dd, entry); 46562306a36Sopenharmony_ciunlock: 46662306a36Sopenharmony_ci 
mutex_unlock(&node_affinity.lock); 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci return ret; 46962306a36Sopenharmony_ci} 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_civoid hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd) 47262306a36Sopenharmony_ci{ 47362306a36Sopenharmony_ci _dev_comp_vect_mappings_destroy(dd); 47462306a36Sopenharmony_ci} 47562306a36Sopenharmony_ci 47662306a36Sopenharmony_ciint hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect) 47762306a36Sopenharmony_ci{ 47862306a36Sopenharmony_ci struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi); 47962306a36Sopenharmony_ci struct hfi1_devdata *dd = dd_from_dev(verbs_dev); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci if (!dd->comp_vect_mappings) 48262306a36Sopenharmony_ci return -EINVAL; 48362306a36Sopenharmony_ci if (comp_vect >= dd->comp_vect_possible_cpus) 48462306a36Sopenharmony_ci return -EINVAL; 48562306a36Sopenharmony_ci 48662306a36Sopenharmony_ci return dd->comp_vect_mappings[comp_vect]; 48762306a36Sopenharmony_ci} 48862306a36Sopenharmony_ci 48962306a36Sopenharmony_ci/* 49062306a36Sopenharmony_ci * It assumes dd->comp_vect_possible_cpus is available. 49162306a36Sopenharmony_ci */ 49262306a36Sopenharmony_cistatic int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd, 49362306a36Sopenharmony_ci struct hfi1_affinity_node *entry, 49462306a36Sopenharmony_ci bool first_dev_init) 49562306a36Sopenharmony_ci __must_hold(&node_affinity.lock) 49662306a36Sopenharmony_ci{ 49762306a36Sopenharmony_ci int i, j, curr_cpu; 49862306a36Sopenharmony_ci int possible_cpus_comp_vect = 0; 49962306a36Sopenharmony_ci struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask; 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci lockdep_assert_held(&node_affinity.lock); 50262306a36Sopenharmony_ci /* 50362306a36Sopenharmony_ci * If there's only one CPU available for completion vectors, then 50462306a36Sopenharmony_ci * there will only be one completion vector available. 
Othewise, 50562306a36Sopenharmony_ci * the number of completion vector available will be the number of 50662306a36Sopenharmony_ci * available CPUs divide it by the number of devices in the 50762306a36Sopenharmony_ci * local NUMA node. 50862306a36Sopenharmony_ci */ 50962306a36Sopenharmony_ci if (cpumask_weight(&entry->comp_vect_mask) == 1) { 51062306a36Sopenharmony_ci possible_cpus_comp_vect = 1; 51162306a36Sopenharmony_ci dd_dev_warn(dd, 51262306a36Sopenharmony_ci "Number of kernel receive queues is too large for completion vector affinity to be effective\n"); 51362306a36Sopenharmony_ci } else { 51462306a36Sopenharmony_ci possible_cpus_comp_vect += 51562306a36Sopenharmony_ci cpumask_weight(&entry->comp_vect_mask) / 51662306a36Sopenharmony_ci hfi1_per_node_cntr[dd->node]; 51762306a36Sopenharmony_ci 51862306a36Sopenharmony_ci /* 51962306a36Sopenharmony_ci * If the completion vector CPUs available doesn't divide 52062306a36Sopenharmony_ci * evenly among devices, then the first device device to be 52162306a36Sopenharmony_ci * initialized gets an extra CPU. 
52262306a36Sopenharmony_ci */ 52362306a36Sopenharmony_ci if (first_dev_init && 52462306a36Sopenharmony_ci cpumask_weight(&entry->comp_vect_mask) % 52562306a36Sopenharmony_ci hfi1_per_node_cntr[dd->node] != 0) 52662306a36Sopenharmony_ci possible_cpus_comp_vect++; 52762306a36Sopenharmony_ci } 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci dd->comp_vect_possible_cpus = possible_cpus_comp_vect; 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci /* Reserving CPUs for device completion vector */ 53262306a36Sopenharmony_ci for (i = 0; i < dd->comp_vect_possible_cpus; i++) { 53362306a36Sopenharmony_ci curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask, 53462306a36Sopenharmony_ci entry->comp_vect_affinity); 53562306a36Sopenharmony_ci if (curr_cpu < 0) 53662306a36Sopenharmony_ci goto fail; 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci cpumask_set_cpu(curr_cpu, dev_comp_vect_mask); 53962306a36Sopenharmony_ci } 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_ci hfi1_cdbg(AFFINITY, 54262306a36Sopenharmony_ci "[%s] Completion vector affinity CPU set(s) %*pbl", 54362306a36Sopenharmony_ci rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), 54462306a36Sopenharmony_ci cpumask_pr_args(dev_comp_vect_mask)); 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci return 0; 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_cifail: 54962306a36Sopenharmony_ci for (j = 0; j < i; j++) 55062306a36Sopenharmony_ci per_cpu_affinity_put_max(&entry->comp_vect_mask, 55162306a36Sopenharmony_ci entry->comp_vect_affinity); 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci return curr_cpu; 55462306a36Sopenharmony_ci} 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci/* 55762306a36Sopenharmony_ci * It assumes dd->comp_vect_possible_cpus is available. 
55862306a36Sopenharmony_ci */ 55962306a36Sopenharmony_cistatic void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd, 56062306a36Sopenharmony_ci struct hfi1_affinity_node *entry) 56162306a36Sopenharmony_ci __must_hold(&node_affinity.lock) 56262306a36Sopenharmony_ci{ 56362306a36Sopenharmony_ci int i, cpu; 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci lockdep_assert_held(&node_affinity.lock); 56662306a36Sopenharmony_ci if (!dd->comp_vect_possible_cpus) 56762306a36Sopenharmony_ci return; 56862306a36Sopenharmony_ci 56962306a36Sopenharmony_ci for (i = 0; i < dd->comp_vect_possible_cpus; i++) { 57062306a36Sopenharmony_ci cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask, 57162306a36Sopenharmony_ci entry->comp_vect_affinity); 57262306a36Sopenharmony_ci /* Clearing CPU in device completion vector cpu mask */ 57362306a36Sopenharmony_ci if (cpu >= 0) 57462306a36Sopenharmony_ci cpumask_clear_cpu(cpu, &dd->comp_vect->mask); 57562306a36Sopenharmony_ci } 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci dd->comp_vect_possible_cpus = 0; 57862306a36Sopenharmony_ci} 57962306a36Sopenharmony_ci 58062306a36Sopenharmony_ci/* 58162306a36Sopenharmony_ci * Interrupt affinity. 58262306a36Sopenharmony_ci * 58362306a36Sopenharmony_ci * non-rcv avail gets a default mask that 58462306a36Sopenharmony_ci * starts as possible cpus with threads reset 58562306a36Sopenharmony_ci * and each rcv avail reset. 58662306a36Sopenharmony_ci * 58762306a36Sopenharmony_ci * rcv avail gets node relative 1 wrapping back 58862306a36Sopenharmony_ci * to the node relative 1 as necessary. 
58962306a36Sopenharmony_ci * 59062306a36Sopenharmony_ci */ 59162306a36Sopenharmony_ciint hfi1_dev_affinity_init(struct hfi1_devdata *dd) 59262306a36Sopenharmony_ci{ 59362306a36Sopenharmony_ci struct hfi1_affinity_node *entry; 59462306a36Sopenharmony_ci const struct cpumask *local_mask; 59562306a36Sopenharmony_ci int curr_cpu, possible, i, ret; 59662306a36Sopenharmony_ci bool new_entry = false; 59762306a36Sopenharmony_ci 59862306a36Sopenharmony_ci local_mask = cpumask_of_node(dd->node); 59962306a36Sopenharmony_ci if (cpumask_first(local_mask) >= nr_cpu_ids) 60062306a36Sopenharmony_ci local_mask = topology_core_cpumask(0); 60162306a36Sopenharmony_ci 60262306a36Sopenharmony_ci mutex_lock(&node_affinity.lock); 60362306a36Sopenharmony_ci entry = node_affinity_lookup(dd->node); 60462306a36Sopenharmony_ci 60562306a36Sopenharmony_ci /* 60662306a36Sopenharmony_ci * If this is the first time this NUMA node's affinity is used, 60762306a36Sopenharmony_ci * create an entry in the global affinity structure and initialize it. 
	 */
	if (!entry) {
		/*
		 * First device seen on this NUMA node: build a fresh
		 * affinity entry and partition the node's CPUs between
		 * the default (SDMA), receive, and general interrupt
		 * masks.
		 */
		entry = node_affinity_allocate(dd->node);
		if (!entry) {
			dd_dev_err(dd,
				   "Unable to allocate global affinity node\n");
			ret = -ENOMEM;
			goto fail;
		}
		new_entry = true;

		init_cpu_mask_set(&entry->def_intr);
		init_cpu_mask_set(&entry->rcv_intr);
		cpumask_clear(&entry->comp_vect_mask);
		cpumask_clear(&entry->general_intr_mask);
		/* Use the "real" cpu mask of this node as the default */
		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
			    local_mask);

		/* fill in the receive list */
		possible = cpumask_weight(&entry->def_intr.mask);
		curr_cpu = cpumask_first(&entry->def_intr.mask);

		if (possible == 1) {
			/* only one CPU, everyone will use it */
			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
		} else {
			/*
			 * The general/control context will be the first CPU in
			 * the default list, so it is removed from the default
			 * list and added to the general interrupt list.
			 */
			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);

			/*
			 * Remove the remaining kernel receive queues from
			 * the default list and add them to the receive list.
			 * One receive context per kernel queue per device on
			 * this node (minus the control context handled above).
			 */
			for (i = 0;
			     i < (dd->n_krcv_queues - 1) *
				  hfi1_per_node_cntr[dd->node];
			     i++) {
				cpumask_clear_cpu(curr_cpu,
						  &entry->def_intr.mask);
				cpumask_set_cpu(curr_cpu,
						&entry->rcv_intr.mask);
				curr_cpu = cpumask_next(curr_cpu,
							&entry->def_intr.mask);
				if (curr_cpu >= nr_cpu_ids)
					break;
			}

			/*
			 * If there ends up being 0 CPU cores leftover for SDMA
			 * engines, use the same CPU cores as general/control
			 * context.
			 */
			if (cpumask_empty(&entry->def_intr.mask))
				cpumask_copy(&entry->def_intr.mask,
					     &entry->general_intr_mask);
		}

		/*
		 * Determine completion vector CPUs for the entire node:
		 * node CPUs minus those already claimed for receive and
		 * general interrupts.
		 */
		cpumask_and(&entry->comp_vect_mask,
			    &node_affinity.real_cpu_mask, local_mask);
		cpumask_andnot(&entry->comp_vect_mask,
			       &entry->comp_vect_mask,
			       &entry->rcv_intr.mask);
		cpumask_andnot(&entry->comp_vect_mask,
			       &entry->comp_vect_mask,
			       &entry->general_intr_mask);

		/*
		 * If there ends up being 0 CPU cores leftover for completion
		 * vectors, use the same CPU core as the general/control
		 * context.
		 */
		if (cpumask_empty(&entry->comp_vect_mask))
			cpumask_copy(&entry->comp_vect_mask,
				     &entry->general_intr_mask);
	}

	ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
	if (ret < 0)
		goto fail;

	/* Publish the entry on the global list only once fully initialized */
	if (new_entry)
		node_affinity_add_tail(entry);

	dd->affinity_entry = entry;
	mutex_unlock(&node_affinity.lock);

	return 0;

fail:
	if (new_entry)
		node_affinity_destroy(entry);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

/*
 * Release this device's affinity state: return its completion vector
 * CPUs for reuse and detach the device from the node's affinity entry.
 * The shared node entry itself is not removed from the global list here.
 */
void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
{
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	if (!dd->affinity_entry)
		goto unlock;
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	/*
	 * Free device completion vector CPUs to be used by future
	 * completion vectors
	 */
	_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
	dd->affinity_entry = NULL;
	mutex_unlock(&node_affinity.lock);
}

/*
 * Function updates the irq affinity hint for msix after it has been changed
 * by the user using the /proc/irq interface. This function only accepts
 * one cpu in the mask.
 */
static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
{
	struct sdma_engine *sde = msix->arg;
	struct hfi1_devdata *dd = sde->dd;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set;
	int i, old_cpu;

	/* Ignore out-of-range CPUs and no-op moves */
	if (cpu > num_online_cpus() || cpu == sde->cpu)
		return;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	old_cpu = sde->cpu;
	sde->cpu = cpu;
	cpumask_clear(&msix->mask);
	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
		   msix->irq, irq_type_names[msix->type],
		   sde->this_idx, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	/*
	 * Set the new cpu in the hfi1_affinity_node and clean
	 * the old cpu if it is not used by any other IRQ
	 */
	set = &entry->def_intr;
	cpumask_set_cpu(cpu, &set->mask);
	cpumask_set_cpu(cpu, &set->used);
	for (i = 0; i < dd->msix_info.max_requested; i++) {
		struct hfi1_msix_entry *other_msix;

		other_msix = &dd->msix_info.msix_entries[i];
		if (other_msix->type != IRQ_SDMA || other_msix == msix)
			continue;

		/* old_cpu still serves another SDMA IRQ; keep it accounted */
		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
			goto unlock;
	}
	cpumask_clear_cpu(old_cpu, &set->mask);
	cpumask_clear_cpu(old_cpu, &set->used);
unlock:
	mutex_unlock(&node_affinity.lock);
}

/*
 * irq_affinity_notify callback: forward the first CPU of the new mask to
 * the SDMA accounting update above.
 */
static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
				     const cpumask_t *mask)
{
	int cpu = cpumask_first(mask);
	struct hfi1_msix_entry *msix = container_of(notify,
						    struct hfi1_msix_entry,
						    notify);

	/* Only one CPU configuration supported currently */
	hfi1_update_sdma_affinity(msix, cpu);
}

static void hfi1_irq_notifier_release(struct kref *ref)
{
	/*
	 * This is required by affinity notifier. We don't have anything to
	 * free here.
	 */
}

/* Register the affinity-change notifier for an SDMA MSI-X vector */
static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	notify->irq = msix->irq;
	notify->notify = hfi1_irq_notifier_notify;
	notify->release = hfi1_irq_notifier_release;

	if (irq_set_affinity_notifier(notify->irq, notify))
		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

/* Unregister the affinity-change notifier for an SDMA MSI-X vector */
static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	if (irq_set_affinity_notifier(notify->irq, NULL))
		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

/*
 * Function sets the irq affinity for msix.
 * It *must* be called with node_affinity.lock held.
 */
static int get_irq_affinity(struct hfi1_devdata *dd,
			    struct hfi1_msix_entry *msix)
{
	cpumask_var_t diff;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set = NULL;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];
	int cpu = -1;

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	entry = node_affinity_lookup(dd->node);

	/*
	 * Either pick the CPU directly (cpu != -1) or select the mask set
	 * to draw an unused CPU from, depending on the IRQ type.
	 */
	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		cpu = cpumask_first(&entry->general_intr_mask);
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		/* the control context shares the general interrupt CPU */
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			cpu = cpumask_first(&entry->general_intr_mask);
		else
			set = &entry->rcv_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	case IRQ_NETDEVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		set = &entry->def_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		return -EINVAL;
	}

	/*
	 * The general and control contexts are placed on a particular
	 * CPU, which is set above. Skip accounting for it. Everything else
	 * finds its CPU here.
	 */
	if (cpu == -1 && set) {
		/* scratch mask for cpu_mask_set_get_first() */
		if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
			return -ENOMEM;

		cpu = cpu_mask_set_get_first(set, diff);
		if (cpu < 0) {
			free_cpumask_var(diff);
			dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
			return cpu;
		}

		free_cpumask_var(diff);
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
		    msix->irq, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	/* SDMA engines track their CPU and watch for user-driven moves */
	if (msix->type == IRQ_SDMA) {
		sde->cpu = cpu;
		hfi1_setup_sdma_notifier(msix);
	}

	return 0;
}

/* Locked wrapper around get_irq_affinity() */
int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;

	mutex_lock(&node_affinity.lock);
	ret = get_irq_affinity(dd, msix);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

/*
 * Release the CPU previously assigned to an MSI-X vector and clear its
 * affinity hint. Counterpart of hfi1_get_irq_affinity().
 */
void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		set = &entry->def_intr;
		hfi1_cleanup_sdma_notifier(msix);
		break;
	case IRQ_GENERAL:
		/* Don't do accounting for general contexts */
		break;
	case IRQ_RCVCTXT: {
		struct hfi1_ctxtdata *rcd = msix->arg;

		/* Don't do accounting for control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &entry->rcv_intr;
		break;
	}
	case IRQ_NETDEVCTXT:
		set = &entry->def_intr;
		break;
	default:
		mutex_unlock(&node_affinity.lock);
		return;
	}

	if (set) {
		/* return the vector's CPU(s) and roll back a generation */
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		_cpu_mask_set_gen_dec(set);
	}

	irq_set_affinity_hint(msix->irq, NULL);
	cpumask_clear(&msix->mask);
	mutex_unlock(&node_affinity.lock);
}

/* This should be called with node_affinity.lock held */
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
				struct hfi1_affinity_node_list *affinity)
{
	int possible, curr_cpu, i;
	/*
	 * NOTE(review): assumes CPUs are evenly distributed across sockets
	 * and siblings (symmetric topology) — confirm for the target systems.
	 */
	uint num_cores_per_socket = node_affinity.num_online_cpus /
					affinity->num_core_siblings /
						node_affinity.num_online_nodes;

	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
	if (affinity->num_core_siblings > 0) {
		/* Removing other siblings not needed for now */
		possible = cpumask_weight(hw_thread_mask);
		curr_cpu = cpumask_first(hw_thread_mask);
		/* skip past the first physical-core set of CPUs ... */
		for (i = 0;
		     i < num_cores_per_socket * node_affinity.num_online_nodes;
		     i++)
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);

		/* ... and drop every CPU after them from the mask */
		for (; i < possible; i++) {
			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
		}

		/* Identifying correct HW threads within physical cores */
		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
				   num_cores_per_socket *
				   node_affinity.num_online_nodes *
				   hw_thread_no);
	}
}

/*
 * Recommend a CPU for a user process opening a context on the device on
 * NUMA node @node, and mark it used. Returns the chosen CPU number, or -1
 * when no recommendation is made (caller then runs unpinned).
 */
int hfi1_get_proc_affinity(int node)
{
	int cpu = -1, ret, i;
	struct hfi1_affinity_node *entry;
	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
	const struct cpumask *node_mask,
		*proc_mask = current->cpus_ptr;
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	/*
	 * check whether process/context affinity has already
	 * been set
	 */
	if (current->nr_cpus_allowed == 1) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		goto done;
	}

	/*
	 * The process does not have a preset CPU affinity so find one to
	 * recommend using the following algorithm:
	 *
	 * For each user process that is opening a context on HFI Y:
	 *  a) If all cores are filled, reinitialize the bitmask
	 *  b) Fill real cores first, then HT cores (First set of HT
	 *     cores on all physical cores, then second set of HT core,
	 *     and, so on) in the following order:
	 *
	 *     1. Same NUMA node as HFI Y and not running an IRQ
	 *        handler
	 *     2. Same NUMA node as HFI Y and running an IRQ handler
	 *     3. Different NUMA node to HFI Y and not running an IRQ
	 *        handler
	 *     4. Different NUMA node to HFI Y and running an IRQ
	 *        handler
	 *  c) Mark core as filled in the bitmask. As user processes are
	 *     done, clear cores from the bitmask.
	 */

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
	if (!ret)
		goto free_hw_thread_mask;
	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
	if (!ret)
		goto free_available_mask;

	mutex_lock(&affinity->lock);
	/*
	 * If we've used all available HW threads, clear the mask and start
	 * overloading.
	 */
	_cpu_mask_set_gen_inc(set);

	/*
	 * If NUMA node has CPUs used by interrupt handlers, include them in the
	 * interrupt handler mask.
	 */
	entry = node_affinity_lookup(node);
	if (entry) {
		/* when overloaded (gen > 0), count the full mask as busy */
		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
					  &entry->def_intr.mask :
					  &entry->def_intr.used));
		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
						    &entry->rcv_intr.mask :
						    &entry->rcv_intr.used));
		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
	}
	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
		  cpumask_pr_args(intrs_mask));

	cpumask_copy(hw_thread_mask, &set->mask);

	/*
	 * If HT cores are enabled, identify which HW threads within the
	 * physical cores should be used.
	 */
	if (affinity->num_core_siblings > 0) {
		for (i = 0; i < affinity->num_core_siblings; i++) {
			find_hw_thread_mask(i, hw_thread_mask, affinity);

			/*
			 * If there's at least one available core for this HW
			 * thread number, stop looking for a core.
			 *
			 * diff will always be not empty at least once in this
			 * loop as the used mask gets reset when
			 * (set->mask == set->used) before this loop.
			 */
			cpumask_andnot(diff, hw_thread_mask, &set->used);
			if (!cpumask_empty(diff))
				break;
		}
	}
	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
		  cpumask_pr_args(hw_thread_mask));

	node_mask = cpumask_of_node(node);
	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
		  cpumask_pr_args(node_mask));

	/* Get cpumask of available CPUs on preferred NUMA */
	cpumask_and(available_mask, hw_thread_mask, node_mask);
	cpumask_andnot(available_mask, available_mask, &set->used);
	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
		  cpumask_pr_args(available_mask));

	/*
	 * At first, we don't want to place processes on the same
	 * CPUs as interrupt handlers. Then, CPUs running interrupt
	 * handlers are used.
	 *
	 * 1) If diff is not empty, then there are CPUs not running
	 *    non-interrupt handlers available, so diff gets copied
	 *    over to available_mask.
	 * 2) If diff is empty, then all CPUs not running interrupt
	 *    handlers are taken, so available_mask contains all
	 *    available CPUs running interrupt handlers.
	 * 3) If available_mask is empty, then all CPUs on the
	 *    preferred NUMA node are taken, so other NUMA nodes are
	 *    used for process assignments using the same method as
	 *    the preferred NUMA node.
	 */
	cpumask_andnot(diff, available_mask, intrs_mask);
	if (!cpumask_empty(diff))
		cpumask_copy(available_mask, diff);

	/* If we don't have CPUs on the preferred node, use other NUMA nodes */
	if (cpumask_empty(available_mask)) {
		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
		/* Excluding preferred NUMA cores */
		cpumask_andnot(available_mask, available_mask, node_mask);
		hfi1_cdbg(PROC,
			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
			  cpumask_pr_args(available_mask));

		/*
		 * At first, we don't want to place processes on the same
		 * CPUs as interrupt handlers.
		 */
		cpumask_andnot(diff, available_mask, intrs_mask);
		if (!cpumask_empty(diff))
			cpumask_copy(available_mask, diff);
	}
	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
		  cpumask_pr_args(available_mask));

	cpu = cpumask_first(available_mask);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);

	mutex_unlock(&affinity->lock);
	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);

	free_cpumask_var(intrs_mask);
free_available_mask:
	free_cpumask_var(available_mask);
free_hw_thread_mask:
	free_cpumask_var(hw_thread_mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}

/*
 * Return a CPU handed out by hfi1_get_proc_affinity() to the pool.
 * Safe to call with cpu == -1 (no-op).
 */
void hfi1_put_proc_affinity(int cpu)
{
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	if (cpu < 0)
		return;

	mutex_lock(&affinity->lock);
	cpu_mask_set_put(set, cpu);
	hfi1_cdbg(PROC, "Returning CPU %d for future process assignment", cpu);
	mutex_unlock(&affinity->lock);
}