/*
 * Copyright(c) 2015 - 2020 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license.  When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */
#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/numa.h>

#include "hfi.h"
#include "affinity.h"
#include "sdma.h"
#include "trace.h"

struct hfi1_affinity_node_list node_affinity = {
	.list = LIST_HEAD_INIT(node_affinity.list),
	.lock = __MUTEX_INITIALIZER(node_affinity.lock)
};

/* Name of IRQ types, indexed by enum irq_type */
static const char * const irq_type_names[] = {
	"SDMA",
	"RCVCTXT",
	"NETDEVCTXT",
	"GENERAL",
	"OTHER",
};

/* Per NUMA node count of HFI devices */
static unsigned int *hfi1_per_node_cntr;

static inline void init_cpu_mask_set(struct cpu_mask_set *set)
{
	cpumask_clear(&set->mask);
	cpumask_clear(&set->used);
	set->gen = 0;
}

/* Increment generation of CPU set if needed */
static void _cpu_mask_set_gen_inc(struct cpu_mask_set *set)
{
	if (cpumask_equal(&set->mask, &set->used)) {
		/*
		 * We've used up all the CPUs, bump up the generation
		 * and reset the 'used' map
		 */
		set->gen++;
		cpumask_clear(&set->used);
	}
}

static void _cpu_mask_set_gen_dec(struct cpu_mask_set *set)
{
	if (cpumask_empty(&set->used) && set->gen) {
		set->gen--;
		cpumask_copy(&set->used, &set->mask);
	}
}
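
/*
 * Illustration of the generation scheme above: with mask = {0,1}, two
 * allocations mark both CPUs used.  A third allocation finds
 * used == mask, so the generation is bumped, 'used' is cleared and
 * CPUs 0 and 1 are handed out again (overloaded).  When the
 * overloading user releases its CPU, 'used' becomes empty, the
 * generation drops back and 'used' is restored to the full mask of the
 * previous, still fully subscribed, generation.
 */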

/* Get the first CPU from the list of unused CPUs in a CPU set data structure */
static int cpu_mask_set_get_first(struct cpu_mask_set *set, cpumask_var_t diff)
{
	int cpu;

	if (!diff || !set)
		return -EINVAL;

	_cpu_mask_set_gen_inc(set);

	/* Find out CPUs left in CPU mask */
	cpumask_andnot(diff, &set->mask, &set->used);

	cpu = cpumask_first(diff);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -EINVAL;
	else
		cpumask_set_cpu(cpu, &set->used);

	return cpu;
}

static void cpu_mask_set_put(struct cpu_mask_set *set, int cpu)
{
	if (!set)
		return;

	cpumask_clear_cpu(cpu, &set->used);
	_cpu_mask_set_gen_dec(set);
}

/* Initialize non-HT cpu cores mask */
void init_real_cpu_mask(void)
{
	int possible, curr_cpu, i, ht;

	cpumask_clear(&node_affinity.real_cpu_mask);

	/* Start with cpu online mask as the real cpu mask */
	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);

	/*
	 * Remove HT cores from the real cpu mask. Do this in two steps below.
	 */
	possible = cpumask_weight(&node_affinity.real_cpu_mask);
	ht = cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.real_cpu_mask)));
	/*
	 * Step 1. Skip over the first N HT siblings and use them as the
	 * "real" cores. Assumes that HT cores are not enumerated in
	 * succession (except in the single core case).
	 */
	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
	for (i = 0; i < possible / ht; i++)
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	/*
	 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
	 * skip any gaps.
	 */
	for (; i < possible; i++) {
		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
	}
}
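
/*
 * Example (assuming two HT siblings per core, with cores enumerated as
 * CPUs 0-7 and their siblings as CPUs 8-15): step 1 above keeps CPUs
 * 0-7 and step 2 clears CPUs 8-15, leaving one hardware thread per
 * physical core in real_cpu_mask.
 */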

int node_affinity_init(void)
{
	int node;
	struct pci_dev *dev = NULL;
	const struct pci_device_id *ids = hfi1_pci_tbl;

	cpumask_clear(&node_affinity.proc.used);
	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);

	node_affinity.proc.gen = 0;
	node_affinity.num_core_siblings =
		cpumask_weight(topology_sibling_cpumask(
				cpumask_first(&node_affinity.proc.mask)
				));
	node_affinity.num_possible_nodes = num_possible_nodes();
	node_affinity.num_online_nodes = num_online_nodes();
	node_affinity.num_online_cpus = num_online_cpus();

	/*
	 * The real cpu mask is part of the affinity struct but it has to be
	 * initialized early. It is needed to calculate the number of user
	 * contexts in set_up_context_variables().
	 */
	init_real_cpu_mask();

	hfi1_per_node_cntr = kcalloc(node_affinity.num_possible_nodes,
				     sizeof(*hfi1_per_node_cntr), GFP_KERNEL);
	if (!hfi1_per_node_cntr)
		return -ENOMEM;

	while (ids->vendor) {
		dev = NULL;
		while ((dev = pci_get_device(ids->vendor, ids->device, dev))) {
			node = pcibus_to_node(dev->bus);
			if (node < 0)
				goto out;

			hfi1_per_node_cntr[node]++;
		}
		ids++;
	}

	return 0;

out:
	/*
	 * Invalid PCI NUMA node information found, note it, and populate
	 * our database 1:1.
	 */
	pr_err("HFI: Invalid PCI NUMA node. Performance may be affected\n");
	pr_err("HFI: System BIOS may need to be upgraded\n");
	for (node = 0; node < node_affinity.num_possible_nodes; node++)
		hfi1_per_node_cntr[node] = 1;

	pci_dev_put(dev);

	return 0;
}

static void node_affinity_destroy(struct hfi1_affinity_node *entry)
{
	free_percpu(entry->comp_vect_affinity);
	kfree(entry);
}

void node_affinity_destroy_all(void)
{
	struct list_head *pos, *q;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	list_for_each_safe(pos, q, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node,
				   list);
		list_del(pos);
		node_affinity_destroy(entry);
	}
	mutex_unlock(&node_affinity.lock);
	kfree(hfi1_per_node_cntr);
}

static struct hfi1_affinity_node *node_affinity_allocate(int node)
{
	struct hfi1_affinity_node *entry;

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return NULL;
	entry->node = node;
	entry->comp_vect_affinity = alloc_percpu(u16);
	INIT_LIST_HEAD(&entry->list);

	return entry;
}

/*
 * It appends an entry to the list.
 * It *must* be called with node_affinity.lock held.
 */
static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
{
	list_add_tail(&entry->list, &node_affinity.list);
}

/* It must be called with node_affinity.lock held */
static struct hfi1_affinity_node *node_affinity_lookup(int node)
{
	struct list_head *pos;
	struct hfi1_affinity_node *entry;

	list_for_each(pos, &node_affinity.list) {
		entry = list_entry(pos, struct hfi1_affinity_node, list);
		if (entry->node == node)
			return entry;
	}

	return NULL;
}

static int per_cpu_affinity_get(cpumask_var_t possible_cpumask,
				u16 __percpu *comp_vect_affinity)
{
	int curr_cpu;
	u16 cntr;
	u16 prev_cntr;
	int ret_cpu;

	if (!possible_cpumask) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	if (!comp_vect_affinity) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	ret_cpu = cpumask_first(possible_cpumask);
	if (ret_cpu >= nr_cpu_ids) {
		ret_cpu = -EINVAL;
		goto fail;
	}

	prev_cntr = *per_cpu_ptr(comp_vect_affinity, ret_cpu);
	for_each_cpu(curr_cpu, possible_cpumask) {
		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);

		if (cntr < prev_cntr) {
			ret_cpu = curr_cpu;
			prev_cntr = cntr;
		}
	}

	*per_cpu_ptr(comp_vect_affinity, ret_cpu) += 1;

fail:
	return ret_cpu;
}

static int per_cpu_affinity_put_max(cpumask_var_t possible_cpumask,
				    u16 __percpu *comp_vect_affinity)
{
	int curr_cpu;
	int max_cpu;
	u16 cntr;
	u16 prev_cntr;

	if (!possible_cpumask)
		return -EINVAL;

	if (!comp_vect_affinity)
		return -EINVAL;

	max_cpu = cpumask_first(possible_cpumask);
	if (max_cpu >= nr_cpu_ids)
		return -EINVAL;

	prev_cntr = *per_cpu_ptr(comp_vect_affinity, max_cpu);
	for_each_cpu(curr_cpu, possible_cpumask) {
		cntr = *per_cpu_ptr(comp_vect_affinity, curr_cpu);

		if (cntr > prev_cntr) {
			max_cpu = curr_cpu;
			prev_cntr = cntr;
		}
	}

	*per_cpu_ptr(comp_vect_affinity, max_cpu) -= 1;

	return max_cpu;
}
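
/*
 * The two helpers above keep a per-CPU reference count of completion
 * vectors: per_cpu_affinity_get() returns the least loaded CPU in
 * 'possible_cpumask' and increments its count, while
 * per_cpu_affinity_put_max() decrements the most loaded one.  Repeated
 * get() calls therefore spread completion vectors evenly across the
 * mask, and put_max() releases the most loaded assignment first.
 */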

/*
 * Non-interrupt CPUs are used first, then interrupt CPUs.
 * Two already allocated cpu masks must be passed.
 */
static int _dev_comp_vect_cpu_get(struct hfi1_devdata *dd,
				  struct hfi1_affinity_node *entry,
				  cpumask_var_t non_intr_cpus,
				  cpumask_var_t available_cpus)
	__must_hold(&node_affinity.lock)
{
	int cpu;
	struct cpu_mask_set *set = dd->comp_vect;

	lockdep_assert_held(&node_affinity.lock);
	if (!non_intr_cpus) {
		cpu = -1;
		goto fail;
	}

	if (!available_cpus) {
		cpu = -1;
		goto fail;
	}

	/* Available CPUs for pinning completion vectors */
	_cpu_mask_set_gen_inc(set);
	cpumask_andnot(available_cpus, &set->mask, &set->used);

	/* Available CPUs without SDMA engine interrupts */
	cpumask_andnot(non_intr_cpus, available_cpus,
		       &entry->def_intr.used);

	/* If there are non-interrupt CPUs available, use them first */
	if (!cpumask_empty(non_intr_cpus))
		cpu = cpumask_first(non_intr_cpus);
	else /* Otherwise, use interrupt CPUs */
		cpu = cpumask_first(available_cpus);

	if (cpu >= nr_cpu_ids) { /* empty */
		cpu = -1;
		goto fail;
	}
	cpumask_set_cpu(cpu, &set->used);

fail:
	return cpu;
}

static void _dev_comp_vect_cpu_put(struct hfi1_devdata *dd, int cpu)
{
	struct cpu_mask_set *set = dd->comp_vect;

	if (cpu < 0)
		return;

	cpu_mask_set_put(set, cpu);
}

/* _dev_comp_vect_mappings_destroy() is reentrant */
static void _dev_comp_vect_mappings_destroy(struct hfi1_devdata *dd)
{
	int i, cpu;

	if (!dd->comp_vect_mappings)
		return;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = dd->comp_vect_mappings[i];
		_dev_comp_vect_cpu_put(dd, cpu);
		dd->comp_vect_mappings[i] = -1;
		hfi1_cdbg(AFFINITY,
			  "[%s] Release CPU %d from completion vector %d",
			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), cpu, i);
	}

	kfree(dd->comp_vect_mappings);
	dd->comp_vect_mappings = NULL;
}

/*
 * This function creates the table for looking up CPUs for completion vectors.
 * num_comp_vectors needs to have been initialized before calling this function.
 */
static int _dev_comp_vect_mappings_create(struct hfi1_devdata *dd,
					  struct hfi1_affinity_node *entry)
	__must_hold(&node_affinity.lock)
{
	int i, cpu, ret;
	cpumask_var_t non_intr_cpus;
	cpumask_var_t available_cpus;

	lockdep_assert_held(&node_affinity.lock);

	if (!zalloc_cpumask_var(&non_intr_cpus, GFP_KERNEL))
		return -ENOMEM;

	if (!zalloc_cpumask_var(&available_cpus, GFP_KERNEL)) {
		free_cpumask_var(non_intr_cpus);
		return -ENOMEM;
	}

	dd->comp_vect_mappings = kcalloc(dd->comp_vect_possible_cpus,
					 sizeof(*dd->comp_vect_mappings),
					 GFP_KERNEL);
	if (!dd->comp_vect_mappings) {
		ret = -ENOMEM;
		goto fail;
	}
	for (i = 0; i < dd->comp_vect_possible_cpus; i++)
		dd->comp_vect_mappings[i] = -1;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = _dev_comp_vect_cpu_get(dd, entry, non_intr_cpus,
					     available_cpus);
		if (cpu < 0) {
			ret = -EINVAL;
			goto fail;
		}

		dd->comp_vect_mappings[i] = cpu;
		hfi1_cdbg(AFFINITY,
			  "[%s] Completion Vector %d -> CPU %d",
			  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi), i, cpu);
	}

	free_cpumask_var(available_cpus);
	free_cpumask_var(non_intr_cpus);
	return 0;

fail:
	free_cpumask_var(available_cpus);
	free_cpumask_var(non_intr_cpus);
	_dev_comp_vect_mappings_destroy(dd);

	return ret;
}

int hfi1_comp_vectors_set_up(struct hfi1_devdata *dd)
{
	int ret;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry) {
		ret = -EINVAL;
		goto unlock;
	}
	ret = _dev_comp_vect_mappings_create(dd, entry);
unlock:
	mutex_unlock(&node_affinity.lock);

	return ret;
}

void hfi1_comp_vectors_clean_up(struct hfi1_devdata *dd)
{
	_dev_comp_vect_mappings_destroy(dd);
}

int hfi1_comp_vect_mappings_lookup(struct rvt_dev_info *rdi, int comp_vect)
{
	struct hfi1_ibdev *verbs_dev = dev_from_rdi(rdi);
	struct hfi1_devdata *dd = dd_from_dev(verbs_dev);

	if (!dd->comp_vect_mappings)
		return -EINVAL;
	if (comp_vect >= dd->comp_vect_possible_cpus)
		return -EINVAL;

	return dd->comp_vect_mappings[comp_vect];
}

/*
 * It assumes dd->comp_vect_possible_cpus is available.
 */
static int _dev_comp_vect_cpu_mask_init(struct hfi1_devdata *dd,
					struct hfi1_affinity_node *entry,
					bool first_dev_init)
	__must_hold(&node_affinity.lock)
{
	int i, j, curr_cpu;
	int possible_cpus_comp_vect = 0;
	struct cpumask *dev_comp_vect_mask = &dd->comp_vect->mask;

	lockdep_assert_held(&node_affinity.lock);
	/*
	 * If there's only one CPU available for completion vectors, then
	 * there will only be one completion vector available. Otherwise,
	 * the number of completion vectors available will be the number of
	 * available CPUs divided by the number of devices in the
	 * local NUMA node.
	 */
	if (cpumask_weight(&entry->comp_vect_mask) == 1) {
		possible_cpus_comp_vect = 1;
		dd_dev_warn(dd,
			    "Number of kernel receive queues is too large for completion vector affinity to be effective\n");
	} else {
		possible_cpus_comp_vect +=
			cpumask_weight(&entry->comp_vect_mask) /
				       hfi1_per_node_cntr[dd->node];

		/*
		 * If the available completion vector CPUs don't divide
		 * evenly among devices, then the first device to be
		 * initialized gets an extra CPU.
		 */
		if (first_dev_init &&
		    cpumask_weight(&entry->comp_vect_mask) %
		    hfi1_per_node_cntr[dd->node] != 0)
			possible_cpus_comp_vect++;
	}

	dd->comp_vect_possible_cpus = possible_cpus_comp_vect;

	/* Reserving CPUs for device completion vector */
	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		curr_cpu = per_cpu_affinity_get(&entry->comp_vect_mask,
						entry->comp_vect_affinity);
		if (curr_cpu < 0)
			goto fail;

		cpumask_set_cpu(curr_cpu, dev_comp_vect_mask);
	}

	hfi1_cdbg(AFFINITY,
		  "[%s] Completion vector affinity CPU set(s) %*pbl",
		  rvt_get_ibdev_name(&(dd)->verbs_dev.rdi),
		  cpumask_pr_args(dev_comp_vect_mask));

	return 0;

fail:
	for (j = 0; j < i; j++)
		per_cpu_affinity_put_max(&entry->comp_vect_mask,
					 entry->comp_vect_affinity);

	return curr_cpu;
}
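
/*
 * Worked example for the sizing above: with 14 CPUs in comp_vect_mask
 * and two devices counted on the node, each device reserves 14 / 2 = 7
 * completion-vector CPUs.  With 15 CPUs, the first device initialized
 * reserves 8 and the second 7.
 */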

/*
 * It assumes dd->comp_vect_possible_cpus is available.
 */
static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
					     struct hfi1_affinity_node *entry)
	__must_hold(&node_affinity.lock)
{
	int i, cpu;

	lockdep_assert_held(&node_affinity.lock);
	if (!dd->comp_vect_possible_cpus)
		return;

	for (i = 0; i < dd->comp_vect_possible_cpus; i++) {
		cpu = per_cpu_affinity_put_max(&dd->comp_vect->mask,
					       entry->comp_vect_affinity);
		/* Clearing CPU in device completion vector cpu mask */
		if (cpu >= 0)
			cpumask_clear_cpu(cpu, &dd->comp_vect->mask);
	}

	dd->comp_vect_possible_cpus = 0;
}

/*
 * Interrupt affinity.
 *
 * non-rcv avail gets a default mask that
 * starts as possible cpus with threads reset
 * and each rcv avail reset.
 *
 * rcv avail gets node relative 1 wrapping back
 * to the node relative 1 as necessary.
 *
 */
int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
{
	struct hfi1_affinity_node *entry;
	const struct cpumask *local_mask;
	int curr_cpu, possible, i, ret;
	bool new_entry = false;

	local_mask = cpumask_of_node(dd->node);
	if (cpumask_first(local_mask) >= nr_cpu_ids)
		local_mask = topology_core_cpumask(0);

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	/*
	 * If this is the first time this NUMA node's affinity is used,
	 * create an entry in the global affinity structure and initialize it.
	 */
	if (!entry) {
		entry = node_affinity_allocate(dd->node);
		if (!entry) {
			dd_dev_err(dd,
				   "Unable to allocate global affinity node\n");
			ret = -ENOMEM;
			goto fail;
		}
		new_entry = true;

		init_cpu_mask_set(&entry->def_intr);
		init_cpu_mask_set(&entry->rcv_intr);
		cpumask_clear(&entry->comp_vect_mask);
		cpumask_clear(&entry->general_intr_mask);
		/* Use the "real" cpu mask of this node as the default */
		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
			    local_mask);

		/* fill in the receive list */
		possible = cpumask_weight(&entry->def_intr.mask);
		curr_cpu = cpumask_first(&entry->def_intr.mask);

		if (possible == 1) {
			/* only one CPU, everyone will use it */
			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
		} else {
			/*
			 * The general/control context will be the first CPU in
			 * the default list, so it is removed from the default
			 * list and added to the general interrupt list.
			 */
			cpumask_clear_cpu(curr_cpu, &entry->def_intr.mask);
			cpumask_set_cpu(curr_cpu, &entry->general_intr_mask);
			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);

			/*
			 * Remove the remaining kernel receive queues from
			 * the default list and add them to the receive list.
			 */
			for (i = 0;
			     i < (dd->n_krcv_queues - 1) *
				  hfi1_per_node_cntr[dd->node];
			     i++) {
				cpumask_clear_cpu(curr_cpu,
						  &entry->def_intr.mask);
				cpumask_set_cpu(curr_cpu,
						&entry->rcv_intr.mask);
				curr_cpu = cpumask_next(curr_cpu,
							&entry->def_intr.mask);
				if (curr_cpu >= nr_cpu_ids)
					break;
			}

			/*
			 * If there ends up being 0 CPU cores leftover for SDMA
			 * engines, use the same CPU cores as general/control
			 * context.
			 */
			if (cpumask_weight(&entry->def_intr.mask) == 0)
				cpumask_copy(&entry->def_intr.mask,
					     &entry->general_intr_mask);
		}

		/* Determine completion vector CPUs for the entire node */
		cpumask_and(&entry->comp_vect_mask,
			    &node_affinity.real_cpu_mask, local_mask);
		cpumask_andnot(&entry->comp_vect_mask,
			       &entry->comp_vect_mask,
			       &entry->rcv_intr.mask);
		cpumask_andnot(&entry->comp_vect_mask,
			       &entry->comp_vect_mask,
			       &entry->general_intr_mask);

		/*
		 * If there ends up being 0 CPU cores leftover for completion
		 * vectors, use the same CPU core as the general/control
		 * context.
		 */
		if (cpumask_weight(&entry->comp_vect_mask) == 0)
			cpumask_copy(&entry->comp_vect_mask,
				     &entry->general_intr_mask);
	}

	ret = _dev_comp_vect_cpu_mask_init(dd, entry, new_entry);
	if (ret < 0)
		goto fail;

	if (new_entry)
		node_affinity_add_tail(entry);

	dd->affinity_entry = entry;
	mutex_unlock(&node_affinity.lock);

	return 0;

fail:
	if (new_entry)
		node_affinity_destroy(entry);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
{
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	if (!dd->affinity_entry)
		goto unlock;
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	/*
	 * Free device completion vector CPUs to be used by future
	 * completion vectors
	 */
	_dev_comp_vect_cpu_mask_clean_up(dd, entry);
unlock:
	dd->affinity_entry = NULL;
	mutex_unlock(&node_affinity.lock);
}

/*
 * Function updates the irq affinity hint for msix after it has been changed
 * by the user using the /proc/irq interface. This function only accepts
 * one cpu in the mask.
 */
static void hfi1_update_sdma_affinity(struct hfi1_msix_entry *msix, int cpu)
{
	struct sdma_engine *sde = msix->arg;
	struct hfi1_devdata *dd = sde->dd;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set;
	int i, old_cpu;

	if (cpu > num_online_cpus() || cpu == sde->cpu)
		return;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);
	if (!entry)
		goto unlock;

	old_cpu = sde->cpu;
	sde->cpu = cpu;
	cpumask_clear(&msix->mask);
	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_dbg(dd, "IRQ: %u, type %s engine %u -> cpu: %d\n",
		   msix->irq, irq_type_names[msix->type],
		   sde->this_idx, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	/*
	 * Set the new cpu in the hfi1_affinity_node and clean
	 * the old cpu if it is not used by any other IRQ
	 */
	set = &entry->def_intr;
	cpumask_set_cpu(cpu, &set->mask);
	cpumask_set_cpu(cpu, &set->used);
	for (i = 0; i < dd->msix_info.max_requested; i++) {
		struct hfi1_msix_entry *other_msix;

		other_msix = &dd->msix_info.msix_entries[i];
		if (other_msix->type != IRQ_SDMA || other_msix == msix)
			continue;

		if (cpumask_test_cpu(old_cpu, &other_msix->mask))
			goto unlock;
	}
	cpumask_clear_cpu(old_cpu, &set->mask);
	cpumask_clear_cpu(old_cpu, &set->used);
unlock:
	mutex_unlock(&node_affinity.lock);
}

static void hfi1_irq_notifier_notify(struct irq_affinity_notify *notify,
				     const cpumask_t *mask)
{
	int cpu = cpumask_first(mask);
	struct hfi1_msix_entry *msix = container_of(notify,
						    struct hfi1_msix_entry,
						    notify);

	/* Only one CPU configuration supported currently */
	hfi1_update_sdma_affinity(msix, cpu);
}

static void hfi1_irq_notifier_release(struct kref *ref)
{
	/*
	 * This is required by affinity notifier. We don't have anything to
	 * free here.
	 */
}

static void hfi1_setup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	notify->irq = msix->irq;
	notify->notify = hfi1_irq_notifier_notify;
	notify->release = hfi1_irq_notifier_release;

	if (irq_set_affinity_notifier(notify->irq, notify))
		pr_err("Failed to register sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

static void hfi1_cleanup_sdma_notifier(struct hfi1_msix_entry *msix)
{
	struct irq_affinity_notify *notify = &msix->notify;

	if (irq_set_affinity_notifier(notify->irq, NULL))
		pr_err("Failed to cleanup sdma irq affinity notifier for irq %d\n",
		       notify->irq);
}

/*
 * Function sets the irq affinity for msix.
 * It *must* be called with node_affinity.lock held.
 */
static int get_irq_affinity(struct hfi1_devdata *dd,
			    struct hfi1_msix_entry *msix)
{
	cpumask_var_t diff;
	struct hfi1_affinity_node *entry;
	struct cpu_mask_set *set = NULL;
	struct sdma_engine *sde = NULL;
	struct hfi1_ctxtdata *rcd = NULL;
	char extra[64];
	int cpu = -1;

	extra[0] = '\0';
	cpumask_clear(&msix->mask);

	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		sde = (struct sdma_engine *)msix->arg;
		scnprintf(extra, 64, "engine %u", sde->this_idx);
		set = &entry->def_intr;
		break;
	case IRQ_GENERAL:
		cpu = cpumask_first(&entry->general_intr_mask);
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		if (rcd->ctxt == HFI1_CTRL_CTXT)
			cpu = cpumask_first(&entry->general_intr_mask);
		else
			set = &entry->rcv_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	case IRQ_NETDEVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		set = &entry->def_intr;
		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
		break;
	default:
		dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type);
		return -EINVAL;
	}

	/*
	 * The general and control contexts are placed on a particular
	 * CPU, which is set above. Skip accounting for it. Everything else
	 * finds its CPU here.
	 */
	if (cpu == -1 && set) {
		if (!zalloc_cpumask_var(&diff, GFP_KERNEL))
			return -ENOMEM;

		cpu = cpu_mask_set_get_first(set, diff);
		if (cpu < 0) {
			free_cpumask_var(diff);
			dd_dev_err(dd, "Failure to obtain CPU for IRQ\n");
			return cpu;
		}

		free_cpumask_var(diff);
	}

	cpumask_set_cpu(cpu, &msix->mask);
	dd_dev_info(dd, "IRQ: %u, type %s %s -> cpu: %d\n",
		    msix->irq, irq_type_names[msix->type],
		    extra, cpu);
	irq_set_affinity_hint(msix->irq, &msix->mask);

	if (msix->type == IRQ_SDMA) {
		sde->cpu = cpu;
		hfi1_setup_sdma_notifier(msix);
	}

	return 0;
}

int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
{
	int ret;

	mutex_lock(&node_affinity.lock);
	ret = get_irq_affinity(dd, msix);
	mutex_unlock(&node_affinity.lock);
	return ret;
}

void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
			   struct hfi1_msix_entry *msix)
{
	struct cpu_mask_set *set = NULL;
	struct hfi1_ctxtdata *rcd;
	struct hfi1_affinity_node *entry;

	mutex_lock(&node_affinity.lock);
	entry = node_affinity_lookup(dd->node);

	switch (msix->type) {
	case IRQ_SDMA:
		set = &entry->def_intr;
		hfi1_cleanup_sdma_notifier(msix);
		break;
	case IRQ_GENERAL:
		/* Don't do accounting for general contexts */
		break;
	case IRQ_RCVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		/* Don't do accounting for control contexts */
		if (rcd->ctxt != HFI1_CTRL_CTXT)
			set = &entry->rcv_intr;
		break;
	case IRQ_NETDEVCTXT:
		rcd = (struct hfi1_ctxtdata *)msix->arg;
		set = &entry->def_intr;
		break;
	default:
		mutex_unlock(&node_affinity.lock);
		return;
	}

	if (set) {
		cpumask_andnot(&set->used, &set->used, &msix->mask);
		_cpu_mask_set_gen_dec(set);
	}

	irq_set_affinity_hint(msix->irq, NULL);
	cpumask_clear(&msix->mask);
	mutex_unlock(&node_affinity.lock);
}

/* This should be called with node_affinity.lock held */
static void find_hw_thread_mask(uint hw_thread_no, cpumask_var_t hw_thread_mask,
				struct hfi1_affinity_node_list *affinity)
{
	int possible, curr_cpu, i;
	uint num_cores_per_socket = node_affinity.num_online_cpus /
					affinity->num_core_siblings /
						node_affinity.num_online_nodes;

	cpumask_copy(hw_thread_mask, &affinity->proc.mask);
	if (affinity->num_core_siblings > 0) {
		/* Removing other siblings not needed for now */
		possible = cpumask_weight(hw_thread_mask);
		curr_cpu = cpumask_first(hw_thread_mask);
		for (i = 0;
		     i < num_cores_per_socket * node_affinity.num_online_nodes;
		     i++)
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);

		for (; i < possible; i++) {
			cpumask_clear_cpu(curr_cpu, hw_thread_mask);
			curr_cpu = cpumask_next(curr_cpu, hw_thread_mask);
		}

		/* Identifying correct HW threads within physical cores */
		cpumask_shift_left(hw_thread_mask, hw_thread_mask,
				   num_cores_per_socket *
				   node_affinity.num_online_nodes *
				   hw_thread_no);
	}
}
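
/*
 * Illustration of find_hw_thread_mask(), assuming 2 online nodes with
 * 8 two-way HT cores each (32 online CPUs, siblings enumerated after
 * all cores): num_cores_per_socket is 32 / 2 / 2 = 8, so hw_thread_no 0
 * keeps the first 16 CPUs of proc.mask (the first hardware thread of
 * every core) and hw_thread_no 1 shifts that window up by 16 to select
 * the second hardware thread of every core.
 */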

int hfi1_get_proc_affinity(int node)
{
	int cpu = -1, ret, i;
	struct hfi1_affinity_node *entry;
	cpumask_var_t diff, hw_thread_mask, available_mask, intrs_mask;
	const struct cpumask *node_mask,
		*proc_mask = current->cpus_ptr;
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	/*
	 * check whether process/context affinity has already
	 * been set
	 */
	if (current->nr_cpus_allowed == 1) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		/*
		 * Mark the pre-set CPU as used. This is atomic so we don't
		 * need the lock
		 */
		cpu = cpumask_first(proc_mask);
		cpumask_set_cpu(cpu, &set->used);
		goto done;
	} else if (current->nr_cpus_allowed < cpumask_weight(&set->mask)) {
		hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl",
			  current->pid, current->comm,
			  cpumask_pr_args(proc_mask));
		goto done;
	}

	/*
	 * The process does not have a preset CPU affinity so find one to
	 * recommend using the following algorithm:
	 *
	 * For each user process that is opening a context on HFI Y:
	 *  a) If all cores are filled, reinitialize the bitmask
	 *  b) Fill real cores first, then HT cores (First set of HT
	 *     cores on all physical cores, then second set of HT core,
	 *     and, so on) in the following order:
	 *
	 *     1. Same NUMA node as HFI Y and not running an IRQ
	 *        handler
	 *     2. Same NUMA node as HFI Y and running an IRQ handler
	 *     3. Different NUMA node to HFI Y and not running an IRQ
	 *        handler
	 *     4. Different NUMA node to HFI Y and running an IRQ
	 *        handler
	 *  c) Mark core as filled in the bitmask. As user processes are
	 *     done, clear cores from the bitmask.
	 */

	ret = zalloc_cpumask_var(&diff, GFP_KERNEL);
	if (!ret)
		goto done;
	ret = zalloc_cpumask_var(&hw_thread_mask, GFP_KERNEL);
	if (!ret)
		goto free_diff;
	ret = zalloc_cpumask_var(&available_mask, GFP_KERNEL);
	if (!ret)
		goto free_hw_thread_mask;
	ret = zalloc_cpumask_var(&intrs_mask, GFP_KERNEL);
	if (!ret)
		goto free_available_mask;

	mutex_lock(&affinity->lock);
	/*
	 * If we've used all available HW threads, clear the mask and start
	 * overloading.
	 */
	_cpu_mask_set_gen_inc(set);

	/*
	 * If NUMA node has CPUs used by interrupt handlers, include them in the
	 * interrupt handler mask.
	entry = node_affinity_lookup(node);
	if (entry) {
		cpumask_copy(intrs_mask, (entry->def_intr.gen ?
					  &entry->def_intr.mask :
					  &entry->def_intr.used));
		cpumask_or(intrs_mask, intrs_mask, (entry->rcv_intr.gen ?
						    &entry->rcv_intr.mask :
						    &entry->rcv_intr.used));
		cpumask_or(intrs_mask, intrs_mask, &entry->general_intr_mask);
	}
	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
		  cpumask_pr_args(intrs_mask));

	cpumask_copy(hw_thread_mask, &set->mask);

	/*
	 * If HT cores are enabled, identify which HW threads within the
	 * physical cores should be used.
	 */
	if (affinity->num_core_siblings > 0) {
		for (i = 0; i < affinity->num_core_siblings; i++) {
			find_hw_thread_mask(i, hw_thread_mask, affinity);

			/*
			 * If there's at least one available core for this HW
			 * thread number, stop looking for a core.
			 *
			 * diff will be non-empty at least once in this loop,
			 * as the used mask gets reset when
			 * (set->mask == set->used) before this loop.
			 */
			cpumask_andnot(diff, hw_thread_mask, &set->used);
			if (!cpumask_empty(diff))
				break;
		}
	}
	hfi1_cdbg(PROC, "Same available HW thread on all physical CPUs: %*pbl",
		  cpumask_pr_args(hw_thread_mask));

	node_mask = cpumask_of_node(node);
	hfi1_cdbg(PROC, "Device on NUMA %u, CPUs %*pbl", node,
		  cpumask_pr_args(node_mask));

	/* Get cpumask of available CPUs on preferred NUMA node */
	cpumask_and(available_mask, hw_thread_mask, node_mask);
	cpumask_andnot(available_mask, available_mask, &set->used);
	hfi1_cdbg(PROC, "Available CPUs on NUMA %u: %*pbl", node,
		  cpumask_pr_args(available_mask));

	/*
	 * At first, we don't want to place processes on the same
	 * CPUs as interrupt handlers. Then, CPUs running interrupt
	 * handlers are used.
	 *
	 * 1) If diff is not empty, then there are CPUs not running
	 *    interrupt handlers available, so diff gets copied
	 *    over to available_mask.
	 * 2) If diff is empty, then all CPUs not running interrupt
	 *    handlers are taken, so available_mask contains all
	 *    available CPUs running interrupt handlers.
	 * 3) If available_mask is empty, then all CPUs on the
	 *    preferred NUMA node are taken, so other NUMA nodes are
	 *    used for process assignments using the same method as
	 *    the preferred NUMA node.
	 */
	cpumask_andnot(diff, available_mask, intrs_mask);
	if (!cpumask_empty(diff))
		cpumask_copy(available_mask, diff);

	/* If we don't have CPUs on the preferred node, use other NUMA nodes */
	if (cpumask_empty(available_mask)) {
		cpumask_andnot(available_mask, hw_thread_mask, &set->used);
		/* Excluding preferred NUMA cores */
		cpumask_andnot(available_mask, available_mask, node_mask);
		hfi1_cdbg(PROC,
			  "Preferred NUMA node cores are taken, cores available in other NUMA nodes: %*pbl",
			  cpumask_pr_args(available_mask));

		/*
		 * At first, we don't want to place processes on the same
		 * CPUs as interrupt handlers.
		 */
		cpumask_andnot(diff, available_mask, intrs_mask);
		if (!cpumask_empty(diff))
			cpumask_copy(available_mask, diff);
	}
	hfi1_cdbg(PROC, "Possible CPUs for process: %*pbl",
		  cpumask_pr_args(available_mask));

	cpu = cpumask_first(available_mask);
	if (cpu >= nr_cpu_ids) /* empty */
		cpu = -1;
	else
		cpumask_set_cpu(cpu, &set->used);

	mutex_unlock(&affinity->lock);
	hfi1_cdbg(PROC, "Process assigned to CPU %d", cpu);

	free_cpumask_var(intrs_mask);
free_available_mask:
	free_cpumask_var(available_mask);
free_hw_thread_mask:
	free_cpumask_var(hw_thread_mask);
free_diff:
	free_cpumask_var(diff);
done:
	return cpu;
}

/*
 * Return a CPU previously handed out by hfi1_get_proc_affinity() to the
 * pool available for future process assignments.  Negative values (no CPU
 * was assigned) are ignored.
 */
void hfi1_put_proc_affinity(int cpu)
{
	struct hfi1_affinity_node_list *affinity = &node_affinity;
	struct cpu_mask_set *set = &affinity->proc;

	if (cpu < 0)
		return;

	mutex_lock(&affinity->lock);
	cpu_mask_set_put(set, cpu);
	hfi1_cdbg(PROC,
		  "Returning CPU %d for future process assignment", cpu);
	mutex_unlock(&affinity->lock);
}