/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
458c2ecf20Sopenharmony_ci * 468c2ecf20Sopenharmony_ci */ 478c2ecf20Sopenharmony_ci 488c2ecf20Sopenharmony_ci#include <linux/slab.h> 498c2ecf20Sopenharmony_ci#include <linux/vmalloc.h> 508c2ecf20Sopenharmony_ci#include <rdma/ib_umem.h> 518c2ecf20Sopenharmony_ci#include <rdma/rdma_vt.h> 528c2ecf20Sopenharmony_ci#include "vt.h" 538c2ecf20Sopenharmony_ci#include "mr.h" 548c2ecf20Sopenharmony_ci#include "trace.h" 558c2ecf20Sopenharmony_ci 568c2ecf20Sopenharmony_ci/** 578c2ecf20Sopenharmony_ci * rvt_driver_mr_init - Init MR resources per driver 588c2ecf20Sopenharmony_ci * @rdi: rvt dev struct 598c2ecf20Sopenharmony_ci * 608c2ecf20Sopenharmony_ci * Do any intilization needed when a driver registers with rdmavt. 618c2ecf20Sopenharmony_ci * 628c2ecf20Sopenharmony_ci * Return: 0 on success or errno on failure 638c2ecf20Sopenharmony_ci */ 648c2ecf20Sopenharmony_ciint rvt_driver_mr_init(struct rvt_dev_info *rdi) 658c2ecf20Sopenharmony_ci{ 668c2ecf20Sopenharmony_ci unsigned int lkey_table_size = rdi->dparms.lkey_table_size; 678c2ecf20Sopenharmony_ci unsigned lk_tab_size; 688c2ecf20Sopenharmony_ci int i; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci /* 718c2ecf20Sopenharmony_ci * The top hfi1_lkey_table_size bits are used to index the 728c2ecf20Sopenharmony_ci * table. The lower 8 bits can be owned by the user (copied from 738c2ecf20Sopenharmony_ci * the LKEY). The remaining bits act as a generation number or tag. 
748c2ecf20Sopenharmony_ci */ 758c2ecf20Sopenharmony_ci if (!lkey_table_size) 768c2ecf20Sopenharmony_ci return -EINVAL; 778c2ecf20Sopenharmony_ci 788c2ecf20Sopenharmony_ci spin_lock_init(&rdi->lkey_table.lock); 798c2ecf20Sopenharmony_ci 808c2ecf20Sopenharmony_ci /* ensure generation is at least 4 bits */ 818c2ecf20Sopenharmony_ci if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) { 828c2ecf20Sopenharmony_ci rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n", 838c2ecf20Sopenharmony_ci lkey_table_size, RVT_MAX_LKEY_TABLE_BITS); 848c2ecf20Sopenharmony_ci rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS; 858c2ecf20Sopenharmony_ci lkey_table_size = rdi->dparms.lkey_table_size; 868c2ecf20Sopenharmony_ci } 878c2ecf20Sopenharmony_ci rdi->lkey_table.max = 1 << lkey_table_size; 888c2ecf20Sopenharmony_ci rdi->lkey_table.shift = 32 - lkey_table_size; 898c2ecf20Sopenharmony_ci lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table); 908c2ecf20Sopenharmony_ci rdi->lkey_table.table = (struct rvt_mregion __rcu **) 918c2ecf20Sopenharmony_ci vmalloc_node(lk_tab_size, rdi->dparms.node); 928c2ecf20Sopenharmony_ci if (!rdi->lkey_table.table) 938c2ecf20Sopenharmony_ci return -ENOMEM; 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci RCU_INIT_POINTER(rdi->dma_mr, NULL); 968c2ecf20Sopenharmony_ci for (i = 0; i < rdi->lkey_table.max; i++) 978c2ecf20Sopenharmony_ci RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL); 988c2ecf20Sopenharmony_ci 998c2ecf20Sopenharmony_ci rdi->dparms.props.max_mr = rdi->lkey_table.max; 1008c2ecf20Sopenharmony_ci return 0; 1018c2ecf20Sopenharmony_ci} 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci/** 1048c2ecf20Sopenharmony_ci *rvt_mr_exit: clean up MR 1058c2ecf20Sopenharmony_ci *@rdi: rvt dev structure 1068c2ecf20Sopenharmony_ci * 1078c2ecf20Sopenharmony_ci * called when drivers have unregistered or perhaps failed to register with us 1088c2ecf20Sopenharmony_ci */ 1098c2ecf20Sopenharmony_civoid rvt_mr_exit(struct rvt_dev_info *rdi) 
1108c2ecf20Sopenharmony_ci{ 1118c2ecf20Sopenharmony_ci if (rdi->dma_mr) 1128c2ecf20Sopenharmony_ci rvt_pr_err(rdi, "DMA MR not null!\n"); 1138c2ecf20Sopenharmony_ci 1148c2ecf20Sopenharmony_ci vfree(rdi->lkey_table.table); 1158c2ecf20Sopenharmony_ci} 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_cistatic void rvt_deinit_mregion(struct rvt_mregion *mr) 1188c2ecf20Sopenharmony_ci{ 1198c2ecf20Sopenharmony_ci int i = mr->mapsz; 1208c2ecf20Sopenharmony_ci 1218c2ecf20Sopenharmony_ci mr->mapsz = 0; 1228c2ecf20Sopenharmony_ci while (i) 1238c2ecf20Sopenharmony_ci kfree(mr->map[--i]); 1248c2ecf20Sopenharmony_ci percpu_ref_exit(&mr->refcount); 1258c2ecf20Sopenharmony_ci} 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_cistatic void __rvt_mregion_complete(struct percpu_ref *ref) 1288c2ecf20Sopenharmony_ci{ 1298c2ecf20Sopenharmony_ci struct rvt_mregion *mr = container_of(ref, struct rvt_mregion, 1308c2ecf20Sopenharmony_ci refcount); 1318c2ecf20Sopenharmony_ci 1328c2ecf20Sopenharmony_ci complete(&mr->comp); 1338c2ecf20Sopenharmony_ci} 1348c2ecf20Sopenharmony_ci 1358c2ecf20Sopenharmony_cistatic int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd, 1368c2ecf20Sopenharmony_ci int count, unsigned int percpu_flags) 1378c2ecf20Sopenharmony_ci{ 1388c2ecf20Sopenharmony_ci int m, i = 0; 1398c2ecf20Sopenharmony_ci struct rvt_dev_info *dev = ib_to_rvt(pd->device); 1408c2ecf20Sopenharmony_ci 1418c2ecf20Sopenharmony_ci mr->mapsz = 0; 1428c2ecf20Sopenharmony_ci m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; 1438c2ecf20Sopenharmony_ci for (; i < m; i++) { 1448c2ecf20Sopenharmony_ci mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL, 1458c2ecf20Sopenharmony_ci dev->dparms.node); 1468c2ecf20Sopenharmony_ci if (!mr->map[i]) 1478c2ecf20Sopenharmony_ci goto bail; 1488c2ecf20Sopenharmony_ci mr->mapsz++; 1498c2ecf20Sopenharmony_ci } 1508c2ecf20Sopenharmony_ci init_completion(&mr->comp); 1518c2ecf20Sopenharmony_ci /* count returning the ptr to user */ 1528c2ecf20Sopenharmony_ci if 
(percpu_ref_init(&mr->refcount, &__rvt_mregion_complete, 1538c2ecf20Sopenharmony_ci percpu_flags, GFP_KERNEL)) 1548c2ecf20Sopenharmony_ci goto bail; 1558c2ecf20Sopenharmony_ci 1568c2ecf20Sopenharmony_ci atomic_set(&mr->lkey_invalid, 0); 1578c2ecf20Sopenharmony_ci mr->pd = pd; 1588c2ecf20Sopenharmony_ci mr->max_segs = count; 1598c2ecf20Sopenharmony_ci return 0; 1608c2ecf20Sopenharmony_cibail: 1618c2ecf20Sopenharmony_ci rvt_deinit_mregion(mr); 1628c2ecf20Sopenharmony_ci return -ENOMEM; 1638c2ecf20Sopenharmony_ci} 1648c2ecf20Sopenharmony_ci 1658c2ecf20Sopenharmony_ci/** 1668c2ecf20Sopenharmony_ci * rvt_alloc_lkey - allocate an lkey 1678c2ecf20Sopenharmony_ci * @mr: memory region that this lkey protects 1688c2ecf20Sopenharmony_ci * @dma_region: 0->normal key, 1->restricted DMA key 1698c2ecf20Sopenharmony_ci * 1708c2ecf20Sopenharmony_ci * Returns 0 if successful, otherwise returns -errno. 1718c2ecf20Sopenharmony_ci * 1728c2ecf20Sopenharmony_ci * Increments mr reference count as required. 1738c2ecf20Sopenharmony_ci * 1748c2ecf20Sopenharmony_ci * Sets the lkey field mr for non-dma regions. 
1758c2ecf20Sopenharmony_ci * 1768c2ecf20Sopenharmony_ci */ 1778c2ecf20Sopenharmony_cistatic int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region) 1788c2ecf20Sopenharmony_ci{ 1798c2ecf20Sopenharmony_ci unsigned long flags; 1808c2ecf20Sopenharmony_ci u32 r; 1818c2ecf20Sopenharmony_ci u32 n; 1828c2ecf20Sopenharmony_ci int ret = 0; 1838c2ecf20Sopenharmony_ci struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device); 1848c2ecf20Sopenharmony_ci struct rvt_lkey_table *rkt = &dev->lkey_table; 1858c2ecf20Sopenharmony_ci 1868c2ecf20Sopenharmony_ci rvt_get_mr(mr); 1878c2ecf20Sopenharmony_ci spin_lock_irqsave(&rkt->lock, flags); 1888c2ecf20Sopenharmony_ci 1898c2ecf20Sopenharmony_ci /* special case for dma_mr lkey == 0 */ 1908c2ecf20Sopenharmony_ci if (dma_region) { 1918c2ecf20Sopenharmony_ci struct rvt_mregion *tmr; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci tmr = rcu_access_pointer(dev->dma_mr); 1948c2ecf20Sopenharmony_ci if (!tmr) { 1958c2ecf20Sopenharmony_ci mr->lkey_published = 1; 1968c2ecf20Sopenharmony_ci /* Insure published written first */ 1978c2ecf20Sopenharmony_ci rcu_assign_pointer(dev->dma_mr, mr); 1988c2ecf20Sopenharmony_ci rvt_get_mr(mr); 1998c2ecf20Sopenharmony_ci } 2008c2ecf20Sopenharmony_ci goto success; 2018c2ecf20Sopenharmony_ci } 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci /* Find the next available LKEY */ 2048c2ecf20Sopenharmony_ci r = rkt->next; 2058c2ecf20Sopenharmony_ci n = r; 2068c2ecf20Sopenharmony_ci for (;;) { 2078c2ecf20Sopenharmony_ci if (!rcu_access_pointer(rkt->table[r])) 2088c2ecf20Sopenharmony_ci break; 2098c2ecf20Sopenharmony_ci r = (r + 1) & (rkt->max - 1); 2108c2ecf20Sopenharmony_ci if (r == n) 2118c2ecf20Sopenharmony_ci goto bail; 2128c2ecf20Sopenharmony_ci } 2138c2ecf20Sopenharmony_ci rkt->next = (r + 1) & (rkt->max - 1); 2148c2ecf20Sopenharmony_ci /* 2158c2ecf20Sopenharmony_ci * Make sure lkey is never zero which is reserved to indicate an 2168c2ecf20Sopenharmony_ci * unrestricted LKEY. 
2178c2ecf20Sopenharmony_ci */ 2188c2ecf20Sopenharmony_ci rkt->gen++; 2198c2ecf20Sopenharmony_ci /* 2208c2ecf20Sopenharmony_ci * bits are capped to ensure enough bits for generation number 2218c2ecf20Sopenharmony_ci */ 2228c2ecf20Sopenharmony_ci mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) | 2238c2ecf20Sopenharmony_ci ((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen) 2248c2ecf20Sopenharmony_ci << 8); 2258c2ecf20Sopenharmony_ci if (mr->lkey == 0) { 2268c2ecf20Sopenharmony_ci mr->lkey |= 1 << 8; 2278c2ecf20Sopenharmony_ci rkt->gen++; 2288c2ecf20Sopenharmony_ci } 2298c2ecf20Sopenharmony_ci mr->lkey_published = 1; 2308c2ecf20Sopenharmony_ci /* Insure published written first */ 2318c2ecf20Sopenharmony_ci rcu_assign_pointer(rkt->table[r], mr); 2328c2ecf20Sopenharmony_cisuccess: 2338c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rkt->lock, flags); 2348c2ecf20Sopenharmony_ciout: 2358c2ecf20Sopenharmony_ci return ret; 2368c2ecf20Sopenharmony_cibail: 2378c2ecf20Sopenharmony_ci rvt_put_mr(mr); 2388c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rkt->lock, flags); 2398c2ecf20Sopenharmony_ci ret = -ENOMEM; 2408c2ecf20Sopenharmony_ci goto out; 2418c2ecf20Sopenharmony_ci} 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci/** 2448c2ecf20Sopenharmony_ci * rvt_free_lkey - free an lkey 2458c2ecf20Sopenharmony_ci * @mr: mr to free from tables 2468c2ecf20Sopenharmony_ci */ 2478c2ecf20Sopenharmony_cistatic void rvt_free_lkey(struct rvt_mregion *mr) 2488c2ecf20Sopenharmony_ci{ 2498c2ecf20Sopenharmony_ci unsigned long flags; 2508c2ecf20Sopenharmony_ci u32 lkey = mr->lkey; 2518c2ecf20Sopenharmony_ci u32 r; 2528c2ecf20Sopenharmony_ci struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device); 2538c2ecf20Sopenharmony_ci struct rvt_lkey_table *rkt = &dev->lkey_table; 2548c2ecf20Sopenharmony_ci int freed = 0; 2558c2ecf20Sopenharmony_ci 2568c2ecf20Sopenharmony_ci spin_lock_irqsave(&rkt->lock, flags); 2578c2ecf20Sopenharmony_ci if (!lkey) { 2588c2ecf20Sopenharmony_ci if 
(mr->lkey_published) { 2598c2ecf20Sopenharmony_ci mr->lkey_published = 0; 2608c2ecf20Sopenharmony_ci /* insure published is written before pointer */ 2618c2ecf20Sopenharmony_ci rcu_assign_pointer(dev->dma_mr, NULL); 2628c2ecf20Sopenharmony_ci rvt_put_mr(mr); 2638c2ecf20Sopenharmony_ci } 2648c2ecf20Sopenharmony_ci } else { 2658c2ecf20Sopenharmony_ci if (!mr->lkey_published) 2668c2ecf20Sopenharmony_ci goto out; 2678c2ecf20Sopenharmony_ci r = lkey >> (32 - dev->dparms.lkey_table_size); 2688c2ecf20Sopenharmony_ci mr->lkey_published = 0; 2698c2ecf20Sopenharmony_ci /* insure published is written before pointer */ 2708c2ecf20Sopenharmony_ci rcu_assign_pointer(rkt->table[r], NULL); 2718c2ecf20Sopenharmony_ci } 2728c2ecf20Sopenharmony_ci freed++; 2738c2ecf20Sopenharmony_ciout: 2748c2ecf20Sopenharmony_ci spin_unlock_irqrestore(&rkt->lock, flags); 2758c2ecf20Sopenharmony_ci if (freed) 2768c2ecf20Sopenharmony_ci percpu_ref_kill(&mr->refcount); 2778c2ecf20Sopenharmony_ci} 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_cistatic struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd) 2808c2ecf20Sopenharmony_ci{ 2818c2ecf20Sopenharmony_ci struct rvt_mr *mr; 2828c2ecf20Sopenharmony_ci int rval = -ENOMEM; 2838c2ecf20Sopenharmony_ci int m; 2848c2ecf20Sopenharmony_ci 2858c2ecf20Sopenharmony_ci /* Allocate struct plus pointers to first level page tables. */ 2868c2ecf20Sopenharmony_ci m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ; 2878c2ecf20Sopenharmony_ci mr = kzalloc(struct_size(mr, mr.map, m), GFP_KERNEL); 2888c2ecf20Sopenharmony_ci if (!mr) 2898c2ecf20Sopenharmony_ci goto bail; 2908c2ecf20Sopenharmony_ci 2918c2ecf20Sopenharmony_ci rval = rvt_init_mregion(&mr->mr, pd, count, 0); 2928c2ecf20Sopenharmony_ci if (rval) 2938c2ecf20Sopenharmony_ci goto bail; 2948c2ecf20Sopenharmony_ci /* 2958c2ecf20Sopenharmony_ci * ib_reg_phys_mr() will initialize mr->ibmr except for 2968c2ecf20Sopenharmony_ci * lkey and rkey. 
2978c2ecf20Sopenharmony_ci */ 2988c2ecf20Sopenharmony_ci rval = rvt_alloc_lkey(&mr->mr, 0); 2998c2ecf20Sopenharmony_ci if (rval) 3008c2ecf20Sopenharmony_ci goto bail_mregion; 3018c2ecf20Sopenharmony_ci mr->ibmr.lkey = mr->mr.lkey; 3028c2ecf20Sopenharmony_ci mr->ibmr.rkey = mr->mr.lkey; 3038c2ecf20Sopenharmony_cidone: 3048c2ecf20Sopenharmony_ci return mr; 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_cibail_mregion: 3078c2ecf20Sopenharmony_ci rvt_deinit_mregion(&mr->mr); 3088c2ecf20Sopenharmony_cibail: 3098c2ecf20Sopenharmony_ci kfree(mr); 3108c2ecf20Sopenharmony_ci mr = ERR_PTR(rval); 3118c2ecf20Sopenharmony_ci goto done; 3128c2ecf20Sopenharmony_ci} 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_cistatic void __rvt_free_mr(struct rvt_mr *mr) 3158c2ecf20Sopenharmony_ci{ 3168c2ecf20Sopenharmony_ci rvt_free_lkey(&mr->mr); 3178c2ecf20Sopenharmony_ci rvt_deinit_mregion(&mr->mr); 3188c2ecf20Sopenharmony_ci kfree(mr); 3198c2ecf20Sopenharmony_ci} 3208c2ecf20Sopenharmony_ci 3218c2ecf20Sopenharmony_ci/** 3228c2ecf20Sopenharmony_ci * rvt_get_dma_mr - get a DMA memory region 3238c2ecf20Sopenharmony_ci * @pd: protection domain for this memory region 3248c2ecf20Sopenharmony_ci * @acc: access flags 3258c2ecf20Sopenharmony_ci * 3268c2ecf20Sopenharmony_ci * Return: the memory region on success, otherwise returns an errno. 
3278c2ecf20Sopenharmony_ci */ 3288c2ecf20Sopenharmony_cistruct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc) 3298c2ecf20Sopenharmony_ci{ 3308c2ecf20Sopenharmony_ci struct rvt_mr *mr; 3318c2ecf20Sopenharmony_ci struct ib_mr *ret; 3328c2ecf20Sopenharmony_ci int rval; 3338c2ecf20Sopenharmony_ci 3348c2ecf20Sopenharmony_ci if (ibpd_to_rvtpd(pd)->user) 3358c2ecf20Sopenharmony_ci return ERR_PTR(-EPERM); 3368c2ecf20Sopenharmony_ci 3378c2ecf20Sopenharmony_ci mr = kzalloc(sizeof(*mr), GFP_KERNEL); 3388c2ecf20Sopenharmony_ci if (!mr) { 3398c2ecf20Sopenharmony_ci ret = ERR_PTR(-ENOMEM); 3408c2ecf20Sopenharmony_ci goto bail; 3418c2ecf20Sopenharmony_ci } 3428c2ecf20Sopenharmony_ci 3438c2ecf20Sopenharmony_ci rval = rvt_init_mregion(&mr->mr, pd, 0, 0); 3448c2ecf20Sopenharmony_ci if (rval) { 3458c2ecf20Sopenharmony_ci ret = ERR_PTR(rval); 3468c2ecf20Sopenharmony_ci goto bail; 3478c2ecf20Sopenharmony_ci } 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_ci rval = rvt_alloc_lkey(&mr->mr, 1); 3508c2ecf20Sopenharmony_ci if (rval) { 3518c2ecf20Sopenharmony_ci ret = ERR_PTR(rval); 3528c2ecf20Sopenharmony_ci goto bail_mregion; 3538c2ecf20Sopenharmony_ci } 3548c2ecf20Sopenharmony_ci 3558c2ecf20Sopenharmony_ci mr->mr.access_flags = acc; 3568c2ecf20Sopenharmony_ci ret = &mr->ibmr; 3578c2ecf20Sopenharmony_cidone: 3588c2ecf20Sopenharmony_ci return ret; 3598c2ecf20Sopenharmony_ci 3608c2ecf20Sopenharmony_cibail_mregion: 3618c2ecf20Sopenharmony_ci rvt_deinit_mregion(&mr->mr); 3628c2ecf20Sopenharmony_cibail: 3638c2ecf20Sopenharmony_ci kfree(mr); 3648c2ecf20Sopenharmony_ci goto done; 3658c2ecf20Sopenharmony_ci} 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci/** 3688c2ecf20Sopenharmony_ci * rvt_reg_user_mr - register a userspace memory region 3698c2ecf20Sopenharmony_ci * @pd: protection domain for this memory region 3708c2ecf20Sopenharmony_ci * @start: starting userspace address 3718c2ecf20Sopenharmony_ci * @length: length of region to register 3728c2ecf20Sopenharmony_ci * 
@mr_access_flags: access flags for this memory region 3738c2ecf20Sopenharmony_ci * @udata: unused by the driver 3748c2ecf20Sopenharmony_ci * 3758c2ecf20Sopenharmony_ci * Return: the memory region on success, otherwise returns an errno. 3768c2ecf20Sopenharmony_ci */ 3778c2ecf20Sopenharmony_cistruct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 3788c2ecf20Sopenharmony_ci u64 virt_addr, int mr_access_flags, 3798c2ecf20Sopenharmony_ci struct ib_udata *udata) 3808c2ecf20Sopenharmony_ci{ 3818c2ecf20Sopenharmony_ci struct rvt_mr *mr; 3828c2ecf20Sopenharmony_ci struct ib_umem *umem; 3838c2ecf20Sopenharmony_ci struct sg_page_iter sg_iter; 3848c2ecf20Sopenharmony_ci int n, m; 3858c2ecf20Sopenharmony_ci struct ib_mr *ret; 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_ci if (length == 0) 3888c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 3898c2ecf20Sopenharmony_ci 3908c2ecf20Sopenharmony_ci umem = ib_umem_get(pd->device, start, length, mr_access_flags); 3918c2ecf20Sopenharmony_ci if (IS_ERR(umem)) 3928c2ecf20Sopenharmony_ci return (void *)umem; 3938c2ecf20Sopenharmony_ci 3948c2ecf20Sopenharmony_ci n = ib_umem_num_pages(umem); 3958c2ecf20Sopenharmony_ci 3968c2ecf20Sopenharmony_ci mr = __rvt_alloc_mr(n, pd); 3978c2ecf20Sopenharmony_ci if (IS_ERR(mr)) { 3988c2ecf20Sopenharmony_ci ret = (struct ib_mr *)mr; 3998c2ecf20Sopenharmony_ci goto bail_umem; 4008c2ecf20Sopenharmony_ci } 4018c2ecf20Sopenharmony_ci 4028c2ecf20Sopenharmony_ci mr->mr.user_base = start; 4038c2ecf20Sopenharmony_ci mr->mr.iova = virt_addr; 4048c2ecf20Sopenharmony_ci mr->mr.length = length; 4058c2ecf20Sopenharmony_ci mr->mr.offset = ib_umem_offset(umem); 4068c2ecf20Sopenharmony_ci mr->mr.access_flags = mr_access_flags; 4078c2ecf20Sopenharmony_ci mr->umem = umem; 4088c2ecf20Sopenharmony_ci 4098c2ecf20Sopenharmony_ci mr->mr.page_shift = PAGE_SHIFT; 4108c2ecf20Sopenharmony_ci m = 0; 4118c2ecf20Sopenharmony_ci n = 0; 4128c2ecf20Sopenharmony_ci for_each_sg_page (umem->sg_head.sgl, &sg_iter, 
umem->nmap, 0) { 4138c2ecf20Sopenharmony_ci void *vaddr; 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci vaddr = page_address(sg_page_iter_page(&sg_iter)); 4168c2ecf20Sopenharmony_ci if (!vaddr) { 4178c2ecf20Sopenharmony_ci ret = ERR_PTR(-EINVAL); 4188c2ecf20Sopenharmony_ci goto bail_inval; 4198c2ecf20Sopenharmony_ci } 4208c2ecf20Sopenharmony_ci mr->mr.map[m]->segs[n].vaddr = vaddr; 4218c2ecf20Sopenharmony_ci mr->mr.map[m]->segs[n].length = PAGE_SIZE; 4228c2ecf20Sopenharmony_ci trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, PAGE_SIZE); 4238c2ecf20Sopenharmony_ci if (++n == RVT_SEGSZ) { 4248c2ecf20Sopenharmony_ci m++; 4258c2ecf20Sopenharmony_ci n = 0; 4268c2ecf20Sopenharmony_ci } 4278c2ecf20Sopenharmony_ci } 4288c2ecf20Sopenharmony_ci return &mr->ibmr; 4298c2ecf20Sopenharmony_ci 4308c2ecf20Sopenharmony_cibail_inval: 4318c2ecf20Sopenharmony_ci __rvt_free_mr(mr); 4328c2ecf20Sopenharmony_ci 4338c2ecf20Sopenharmony_cibail_umem: 4348c2ecf20Sopenharmony_ci ib_umem_release(umem); 4358c2ecf20Sopenharmony_ci 4368c2ecf20Sopenharmony_ci return ret; 4378c2ecf20Sopenharmony_ci} 4388c2ecf20Sopenharmony_ci 4398c2ecf20Sopenharmony_ci/** 4408c2ecf20Sopenharmony_ci * rvt_dereg_clean_qp_cb - callback from iterator 4418c2ecf20Sopenharmony_ci * @qp - the qp 4428c2ecf20Sopenharmony_ci * @v - the mregion (as u64) 4438c2ecf20Sopenharmony_ci * 4448c2ecf20Sopenharmony_ci * This routine fields the callback for all QPs and 4458c2ecf20Sopenharmony_ci * for QPs in the same PD as the MR will call the 4468c2ecf20Sopenharmony_ci * rvt_qp_mr_clean() to potentially cleanup references. 
4478c2ecf20Sopenharmony_ci */ 4488c2ecf20Sopenharmony_cistatic void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v) 4498c2ecf20Sopenharmony_ci{ 4508c2ecf20Sopenharmony_ci struct rvt_mregion *mr = (struct rvt_mregion *)v; 4518c2ecf20Sopenharmony_ci 4528c2ecf20Sopenharmony_ci /* skip PDs that are not ours */ 4538c2ecf20Sopenharmony_ci if (mr->pd != qp->ibqp.pd) 4548c2ecf20Sopenharmony_ci return; 4558c2ecf20Sopenharmony_ci rvt_qp_mr_clean(qp, mr->lkey); 4568c2ecf20Sopenharmony_ci} 4578c2ecf20Sopenharmony_ci 4588c2ecf20Sopenharmony_ci/** 4598c2ecf20Sopenharmony_ci * rvt_dereg_clean_qps - find QPs for reference cleanup 4608c2ecf20Sopenharmony_ci * @mr - the MR that is being deregistered 4618c2ecf20Sopenharmony_ci * 4628c2ecf20Sopenharmony_ci * This routine iterates RC QPs looking for references 4638c2ecf20Sopenharmony_ci * to the lkey noted in mr. 4648c2ecf20Sopenharmony_ci */ 4658c2ecf20Sopenharmony_cistatic void rvt_dereg_clean_qps(struct rvt_mregion *mr) 4668c2ecf20Sopenharmony_ci{ 4678c2ecf20Sopenharmony_ci struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); 4688c2ecf20Sopenharmony_ci 4698c2ecf20Sopenharmony_ci rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb); 4708c2ecf20Sopenharmony_ci} 4718c2ecf20Sopenharmony_ci 4728c2ecf20Sopenharmony_ci/** 4738c2ecf20Sopenharmony_ci * rvt_check_refs - check references 4748c2ecf20Sopenharmony_ci * @mr - the megion 4758c2ecf20Sopenharmony_ci * @t - the caller identification 4768c2ecf20Sopenharmony_ci * 4778c2ecf20Sopenharmony_ci * This routine checks MRs holding a reference during 4788c2ecf20Sopenharmony_ci * when being de-registered. 4798c2ecf20Sopenharmony_ci * 4808c2ecf20Sopenharmony_ci * If the count is non-zero, the code calls a clean routine then 4818c2ecf20Sopenharmony_ci * waits for the timeout for the count to zero. 
4828c2ecf20Sopenharmony_ci */ 4838c2ecf20Sopenharmony_cistatic int rvt_check_refs(struct rvt_mregion *mr, const char *t) 4848c2ecf20Sopenharmony_ci{ 4858c2ecf20Sopenharmony_ci unsigned long timeout; 4868c2ecf20Sopenharmony_ci struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device); 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci if (mr->lkey) { 4898c2ecf20Sopenharmony_ci /* avoid dma mr */ 4908c2ecf20Sopenharmony_ci rvt_dereg_clean_qps(mr); 4918c2ecf20Sopenharmony_ci /* @mr was indexed on rcu protected @lkey_table */ 4928c2ecf20Sopenharmony_ci synchronize_rcu(); 4938c2ecf20Sopenharmony_ci } 4948c2ecf20Sopenharmony_ci 4958c2ecf20Sopenharmony_ci timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ); 4968c2ecf20Sopenharmony_ci if (!timeout) { 4978c2ecf20Sopenharmony_ci rvt_pr_err(rdi, 4988c2ecf20Sopenharmony_ci "%s timeout mr %p pd %p lkey %x refcount %ld\n", 4998c2ecf20Sopenharmony_ci t, mr, mr->pd, mr->lkey, 5008c2ecf20Sopenharmony_ci atomic_long_read(&mr->refcount.data->count)); 5018c2ecf20Sopenharmony_ci rvt_get_mr(mr); 5028c2ecf20Sopenharmony_ci return -EBUSY; 5038c2ecf20Sopenharmony_ci } 5048c2ecf20Sopenharmony_ci return 0; 5058c2ecf20Sopenharmony_ci} 5068c2ecf20Sopenharmony_ci 5078c2ecf20Sopenharmony_ci/** 5088c2ecf20Sopenharmony_ci * rvt_mr_has_lkey - is MR 5098c2ecf20Sopenharmony_ci * @mr - the mregion 5108c2ecf20Sopenharmony_ci * @lkey - the lkey 5118c2ecf20Sopenharmony_ci */ 5128c2ecf20Sopenharmony_cibool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey) 5138c2ecf20Sopenharmony_ci{ 5148c2ecf20Sopenharmony_ci return mr && lkey == mr->lkey; 5158c2ecf20Sopenharmony_ci} 5168c2ecf20Sopenharmony_ci 5178c2ecf20Sopenharmony_ci/** 5188c2ecf20Sopenharmony_ci * rvt_ss_has_lkey - is mr in sge tests 5198c2ecf20Sopenharmony_ci * @ss - the sge state 5208c2ecf20Sopenharmony_ci * @lkey 5218c2ecf20Sopenharmony_ci * 5228c2ecf20Sopenharmony_ci * This code tests for an MR in the indicated 5238c2ecf20Sopenharmony_ci * sge state. 
5248c2ecf20Sopenharmony_ci */ 5258c2ecf20Sopenharmony_cibool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey) 5268c2ecf20Sopenharmony_ci{ 5278c2ecf20Sopenharmony_ci int i; 5288c2ecf20Sopenharmony_ci bool rval = false; 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci if (!ss->num_sge) 5318c2ecf20Sopenharmony_ci return rval; 5328c2ecf20Sopenharmony_ci /* first one */ 5338c2ecf20Sopenharmony_ci rval = rvt_mr_has_lkey(ss->sge.mr, lkey); 5348c2ecf20Sopenharmony_ci /* any others */ 5358c2ecf20Sopenharmony_ci for (i = 0; !rval && i < ss->num_sge - 1; i++) 5368c2ecf20Sopenharmony_ci rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey); 5378c2ecf20Sopenharmony_ci return rval; 5388c2ecf20Sopenharmony_ci} 5398c2ecf20Sopenharmony_ci 5408c2ecf20Sopenharmony_ci/** 5418c2ecf20Sopenharmony_ci * rvt_dereg_mr - unregister and free a memory region 5428c2ecf20Sopenharmony_ci * @ibmr: the memory region to free 5438c2ecf20Sopenharmony_ci * 5448c2ecf20Sopenharmony_ci * 5458c2ecf20Sopenharmony_ci * Note that this is called to free MRs created by rvt_get_dma_mr() 5468c2ecf20Sopenharmony_ci * or rvt_reg_user_mr(). 5478c2ecf20Sopenharmony_ci * 5488c2ecf20Sopenharmony_ci * Returns 0 on success. 
5498c2ecf20Sopenharmony_ci */ 5508c2ecf20Sopenharmony_ciint rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 5518c2ecf20Sopenharmony_ci{ 5528c2ecf20Sopenharmony_ci struct rvt_mr *mr = to_imr(ibmr); 5538c2ecf20Sopenharmony_ci int ret; 5548c2ecf20Sopenharmony_ci 5558c2ecf20Sopenharmony_ci rvt_free_lkey(&mr->mr); 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci rvt_put_mr(&mr->mr); /* will set completion if last */ 5588c2ecf20Sopenharmony_ci ret = rvt_check_refs(&mr->mr, __func__); 5598c2ecf20Sopenharmony_ci if (ret) 5608c2ecf20Sopenharmony_ci goto out; 5618c2ecf20Sopenharmony_ci rvt_deinit_mregion(&mr->mr); 5628c2ecf20Sopenharmony_ci ib_umem_release(mr->umem); 5638c2ecf20Sopenharmony_ci kfree(mr); 5648c2ecf20Sopenharmony_ciout: 5658c2ecf20Sopenharmony_ci return ret; 5668c2ecf20Sopenharmony_ci} 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_ci/** 5698c2ecf20Sopenharmony_ci * rvt_alloc_mr - Allocate a memory region usable with the 5708c2ecf20Sopenharmony_ci * @pd: protection domain for this memory region 5718c2ecf20Sopenharmony_ci * @mr_type: mem region type 5728c2ecf20Sopenharmony_ci * @max_num_sg: Max number of segments allowed 5738c2ecf20Sopenharmony_ci * 5748c2ecf20Sopenharmony_ci * Return: the memory region on success, otherwise return an errno. 
5758c2ecf20Sopenharmony_ci */ 5768c2ecf20Sopenharmony_cistruct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 5778c2ecf20Sopenharmony_ci u32 max_num_sg) 5788c2ecf20Sopenharmony_ci{ 5798c2ecf20Sopenharmony_ci struct rvt_mr *mr; 5808c2ecf20Sopenharmony_ci 5818c2ecf20Sopenharmony_ci if (mr_type != IB_MR_TYPE_MEM_REG) 5828c2ecf20Sopenharmony_ci return ERR_PTR(-EINVAL); 5838c2ecf20Sopenharmony_ci 5848c2ecf20Sopenharmony_ci mr = __rvt_alloc_mr(max_num_sg, pd); 5858c2ecf20Sopenharmony_ci if (IS_ERR(mr)) 5868c2ecf20Sopenharmony_ci return (struct ib_mr *)mr; 5878c2ecf20Sopenharmony_ci 5888c2ecf20Sopenharmony_ci return &mr->ibmr; 5898c2ecf20Sopenharmony_ci} 5908c2ecf20Sopenharmony_ci 5918c2ecf20Sopenharmony_ci/** 5928c2ecf20Sopenharmony_ci * rvt_set_page - page assignment function called by ib_sg_to_pages 5938c2ecf20Sopenharmony_ci * @ibmr: memory region 5948c2ecf20Sopenharmony_ci * @addr: dma address of mapped page 5958c2ecf20Sopenharmony_ci * 5968c2ecf20Sopenharmony_ci * Return: 0 on success 5978c2ecf20Sopenharmony_ci */ 5988c2ecf20Sopenharmony_cistatic int rvt_set_page(struct ib_mr *ibmr, u64 addr) 5998c2ecf20Sopenharmony_ci{ 6008c2ecf20Sopenharmony_ci struct rvt_mr *mr = to_imr(ibmr); 6018c2ecf20Sopenharmony_ci u32 ps = 1 << mr->mr.page_shift; 6028c2ecf20Sopenharmony_ci u32 mapped_segs = mr->mr.length >> mr->mr.page_shift; 6038c2ecf20Sopenharmony_ci int m, n; 6048c2ecf20Sopenharmony_ci 6058c2ecf20Sopenharmony_ci if (unlikely(mapped_segs == mr->mr.max_segs)) 6068c2ecf20Sopenharmony_ci return -ENOMEM; 6078c2ecf20Sopenharmony_ci 6088c2ecf20Sopenharmony_ci m = mapped_segs / RVT_SEGSZ; 6098c2ecf20Sopenharmony_ci n = mapped_segs % RVT_SEGSZ; 6108c2ecf20Sopenharmony_ci mr->mr.map[m]->segs[n].vaddr = (void *)addr; 6118c2ecf20Sopenharmony_ci mr->mr.map[m]->segs[n].length = ps; 6128c2ecf20Sopenharmony_ci mr->mr.length += ps; 6138c2ecf20Sopenharmony_ci trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps); 6148c2ecf20Sopenharmony_ci 6158c2ecf20Sopenharmony_ci 
return 0; 6168c2ecf20Sopenharmony_ci} 6178c2ecf20Sopenharmony_ci 6188c2ecf20Sopenharmony_ci/** 6198c2ecf20Sopenharmony_ci * rvt_map_mr_sg - map sg list and set it the memory region 6208c2ecf20Sopenharmony_ci * @ibmr: memory region 6218c2ecf20Sopenharmony_ci * @sg: dma mapped scatterlist 6228c2ecf20Sopenharmony_ci * @sg_nents: number of entries in sg 6238c2ecf20Sopenharmony_ci * @sg_offset: offset in bytes into sg 6248c2ecf20Sopenharmony_ci * 6258c2ecf20Sopenharmony_ci * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages. 6268c2ecf20Sopenharmony_ci * 6278c2ecf20Sopenharmony_ci * Return: number of sg elements mapped to the memory region 6288c2ecf20Sopenharmony_ci */ 6298c2ecf20Sopenharmony_ciint rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, 6308c2ecf20Sopenharmony_ci int sg_nents, unsigned int *sg_offset) 6318c2ecf20Sopenharmony_ci{ 6328c2ecf20Sopenharmony_ci struct rvt_mr *mr = to_imr(ibmr); 6338c2ecf20Sopenharmony_ci int ret; 6348c2ecf20Sopenharmony_ci 6358c2ecf20Sopenharmony_ci mr->mr.length = 0; 6368c2ecf20Sopenharmony_ci mr->mr.page_shift = PAGE_SHIFT; 6378c2ecf20Sopenharmony_ci ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page); 6388c2ecf20Sopenharmony_ci mr->mr.user_base = ibmr->iova; 6398c2ecf20Sopenharmony_ci mr->mr.iova = ibmr->iova; 6408c2ecf20Sopenharmony_ci mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr; 6418c2ecf20Sopenharmony_ci mr->mr.length = (size_t)ibmr->length; 6428c2ecf20Sopenharmony_ci trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset); 6438c2ecf20Sopenharmony_ci return ret; 6448c2ecf20Sopenharmony_ci} 6458c2ecf20Sopenharmony_ci 6468c2ecf20Sopenharmony_ci/** 6478c2ecf20Sopenharmony_ci * rvt_fast_reg_mr - fast register physical MR 6488c2ecf20Sopenharmony_ci * @qp: the queue pair where the work request comes from 6498c2ecf20Sopenharmony_ci * @ibmr: the memory region to be registered 6508c2ecf20Sopenharmony_ci * @key: updated key for this memory region 6518c2ecf20Sopenharmony_ci * @access: 
access flags for this memory region 6528c2ecf20Sopenharmony_ci * 6538c2ecf20Sopenharmony_ci * Returns 0 on success. 6548c2ecf20Sopenharmony_ci */ 6558c2ecf20Sopenharmony_ciint rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key, 6568c2ecf20Sopenharmony_ci int access) 6578c2ecf20Sopenharmony_ci{ 6588c2ecf20Sopenharmony_ci struct rvt_mr *mr = to_imr(ibmr); 6598c2ecf20Sopenharmony_ci 6608c2ecf20Sopenharmony_ci if (qp->ibqp.pd != mr->mr.pd) 6618c2ecf20Sopenharmony_ci return -EACCES; 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_ci /* not applicable to dma MR or user MR */ 6648c2ecf20Sopenharmony_ci if (!mr->mr.lkey || mr->umem) 6658c2ecf20Sopenharmony_ci return -EINVAL; 6668c2ecf20Sopenharmony_ci 6678c2ecf20Sopenharmony_ci if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00)) 6688c2ecf20Sopenharmony_ci return -EINVAL; 6698c2ecf20Sopenharmony_ci 6708c2ecf20Sopenharmony_ci ibmr->lkey = key; 6718c2ecf20Sopenharmony_ci ibmr->rkey = key; 6728c2ecf20Sopenharmony_ci mr->mr.lkey = key; 6738c2ecf20Sopenharmony_ci mr->mr.access_flags = access; 6748c2ecf20Sopenharmony_ci mr->mr.iova = ibmr->iova; 6758c2ecf20Sopenharmony_ci atomic_set(&mr->mr.lkey_invalid, 0); 6768c2ecf20Sopenharmony_ci 6778c2ecf20Sopenharmony_ci return 0; 6788c2ecf20Sopenharmony_ci} 6798c2ecf20Sopenharmony_ciEXPORT_SYMBOL(rvt_fast_reg_mr); 6808c2ecf20Sopenharmony_ci 6818c2ecf20Sopenharmony_ci/** 6828c2ecf20Sopenharmony_ci * rvt_invalidate_rkey - invalidate an MR rkey 6838c2ecf20Sopenharmony_ci * @qp: queue pair associated with the invalidate op 6848c2ecf20Sopenharmony_ci * @rkey: rkey to invalidate 6858c2ecf20Sopenharmony_ci * 6868c2ecf20Sopenharmony_ci * Returns 0 on success. 
6878c2ecf20Sopenharmony_ci */ 6888c2ecf20Sopenharmony_ciint rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey) 6898c2ecf20Sopenharmony_ci{ 6908c2ecf20Sopenharmony_ci struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device); 6918c2ecf20Sopenharmony_ci struct rvt_lkey_table *rkt = &dev->lkey_table; 6928c2ecf20Sopenharmony_ci struct rvt_mregion *mr; 6938c2ecf20Sopenharmony_ci 6948c2ecf20Sopenharmony_ci if (rkey == 0) 6958c2ecf20Sopenharmony_ci return -EINVAL; 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci rcu_read_lock(); 6988c2ecf20Sopenharmony_ci mr = rcu_dereference( 6998c2ecf20Sopenharmony_ci rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]); 7008c2ecf20Sopenharmony_ci if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd)) 7018c2ecf20Sopenharmony_ci goto bail; 7028c2ecf20Sopenharmony_ci 7038c2ecf20Sopenharmony_ci atomic_set(&mr->lkey_invalid, 1); 7048c2ecf20Sopenharmony_ci rcu_read_unlock(); 7058c2ecf20Sopenharmony_ci return 0; 7068c2ecf20Sopenharmony_ci 7078c2ecf20Sopenharmony_cibail: 7088c2ecf20Sopenharmony_ci rcu_read_unlock(); 7098c2ecf20Sopenharmony_ci return -EINVAL; 7108c2ecf20Sopenharmony_ci} 7118c2ecf20Sopenharmony_ciEXPORT_SYMBOL(rvt_invalidate_rkey); 7128c2ecf20Sopenharmony_ci 7138c2ecf20Sopenharmony_ci/** 7148c2ecf20Sopenharmony_ci * rvt_sge_adjacent - is isge compressible 7158c2ecf20Sopenharmony_ci * @last_sge: last outgoing SGE written 7168c2ecf20Sopenharmony_ci * @sge: SGE to check 7178c2ecf20Sopenharmony_ci * 7188c2ecf20Sopenharmony_ci * If adjacent will update last_sge to add length. 
7198c2ecf20Sopenharmony_ci * 7208c2ecf20Sopenharmony_ci * Return: true if isge is adjacent to last sge 7218c2ecf20Sopenharmony_ci */ 7228c2ecf20Sopenharmony_cistatic inline bool rvt_sge_adjacent(struct rvt_sge *last_sge, 7238c2ecf20Sopenharmony_ci struct ib_sge *sge) 7248c2ecf20Sopenharmony_ci{ 7258c2ecf20Sopenharmony_ci if (last_sge && sge->lkey == last_sge->mr->lkey && 7268c2ecf20Sopenharmony_ci ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) { 7278c2ecf20Sopenharmony_ci if (sge->lkey) { 7288c2ecf20Sopenharmony_ci if (unlikely((sge->addr - last_sge->mr->user_base + 7298c2ecf20Sopenharmony_ci sge->length > last_sge->mr->length))) 7308c2ecf20Sopenharmony_ci return false; /* overrun, caller will catch */ 7318c2ecf20Sopenharmony_ci } else { 7328c2ecf20Sopenharmony_ci last_sge->length += sge->length; 7338c2ecf20Sopenharmony_ci } 7348c2ecf20Sopenharmony_ci last_sge->sge_length += sge->length; 7358c2ecf20Sopenharmony_ci trace_rvt_sge_adjacent(last_sge, sge); 7368c2ecf20Sopenharmony_ci return true; 7378c2ecf20Sopenharmony_ci } 7388c2ecf20Sopenharmony_ci return false; 7398c2ecf20Sopenharmony_ci} 7408c2ecf20Sopenharmony_ci 7418c2ecf20Sopenharmony_ci/** 7428c2ecf20Sopenharmony_ci * rvt_lkey_ok - check IB SGE for validity and initialize 7438c2ecf20Sopenharmony_ci * @rkt: table containing lkey to check SGE against 7448c2ecf20Sopenharmony_ci * @pd: protection domain 7458c2ecf20Sopenharmony_ci * @isge: outgoing internal SGE 7468c2ecf20Sopenharmony_ci * @last_sge: last outgoing SGE written 7478c2ecf20Sopenharmony_ci * @sge: SGE to check 7488c2ecf20Sopenharmony_ci * @acc: access flags 7498c2ecf20Sopenharmony_ci * 7508c2ecf20Sopenharmony_ci * Check the IB SGE for validity and initialize our internal version 7518c2ecf20Sopenharmony_ci * of it. 7528c2ecf20Sopenharmony_ci * 7538c2ecf20Sopenharmony_ci * Increments the reference count when a new sge is stored. 
7548c2ecf20Sopenharmony_ci * 7558c2ecf20Sopenharmony_ci * Return: 0 if compressed, 1 if added , otherwise returns -errno. 7568c2ecf20Sopenharmony_ci */ 7578c2ecf20Sopenharmony_ciint rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd, 7588c2ecf20Sopenharmony_ci struct rvt_sge *isge, struct rvt_sge *last_sge, 7598c2ecf20Sopenharmony_ci struct ib_sge *sge, int acc) 7608c2ecf20Sopenharmony_ci{ 7618c2ecf20Sopenharmony_ci struct rvt_mregion *mr; 7628c2ecf20Sopenharmony_ci unsigned n, m; 7638c2ecf20Sopenharmony_ci size_t off; 7648c2ecf20Sopenharmony_ci 7658c2ecf20Sopenharmony_ci /* 7668c2ecf20Sopenharmony_ci * We use LKEY == zero for kernel virtual addresses 7678c2ecf20Sopenharmony_ci * (see rvt_get_dma_mr()). 7688c2ecf20Sopenharmony_ci */ 7698c2ecf20Sopenharmony_ci if (sge->lkey == 0) { 7708c2ecf20Sopenharmony_ci struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device); 7718c2ecf20Sopenharmony_ci 7728c2ecf20Sopenharmony_ci if (pd->user) 7738c2ecf20Sopenharmony_ci return -EINVAL; 7748c2ecf20Sopenharmony_ci if (rvt_sge_adjacent(last_sge, sge)) 7758c2ecf20Sopenharmony_ci return 0; 7768c2ecf20Sopenharmony_ci rcu_read_lock(); 7778c2ecf20Sopenharmony_ci mr = rcu_dereference(dev->dma_mr); 7788c2ecf20Sopenharmony_ci if (!mr) 7798c2ecf20Sopenharmony_ci goto bail; 7808c2ecf20Sopenharmony_ci rvt_get_mr(mr); 7818c2ecf20Sopenharmony_ci rcu_read_unlock(); 7828c2ecf20Sopenharmony_ci 7838c2ecf20Sopenharmony_ci isge->mr = mr; 7848c2ecf20Sopenharmony_ci isge->vaddr = (void *)sge->addr; 7858c2ecf20Sopenharmony_ci isge->length = sge->length; 7868c2ecf20Sopenharmony_ci isge->sge_length = sge->length; 7878c2ecf20Sopenharmony_ci isge->m = 0; 7888c2ecf20Sopenharmony_ci isge->n = 0; 7898c2ecf20Sopenharmony_ci goto ok; 7908c2ecf20Sopenharmony_ci } 7918c2ecf20Sopenharmony_ci if (rvt_sge_adjacent(last_sge, sge)) 7928c2ecf20Sopenharmony_ci return 0; 7938c2ecf20Sopenharmony_ci rcu_read_lock(); 7948c2ecf20Sopenharmony_ci mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]); 
7958c2ecf20Sopenharmony_ci if (!mr) 7968c2ecf20Sopenharmony_ci goto bail; 7978c2ecf20Sopenharmony_ci rvt_get_mr(mr); 7988c2ecf20Sopenharmony_ci if (!READ_ONCE(mr->lkey_published)) 7998c2ecf20Sopenharmony_ci goto bail_unref; 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci if (unlikely(atomic_read(&mr->lkey_invalid) || 8028c2ecf20Sopenharmony_ci mr->lkey != sge->lkey || mr->pd != &pd->ibpd)) 8038c2ecf20Sopenharmony_ci goto bail_unref; 8048c2ecf20Sopenharmony_ci 8058c2ecf20Sopenharmony_ci off = sge->addr - mr->user_base; 8068c2ecf20Sopenharmony_ci if (unlikely(sge->addr < mr->user_base || 8078c2ecf20Sopenharmony_ci off + sge->length > mr->length || 8088c2ecf20Sopenharmony_ci (mr->access_flags & acc) != acc)) 8098c2ecf20Sopenharmony_ci goto bail_unref; 8108c2ecf20Sopenharmony_ci rcu_read_unlock(); 8118c2ecf20Sopenharmony_ci 8128c2ecf20Sopenharmony_ci off += mr->offset; 8138c2ecf20Sopenharmony_ci if (mr->page_shift) { 8148c2ecf20Sopenharmony_ci /* 8158c2ecf20Sopenharmony_ci * page sizes are uniform power of 2 so no loop is necessary 8168c2ecf20Sopenharmony_ci * entries_spanned_by_off is the number of times the loop below 8178c2ecf20Sopenharmony_ci * would have executed. 
8188c2ecf20Sopenharmony_ci */ 8198c2ecf20Sopenharmony_ci size_t entries_spanned_by_off; 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci entries_spanned_by_off = off >> mr->page_shift; 8228c2ecf20Sopenharmony_ci off -= (entries_spanned_by_off << mr->page_shift); 8238c2ecf20Sopenharmony_ci m = entries_spanned_by_off / RVT_SEGSZ; 8248c2ecf20Sopenharmony_ci n = entries_spanned_by_off % RVT_SEGSZ; 8258c2ecf20Sopenharmony_ci } else { 8268c2ecf20Sopenharmony_ci m = 0; 8278c2ecf20Sopenharmony_ci n = 0; 8288c2ecf20Sopenharmony_ci while (off >= mr->map[m]->segs[n].length) { 8298c2ecf20Sopenharmony_ci off -= mr->map[m]->segs[n].length; 8308c2ecf20Sopenharmony_ci n++; 8318c2ecf20Sopenharmony_ci if (n >= RVT_SEGSZ) { 8328c2ecf20Sopenharmony_ci m++; 8338c2ecf20Sopenharmony_ci n = 0; 8348c2ecf20Sopenharmony_ci } 8358c2ecf20Sopenharmony_ci } 8368c2ecf20Sopenharmony_ci } 8378c2ecf20Sopenharmony_ci isge->mr = mr; 8388c2ecf20Sopenharmony_ci isge->vaddr = mr->map[m]->segs[n].vaddr + off; 8398c2ecf20Sopenharmony_ci isge->length = mr->map[m]->segs[n].length - off; 8408c2ecf20Sopenharmony_ci isge->sge_length = sge->length; 8418c2ecf20Sopenharmony_ci isge->m = m; 8428c2ecf20Sopenharmony_ci isge->n = n; 8438c2ecf20Sopenharmony_ciok: 8448c2ecf20Sopenharmony_ci trace_rvt_sge_new(isge, sge); 8458c2ecf20Sopenharmony_ci return 1; 8468c2ecf20Sopenharmony_cibail_unref: 8478c2ecf20Sopenharmony_ci rvt_put_mr(mr); 8488c2ecf20Sopenharmony_cibail: 8498c2ecf20Sopenharmony_ci rcu_read_unlock(); 8508c2ecf20Sopenharmony_ci return -EINVAL; 8518c2ecf20Sopenharmony_ci} 8528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(rvt_lkey_ok); 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ci/** 8558c2ecf20Sopenharmony_ci * rvt_rkey_ok - check the IB virtual address, length, and RKEY 8568c2ecf20Sopenharmony_ci * @qp: qp for validation 8578c2ecf20Sopenharmony_ci * @sge: SGE state 8588c2ecf20Sopenharmony_ci * @len: length of data 8598c2ecf20Sopenharmony_ci * @vaddr: virtual address to place data 8608c2ecf20Sopenharmony_ci 
* @rkey: rkey to check 8618c2ecf20Sopenharmony_ci * @acc: access flags 8628c2ecf20Sopenharmony_ci * 8638c2ecf20Sopenharmony_ci * Return: 1 if successful, otherwise 0. 8648c2ecf20Sopenharmony_ci * 8658c2ecf20Sopenharmony_ci * increments the reference count upon success 8668c2ecf20Sopenharmony_ci */ 8678c2ecf20Sopenharmony_ciint rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge, 8688c2ecf20Sopenharmony_ci u32 len, u64 vaddr, u32 rkey, int acc) 8698c2ecf20Sopenharmony_ci{ 8708c2ecf20Sopenharmony_ci struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device); 8718c2ecf20Sopenharmony_ci struct rvt_lkey_table *rkt = &dev->lkey_table; 8728c2ecf20Sopenharmony_ci struct rvt_mregion *mr; 8738c2ecf20Sopenharmony_ci unsigned n, m; 8748c2ecf20Sopenharmony_ci size_t off; 8758c2ecf20Sopenharmony_ci 8768c2ecf20Sopenharmony_ci /* 8778c2ecf20Sopenharmony_ci * We use RKEY == zero for kernel virtual addresses 8788c2ecf20Sopenharmony_ci * (see rvt_get_dma_mr()). 8798c2ecf20Sopenharmony_ci */ 8808c2ecf20Sopenharmony_ci rcu_read_lock(); 8818c2ecf20Sopenharmony_ci if (rkey == 0) { 8828c2ecf20Sopenharmony_ci struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd); 8838c2ecf20Sopenharmony_ci struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device); 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci if (pd->user) 8868c2ecf20Sopenharmony_ci goto bail; 8878c2ecf20Sopenharmony_ci mr = rcu_dereference(rdi->dma_mr); 8888c2ecf20Sopenharmony_ci if (!mr) 8898c2ecf20Sopenharmony_ci goto bail; 8908c2ecf20Sopenharmony_ci rvt_get_mr(mr); 8918c2ecf20Sopenharmony_ci rcu_read_unlock(); 8928c2ecf20Sopenharmony_ci 8938c2ecf20Sopenharmony_ci sge->mr = mr; 8948c2ecf20Sopenharmony_ci sge->vaddr = (void *)vaddr; 8958c2ecf20Sopenharmony_ci sge->length = len; 8968c2ecf20Sopenharmony_ci sge->sge_length = len; 8978c2ecf20Sopenharmony_ci sge->m = 0; 8988c2ecf20Sopenharmony_ci sge->n = 0; 8998c2ecf20Sopenharmony_ci goto ok; 9008c2ecf20Sopenharmony_ci } 9018c2ecf20Sopenharmony_ci 9028c2ecf20Sopenharmony_ci mr = 
rcu_dereference(rkt->table[rkey >> rkt->shift]); 9038c2ecf20Sopenharmony_ci if (!mr) 9048c2ecf20Sopenharmony_ci goto bail; 9058c2ecf20Sopenharmony_ci rvt_get_mr(mr); 9068c2ecf20Sopenharmony_ci /* insure mr read is before test */ 9078c2ecf20Sopenharmony_ci if (!READ_ONCE(mr->lkey_published)) 9088c2ecf20Sopenharmony_ci goto bail_unref; 9098c2ecf20Sopenharmony_ci if (unlikely(atomic_read(&mr->lkey_invalid) || 9108c2ecf20Sopenharmony_ci mr->lkey != rkey || qp->ibqp.pd != mr->pd)) 9118c2ecf20Sopenharmony_ci goto bail_unref; 9128c2ecf20Sopenharmony_ci 9138c2ecf20Sopenharmony_ci off = vaddr - mr->iova; 9148c2ecf20Sopenharmony_ci if (unlikely(vaddr < mr->iova || off + len > mr->length || 9158c2ecf20Sopenharmony_ci (mr->access_flags & acc) == 0)) 9168c2ecf20Sopenharmony_ci goto bail_unref; 9178c2ecf20Sopenharmony_ci rcu_read_unlock(); 9188c2ecf20Sopenharmony_ci 9198c2ecf20Sopenharmony_ci off += mr->offset; 9208c2ecf20Sopenharmony_ci if (mr->page_shift) { 9218c2ecf20Sopenharmony_ci /* 9228c2ecf20Sopenharmony_ci * page sizes are uniform power of 2 so no loop is necessary 9238c2ecf20Sopenharmony_ci * entries_spanned_by_off is the number of times the loop below 9248c2ecf20Sopenharmony_ci * would have executed. 
9258c2ecf20Sopenharmony_ci */ 9268c2ecf20Sopenharmony_ci size_t entries_spanned_by_off; 9278c2ecf20Sopenharmony_ci 9288c2ecf20Sopenharmony_ci entries_spanned_by_off = off >> mr->page_shift; 9298c2ecf20Sopenharmony_ci off -= (entries_spanned_by_off << mr->page_shift); 9308c2ecf20Sopenharmony_ci m = entries_spanned_by_off / RVT_SEGSZ; 9318c2ecf20Sopenharmony_ci n = entries_spanned_by_off % RVT_SEGSZ; 9328c2ecf20Sopenharmony_ci } else { 9338c2ecf20Sopenharmony_ci m = 0; 9348c2ecf20Sopenharmony_ci n = 0; 9358c2ecf20Sopenharmony_ci while (off >= mr->map[m]->segs[n].length) { 9368c2ecf20Sopenharmony_ci off -= mr->map[m]->segs[n].length; 9378c2ecf20Sopenharmony_ci n++; 9388c2ecf20Sopenharmony_ci if (n >= RVT_SEGSZ) { 9398c2ecf20Sopenharmony_ci m++; 9408c2ecf20Sopenharmony_ci n = 0; 9418c2ecf20Sopenharmony_ci } 9428c2ecf20Sopenharmony_ci } 9438c2ecf20Sopenharmony_ci } 9448c2ecf20Sopenharmony_ci sge->mr = mr; 9458c2ecf20Sopenharmony_ci sge->vaddr = mr->map[m]->segs[n].vaddr + off; 9468c2ecf20Sopenharmony_ci sge->length = mr->map[m]->segs[n].length - off; 9478c2ecf20Sopenharmony_ci sge->sge_length = len; 9488c2ecf20Sopenharmony_ci sge->m = m; 9498c2ecf20Sopenharmony_ci sge->n = n; 9508c2ecf20Sopenharmony_ciok: 9518c2ecf20Sopenharmony_ci return 1; 9528c2ecf20Sopenharmony_cibail_unref: 9538c2ecf20Sopenharmony_ci rvt_put_mr(mr); 9548c2ecf20Sopenharmony_cibail: 9558c2ecf20Sopenharmony_ci rcu_read_unlock(); 9568c2ecf20Sopenharmony_ci return 0; 9578c2ecf20Sopenharmony_ci} 9588c2ecf20Sopenharmony_ciEXPORT_SYMBOL(rvt_rkey_ok); 959