/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <linux/kernel.h>

#include "mlx5_ib.h"
#include "cmd.h"
#include "qp.h"

#include <linux/mlx5/eq.h>

/* Contains the details of a pagefault. */
struct mlx5_pagefault {
	u32			bytes_committed;
	u32			token;
	u8			event_subtype;
	u8			type;
	union {
		/* Initiator or send message responder pagefault details. */
		struct {
			/* Received packet size, only valid for responders. */
			u32	packet_size;
			/*
			 * Number of resource holding WQE, depends on type.
			 */
			u32	wq_num;
			/*
			 * WQE index. Refers to either the send queue or
			 * receive queue, according to event_subtype.
			 */
			u16	wqe_index;
		} wqe;
		/* RDMA responder pagefault details */
		struct {
			u32	r_key;
			/*
			 * Received packet size, minimal size page fault
			 * resolution required for forward progress.
			 */
			u32	packet_size;
			u32	rdma_op_len;
			u64	rdma_va;
		} rdma;
	};

	struct mlx5_ib_pf_eq	*eq;
	struct work_struct	work;
};

#define MAX_PREFETCH_LEN (4*1024*1024U)

/* Timeout in ms to wait for an active mmu notifier to complete when handling
 * a pagefault. */
#define MMU_NOTIFIER_TIMEOUT 1000

#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))

#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT

static u64 mlx5_imr_ksm_entries;

static void populate_klm(struct mlx5_klm *pklm, size_t idx, size_t nentries,
			 struct mlx5_ib_mr *imr, int flags)
{
	struct mlx5_klm *end = pklm + nentries;

	if (flags & MLX5_IB_UPD_XLT_ZAP) {
		for (; pklm != end; pklm++, idx++) {
			pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
			pklm->key = cpu_to_be32(imr->dev->null_mkey);
			pklm->va = 0;
		}
		return;
	}

	/*
	 * The locking here is pretty subtle. Ideally the implicit_children
	 * xarray would be protected by the umem_mutex, however that is not
	 * possible. Instead this uses a weaker update-then-lock pattern:
	 *
	 *    srcu_read_lock()
	 *      xa_store()
	 *    mutex_lock(umem_mutex)
	 *      mlx5_ib_update_xlt()
	 *    mutex_unlock(umem_mutex)
	 *    destroy lkey
	 *
	 * i.e. any change to the xarray must be followed by the locked
	 * update_xlt before destroying.
	 *
	 * The umem_mutex provides the acquire/release semantic needed to make
	 * the xa_store() visible to a racing thread.
	 * While SRCU is not technically required, using it gives consistent
	 * use of the SRCU locking around the xarray.
	 */
	lockdep_assert_held(&to_ib_umem_odp(imr->umem)->umem_mutex);
	lockdep_assert_held(&imr->dev->odp_srcu);

	for (; pklm != end; pklm++, idx++) {
		struct mlx5_ib_mr *mtt = xa_load(&imr->implicit_children, idx);

		pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
		if (mtt) {
			pklm->key = cpu_to_be32(mtt->ibmr.lkey);
			pklm->va = cpu_to_be64(idx * MLX5_IMR_MTT_SIZE);
		} else {
			pklm->key = cpu_to_be32(imr->dev->null_mkey);
			pklm->va = 0;
		}
	}
}

static u64 umem_dma_to_mtt(dma_addr_t umem_dma)
{
	u64 mtt_entry = umem_dma & ODP_DMA_ADDR_MASK;

	if (umem_dma & ODP_READ_ALLOWED_BIT)
		mtt_entry |= MLX5_IB_MTT_READ;
	if (umem_dma & ODP_WRITE_ALLOWED_BIT)
		mtt_entry |= MLX5_IB_MTT_WRITE;

	return mtt_entry;
}

static void populate_mtt(__be64 *pas, size_t idx, size_t nentries,
			 struct mlx5_ib_mr *mr, int flags)
{
	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
	dma_addr_t pa;
	size_t i;

	if (flags & MLX5_IB_UPD_XLT_ZAP)
		return;

	for (i = 0; i < nentries; i++) {
		pa = odp->dma_list[idx + i];
		pas[i] = cpu_to_be64(umem_dma_to_mtt(pa));
	}
}

void mlx5_odp_populate_xlt(void *xlt, size_t idx, size_t nentries,
			   struct mlx5_ib_mr *mr, int flags)
{
	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
		populate_klm(xlt, idx, nentries, mr, flags);
	} else {
		populate_mtt(xlt, idx, nentries, mr, flags);
	}
}

static void dma_fence_odp_mr(struct mlx5_ib_mr *mr)
{
	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);

	/* Ensure mlx5_ib_invalidate_range() will not touch the MR any more */
	mutex_lock(&odp->umem_mutex);
	if (odp->npages) {
		mlx5_mr_cache_invalidate(mr);
		ib_umem_odp_unmap_dma_pages(odp, ib_umem_start(odp),
					    ib_umem_end(odp));

		WARN_ON(odp->npages);
	}
	odp->private = NULL;
	mutex_unlock(&odp->umem_mutex);

	if (!mr->cache_ent) {
		mlx5_core_destroy_mkey(mr->dev->mdev, &mr->mmkey);
		WARN_ON(mr->descs);
	}
}

/*
 * This must be called after the mr has been removed from implicit_children
 * and the SRCU synchronized. NOTE: The MR does not necessarily have to be
 * empty here, parallel page faults could have raced with the free process and
 * added pages to it.
 */
static void free_implicit_child_mr(struct mlx5_ib_mr *mr, bool need_imr_xlt)
{
	struct mlx5_ib_mr *imr = mr->parent;
	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
	unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
	int srcu_key;

	/* Implicit child MRs are not allowed to have deferred work. */
	WARN_ON(atomic_read(&mr->num_deferred_work));

	if (need_imr_xlt) {
		srcu_key = srcu_read_lock(&mr->dev->odp_srcu);
		mutex_lock(&odp_imr->umem_mutex);
		mlx5_ib_update_xlt(mr->parent, idx, 1, 0,
				   MLX5_IB_UPD_XLT_INDIRECT |
				   MLX5_IB_UPD_XLT_ATOMIC);
		mutex_unlock(&odp_imr->umem_mutex);
		srcu_read_unlock(&mr->dev->odp_srcu, srcu_key);
	}

	dma_fence_odp_mr(mr);

	mr->parent = NULL;
	mlx5_mr_cache_free(mr->dev, mr);
	ib_umem_odp_release(odp);
	if (atomic_dec_and_test(&imr->num_deferred_work))
		wake_up(&imr->q_deferred_work);
}

static void free_implicit_child_mr_work(struct work_struct *work)
{
	struct mlx5_ib_mr *mr =
		container_of(work, struct mlx5_ib_mr, odp_destroy.work);

	free_implicit_child_mr(mr, true);
}

static void free_implicit_child_mr_rcu(struct rcu_head *head)
{
	struct mlx5_ib_mr *mr =
		container_of(head, struct mlx5_ib_mr, odp_destroy.rcu);

	/* Freeing an MR is a sleeping operation, so bounce to a work queue. */
	INIT_WORK(&mr->odp_destroy.work, free_implicit_child_mr_work);
	queue_work(system_unbound_wq, &mr->odp_destroy.work);
}

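/*
 * Deferred teardown of an implicit child MR, as wired up below:
 *
 *   mlx5_ib_invalidate_range()           (child became empty)
 *     destroy_unused_implicit_child_mr()
 *       __xa_cmpxchg(implicit_children)  claim the slot, or lose the race
 *                                        to mlx5_ib_free_implicit_mr()
 *       call_srcu()                   -> free_implicit_child_mr_rcu()
 *         queue_work()                -> free_implicit_child_mr_work()
 *           free_implicit_child_mr(mr, true)
 *
 * The SRCU grace period guarantees that no page-fault handler still holds a
 * pointer to the child by the time it is freed on the work queue.
 */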
static void destroy_unused_implicit_child_mr(struct mlx5_ib_mr *mr)
{
	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
	unsigned long idx = ib_umem_start(odp) >> MLX5_IMR_MTT_SHIFT;
	struct mlx5_ib_mr *imr = mr->parent;

	xa_lock(&imr->implicit_children);
	/*
	 * This can race with mlx5_ib_free_implicit_mr(); the first one to
	 * reach the xa lock wins the race and destroys the MR.
	 */
	if (__xa_cmpxchg(&imr->implicit_children, idx, mr, NULL, GFP_ATOMIC) !=
	    mr)
		goto out_unlock;

	atomic_inc(&imr->num_deferred_work);
	call_srcu(&mr->dev->odp_srcu, &mr->odp_destroy.rcu,
		  free_implicit_child_mr_rcu);

out_unlock:
	xa_unlock(&imr->implicit_children);
}

static bool mlx5_ib_invalidate_range(struct mmu_interval_notifier *mni,
				     const struct mmu_notifier_range *range,
				     unsigned long cur_seq)
{
	struct ib_umem_odp *umem_odp =
		container_of(mni, struct ib_umem_odp, notifier);
	struct mlx5_ib_mr *mr;
	const u64 umr_block_mask = (MLX5_UMR_MTT_ALIGNMENT /
				    sizeof(struct mlx5_mtt)) - 1;
	u64 idx = 0, blk_start_idx = 0;
	u64 invalidations = 0;
	unsigned long start;
	unsigned long end;
	int in_block = 0;
	u64 addr;

	if (!mmu_notifier_range_blockable(range))
		return false;

	mutex_lock(&umem_odp->umem_mutex);
	mmu_interval_set_seq(mni, cur_seq);
	/*
	 * If npages is zero then umem_odp->private may not be setup yet. This
	 * does not complete until after the first page is mapped for DMA.
	 */
	if (!umem_odp->npages)
		goto out;
	mr = umem_odp->private;

	start = max_t(u64, ib_umem_start(umem_odp), range->start);
	end = min_t(u64, ib_umem_end(umem_odp), range->end);

	/*
	 * Iteration one - zap the HW's MTTs. The notifiers_count ensures that
	 * while we are doing the invalidation, no page fault will attempt to
	 * overwrite the same MTTs. Concurrent invalidations might race us,
	 * but they will write 0s as well, so no difference in the end result.
	 */
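	/*
	 * The zap below is batched: a run of present MTTs
	 * [blk_start_idx, idx] is flushed with a single
	 * MLX5_IB_UPD_XLT_ZAP update. To avoid issuing a UMR for every
	 * small hole, an open run is only closed when a hole begins on a
	 * UMR block boundary (MLX5_UMR_MTT_ALIGNMENT / sizeof(struct
	 * mlx5_mtt) entries per block); any run still open at the end of
	 * the range is flushed after the loop.
	 */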
	for (addr = start; addr < end; addr += BIT(umem_odp->page_shift)) {
		idx = (addr - ib_umem_start(umem_odp)) >> umem_odp->page_shift;
		/*
		 * Strive to write the MTTs in chunks, but avoid overwriting
		 * non-existing MTTs. The heuristic here can be improved to
		 * estimate the cost of another UMR vs. the cost of a bigger
		 * UMR.
		 */
		if (umem_odp->dma_list[idx] &
		    (ODP_READ_ALLOWED_BIT | ODP_WRITE_ALLOWED_BIT)) {
			if (!in_block) {
				blk_start_idx = idx;
				in_block = 1;
			}

			/* Count page invalidations */
			invalidations += idx - blk_start_idx + 1;
		} else {
			u64 umr_offset = idx & umr_block_mask;

			if (in_block && umr_offset == 0) {
				mlx5_ib_update_xlt(mr, blk_start_idx,
						   idx - blk_start_idx, 0,
						   MLX5_IB_UPD_XLT_ZAP |
						   MLX5_IB_UPD_XLT_ATOMIC);
				in_block = 0;
			}
		}
	}
	if (in_block)
		mlx5_ib_update_xlt(mr, blk_start_idx,
				   idx - blk_start_idx + 1, 0,
				   MLX5_IB_UPD_XLT_ZAP |
				   MLX5_IB_UPD_XLT_ATOMIC);

	mlx5_update_odp_stats(mr, invalidations, invalidations);

	/*
	 * We are now sure that the device will not access the
	 * memory. We can safely unmap it, and mark it as dirty if
	 * needed.
	 */

	ib_umem_odp_unmap_dma_pages(umem_odp, start, end);

	if (unlikely(!umem_odp->npages && mr->parent))
		destroy_unused_implicit_child_mr(mr);
out:
	mutex_unlock(&umem_odp->umem_mutex);
	return true;
}

const struct mmu_interval_notifier_ops mlx5_mn_ops = {
	.invalidate = mlx5_ib_invalidate_range,
};

void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
{
	struct ib_odp_caps *caps = &dev->odp_caps;

	memset(caps, 0, sizeof(*caps));

	if (!MLX5_CAP_GEN(dev->mdev, pg) ||
	    !mlx5_ib_can_load_pas_with_umr(dev, 0))
		return;

	caps->general_caps = IB_ODP_SUPPORT;

	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
		dev->odp_max_size = U64_MAX;
	else
		dev->odp_max_size = BIT_ULL(MLX5_MAX_UMR_SHIFT + PAGE_SHIFT);

	if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.send))
		caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SEND;

	if (MLX5_CAP_ODP(dev->mdev, ud_odp_caps.srq_receive))
		caps->per_transport_caps.ud_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;

	if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.send))
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SEND;

	if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.receive))
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_RECV;

	if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.write))
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_WRITE;

	if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.read))
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_READ;

	if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;

	if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.srq_receive))
		caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;

	if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.send))
		caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SEND;

	if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.receive))
		caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_RECV;

	if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.write))
		caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_WRITE;

	if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.read))
		caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_READ;

	if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.atomic))
		caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;

	if (MLX5_CAP_ODP(dev->mdev, xrc_odp_caps.srq_receive))
		caps->per_transport_caps.xrc_odp_caps |= IB_ODP_SUPPORT_SRQ_RECV;

	if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
	    MLX5_CAP_GEN(dev->mdev, null_mkey) &&
	    MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
	    !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled))
		caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
}

static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
				      struct mlx5_pagefault *pfault,
				      int error)
{
	int wq_num = pfault->event_subtype == MLX5_PFAULT_SUBTYPE_WQE ?
		     pfault->wqe.wq_num : pfault->token;
	u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {};
	int err;

	MLX5_SET(page_fault_resume_in, in, opcode, MLX5_CMD_OP_PAGE_FAULT_RESUME);
	MLX5_SET(page_fault_resume_in, in, page_fault_type, pfault->type);
	MLX5_SET(page_fault_resume_in, in, token, pfault->token);
	MLX5_SET(page_fault_resume_in, in, wq_number, wq_num);
	MLX5_SET(page_fault_resume_in, in, error, !!error);

	err = mlx5_cmd_exec_in(dev->mdev, page_fault_resume, in);
	if (err)
		mlx5_ib_err(dev, "Failed to resolve the page fault on WQ 0x%x err %d\n",
			    wq_num, err);
}

static struct mlx5_ib_mr *implicit_get_child_mr(struct mlx5_ib_mr *imr,
						unsigned long idx)
{
	struct ib_umem_odp *odp;
	struct mlx5_ib_mr *mr;
	struct mlx5_ib_mr *ret;
	int err;

	odp = ib_umem_odp_alloc_child(to_ib_umem_odp(imr->umem),
				      idx * MLX5_IMR_MTT_SIZE,
				      MLX5_IMR_MTT_SIZE, &mlx5_mn_ops);
	if (IS_ERR(odp))
		return ERR_CAST(odp);

	ret = mr = mlx5_mr_cache_alloc(imr->dev, MLX5_IMR_MTT_CACHE_ENTRY,
				       imr->access_flags);
	if (IS_ERR(mr))
		goto out_umem;

	mr->ibmr.pd = imr->ibmr.pd;
	mr->umem = &odp->umem;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->mmkey.iova = idx * MLX5_IMR_MTT_SIZE;
	mr->parent = imr;
	odp->private = mr;

	err = mlx5_ib_update_xlt(mr, 0,
				 MLX5_IMR_MTT_ENTRIES,
				 PAGE_SHIFT,
				 MLX5_IB_UPD_XLT_ZAP |
				 MLX5_IB_UPD_XLT_ENABLE);
	if (err) {
		ret = ERR_PTR(err);
		goto out_mr;
	}

	/*
	 * Once the store to either xarray completes, any error unwind has to
	 * use synchronize_srcu(). Avoid this with xa_reserve().
	 */
	ret = xa_cmpxchg(&imr->implicit_children, idx, NULL, mr,
			 GFP_KERNEL);
	if (unlikely(ret)) {
		if (xa_is_err(ret)) {
			ret = ERR_PTR(xa_err(ret));
			goto out_mr;
		}
		/*
		 * Another thread beat us to creating the child mr, use
		 * theirs.
		 */
		goto out_mr;
	}

	mlx5_ib_dbg(imr->dev, "key %x mr %p\n", mr->mmkey.key, mr);
	return mr;

out_mr:
	mlx5_mr_cache_free(imr->dev, mr);
out_umem:
	ib_umem_odp_release(odp);
	return ret;
}

struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
					     struct ib_udata *udata,
					     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->ibpd.device);
	struct ib_umem_odp *umem_odp;
	struct mlx5_ib_mr *imr;
	int err;

	umem_odp = ib_umem_odp_alloc_implicit(&dev->ib_dev, access_flags);
	if (IS_ERR(umem_odp))
		return ERR_CAST(umem_odp);

	imr = mlx5_mr_cache_alloc(dev, MLX5_IMR_KSM_CACHE_ENTRY, access_flags);
	if (IS_ERR(imr)) {
		err = PTR_ERR(imr);
		goto out_umem;
	}

	imr->ibmr.pd = &pd->ibpd;
	imr->mmkey.iova = 0;
	imr->umem = &umem_odp->umem;
	imr->ibmr.lkey = imr->mmkey.key;
	imr->ibmr.rkey = imr->mmkey.key;
	imr->umem = &umem_odp->umem;
	imr->is_odp_implicit = true;
	atomic_set(&imr->num_deferred_work, 0);
	init_waitqueue_head(&imr->q_deferred_work);
	xa_init(&imr->implicit_children);

	err = mlx5_ib_update_xlt(imr, 0,
				 mlx5_imr_ksm_entries,
				 MLX5_KSM_PAGE_SHIFT,
				 MLX5_IB_UPD_XLT_INDIRECT |
				 MLX5_IB_UPD_XLT_ZAP |
				 MLX5_IB_UPD_XLT_ENABLE);
	if (err)
		goto out_mr;

	err = xa_err(xa_store(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key),
			      &imr->mmkey, GFP_KERNEL));
	if (err)
		goto out_mr;

	mlx5_ib_dbg(dev, "key %x mr %p\n", imr->mmkey.key, imr);
	return imr;
out_mr:
	mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
	mlx5_mr_cache_free(dev, imr);
out_umem:
	ib_umem_odp_release(umem_odp);
	return ERR_PTR(err);
}

void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
	struct ib_umem_odp *odp_imr = to_ib_umem_odp(imr->umem);
	struct mlx5_ib_dev *dev = imr->dev;
	struct list_head destroy_list;
	struct mlx5_ib_mr *mtt;
	struct mlx5_ib_mr *tmp;
	unsigned long idx;

	INIT_LIST_HEAD(&destroy_list);

	xa_erase(&dev->odp_mkeys, mlx5_base_mkey(imr->mmkey.key));
	/*
	 * This stops the SRCU-protected page fault path from touching either
	 * the imr or any children. The page fault path can only reach the
	 * children xarray via the imr.
	 */
	synchronize_srcu(&dev->odp_srcu);

	/*
	 * All work on the prefetch list must be completed; the xa_erase()
	 * above prevents new work from being created.
	 */
	wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));

	/*
	 * At this point it is forbidden for any other thread to enter
	 * pagefault_mr() on this imr. It is already forbidden to call
	 * pagefault_mr() on an implicit child. Because of this, additions to
	 * implicit_children are prevented.
	 */

	/*
	 * Block destroy_unused_implicit_child_mr() from incrementing
	 * num_deferred_work.
	 */
	xa_lock(&imr->implicit_children);
	xa_for_each (&imr->implicit_children, idx, mtt) {
		__xa_erase(&imr->implicit_children, idx);
		list_add(&mtt->odp_destroy.elm, &destroy_list);
	}
	xa_unlock(&imr->implicit_children);

	/*
	 * Wait for any concurrent destroy_unused_implicit_child_mr() to
	 * complete.
	 */
	wait_event(imr->q_deferred_work, !atomic_read(&imr->num_deferred_work));

	/*
	 * Fence the imr before we destroy the children. This allows us to
	 * skip updating the imr's XLT when destroying the child mkeys it
	 * points to.
	 */
	mlx5_mr_cache_invalidate(imr);

	list_for_each_entry_safe (mtt, tmp, &destroy_list, odp_destroy.elm)
		free_implicit_child_mr(mtt, false);

	mlx5_mr_cache_free(dev, imr);
	ib_umem_odp_release(odp_imr);
}

/**
 * mlx5_ib_fence_odp_mr - Stop all access to the ODP MR
 * @mr: to fence
 *
 * On return no parallel threads will be touching this MR and no DMA will be
 * active.
 */
void mlx5_ib_fence_odp_mr(struct mlx5_ib_mr *mr)
{
	/* Prevent new page faults and prefetch requests from succeeding */
	xa_erase(&mr->dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key));

	/* Wait for all running page-fault handlers to finish. */
	synchronize_srcu(&mr->dev->odp_srcu);

	wait_event(mr->q_deferred_work, !atomic_read(&mr->num_deferred_work));

	dma_fence_odp_mr(mr);
}

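/*
 * Flags controlling how pagefault_real_mr() populates an MR:
 *
 *  MLX5_PF_FLAGS_DOWNGRADE - map the pages read-only even if the umem is
 *                            writable.
 *  MLX5_PF_FLAGS_SNAPSHOT  - do not fault pages in; only mirror what is
 *                            already present in the umem.
 *  MLX5_PF_FLAGS_ENABLE    - also pass MLX5_IB_UPD_XLT_ENABLE so the mkey
 *                            is enabled by this first population.
 */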
#define MLX5_PF_FLAGS_DOWNGRADE BIT(1)
#define MLX5_PF_FLAGS_SNAPSHOT BIT(2)
#define MLX5_PF_FLAGS_ENABLE BIT(3)
static int pagefault_real_mr(struct mlx5_ib_mr *mr, struct ib_umem_odp *odp,
			     u64 user_va, size_t bcnt, u32 *bytes_mapped,
			     u32 flags)
{
	int page_shift, ret, np;
	bool downgrade = flags & MLX5_PF_FLAGS_DOWNGRADE;
	u64 access_mask;
	u64 start_idx;
	bool fault = !(flags & MLX5_PF_FLAGS_SNAPSHOT);
	u32 xlt_flags = MLX5_IB_UPD_XLT_ATOMIC;

	if (flags & MLX5_PF_FLAGS_ENABLE)
		xlt_flags |= MLX5_IB_UPD_XLT_ENABLE;

	page_shift = odp->page_shift;
	start_idx = (user_va - ib_umem_start(odp)) >> page_shift;
	access_mask = ODP_READ_ALLOWED_BIT;

	if (odp->umem.writable && !downgrade)
		access_mask |= ODP_WRITE_ALLOWED_BIT;

	np = ib_umem_odp_map_dma_and_lock(odp, user_va, bcnt, access_mask, fault);
	if (np < 0)
		return np;

	/*
	 * No need to check whether the MTTs really belong to this MR, since
	 * ib_umem_odp_map_dma_and_lock already checks this.
	 */
	ret = mlx5_ib_update_xlt(mr, start_idx, np, page_shift, xlt_flags);
	mutex_unlock(&odp->umem_mutex);

	if (ret < 0) {
		if (ret != -EAGAIN)
			mlx5_ib_err(mr->dev,
				    "Failed to update mkey page tables\n");
		goto out;
	}

	if (bytes_mapped) {
		u32 new_mappings = (np << page_shift) -
			(user_va - round_down(user_va, 1 << page_shift));

		*bytes_mapped += min_t(u32, new_mappings, bcnt);
	}

	return np << (page_shift - PAGE_SHIFT);

out:
	return ret;
}

static int pagefault_implicit_mr(struct mlx5_ib_mr *imr,
				 struct ib_umem_odp *odp_imr, u64 user_va,
				 size_t bcnt, u32 *bytes_mapped, u32 flags)
{
	unsigned long end_idx = (user_va + bcnt - 1) >> MLX5_IMR_MTT_SHIFT;
	unsigned long upd_start_idx = end_idx + 1;
	unsigned long upd_len = 0;
	unsigned long npages = 0;
	int err;
	int ret;

	if (unlikely(user_va >= mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE ||
		     mlx5_imr_ksm_entries * MLX5_IMR_MTT_SIZE - user_va < bcnt))
		return -EFAULT;

	/* Fault each child mr that intersects with our interval. */
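	/*
	 * Each implicit child MR covers MLX5_IMR_MTT_SIZE (1 GiB) of
	 * virtual address space, so the child index for a VA is simply
	 * user_va >> MLX5_IMR_MTT_SHIFT. For example, a fault starting at
	 * VA 0x140000000 (5 GiB) lands in child idx 5; if it extends past
	 * the next 1 GiB boundary the loop below walks on into idx 6,
	 * clamping each step to the end of the current child's umem.
	 */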
	while (bcnt) {
		unsigned long idx = user_va >> MLX5_IMR_MTT_SHIFT;
		struct ib_umem_odp *umem_odp;
		struct mlx5_ib_mr *mtt;
		u64 len;

		mtt = xa_load(&imr->implicit_children, idx);
		if (unlikely(!mtt)) {
			mtt = implicit_get_child_mr(imr, idx);
			if (IS_ERR(mtt)) {
				ret = PTR_ERR(mtt);
				goto out;
			}
			upd_start_idx = min(upd_start_idx, idx);
			upd_len = idx - upd_start_idx + 1;
		}

		umem_odp = to_ib_umem_odp(mtt->umem);
		len = min_t(u64, user_va + bcnt, ib_umem_end(umem_odp)) -
		      user_va;

		ret = pagefault_real_mr(mtt, umem_odp, user_va, len,
					bytes_mapped, flags);
		if (ret < 0)
			goto out;
		user_va += len;
		bcnt -= len;
		npages += ret;
	}

	ret = npages;

	/*
	 * Any time the implicit_children are changed we must perform an
	 * update of the xlt before exiting to ensure the HW and the
	 * implicit_children remain synchronized.
	 */
out:
	if (likely(!upd_len))
		return ret;

	/*
	 * Note that this is not strictly ordered correctly: the KSM is
	 * updated after the implicit_children is updated, so a parallel page
	 * fault could see an MR that is not yet visible in the KSM. This is
	 * similar to a parallel page fault seeing an MR that is being
	 * concurrently removed from the KSM. Both of these improbable
	 * situations are resolved safely by resuming the HW and then taking
	 * another page fault. The next pagefault handler will see the new
	 * information.
	 */
	mutex_lock(&odp_imr->umem_mutex);
	err = mlx5_ib_update_xlt(imr, upd_start_idx, upd_len, 0,
				 MLX5_IB_UPD_XLT_INDIRECT |
				 MLX5_IB_UPD_XLT_ATOMIC);
	mutex_unlock(&odp_imr->umem_mutex);
	if (err) {
		mlx5_ib_err(imr->dev, "Failed to update PAS\n");
		return err;
	}
	return ret;
}

/*
 * Returns:
 *  -EFAULT: The range [io_virt, io_virt + bcnt) is not within the MR, it
 *           covers pages that are not accessible, or the MR is no longer
 *           valid.
 *  -EAGAIN/-ENOMEM: The operation should be retried
 *
 *  -EINVAL/others: General internal malfunction
 *  >0: Number of pages mapped
 */
static int pagefault_mr(struct mlx5_ib_mr *mr, u64 io_virt, size_t bcnt,
			u32 *bytes_mapped, u32 flags)
{
	struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);

	lockdep_assert_held(&mr->dev->odp_srcu);
	if (unlikely(io_virt < mr->mmkey.iova))
		return -EFAULT;

	if (!odp->is_implicit_odp) {
		u64 user_va;

		if (check_add_overflow(io_virt - mr->mmkey.iova,
				       (u64)odp->umem.address, &user_va))
			return -EFAULT;
		if (unlikely(user_va >= ib_umem_end(odp) ||
			     ib_umem_end(odp) - user_va < bcnt))
			return -EFAULT;
		return pagefault_real_mr(mr, odp, user_va, bcnt, bytes_mapped,
					 flags);
	}
	return pagefault_implicit_mr(mr, odp, io_virt, bcnt, bytes_mapped,
				     flags);
}

int mlx5_ib_init_odp_mr(struct mlx5_ib_mr *mr, bool enable)
{
	u32 flags = MLX5_PF_FLAGS_SNAPSHOT;
	int ret;

	if (enable)
		flags |= MLX5_PF_FLAGS_ENABLE;

	ret = pagefault_real_mr(mr, to_ib_umem_odp(mr->umem),
				mr->umem->address, mr->umem->length, NULL,
				flags);
	return ret >= 0 ? 0 : ret;
}

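/*
 * pf_frame is a node in a manually managed stack used by
 * pagefault_single_data_segment() to resolve indirect mkeys (memory windows
 * and DEVX indirect mkeys) iteratively: every KLM entry of an indirect mkey
 * that still intersects the faulting range is pushed as a frame and later
 * popped at the next_mr label, with the nesting depth bounded by the
 * max_indirection capability.
 */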
struct pf_frame {
	struct pf_frame *next;
	u32 key;
	u64 io_virt;
	size_t bcnt;
	int depth;
};

static bool mkey_is_eq(struct mlx5_core_mkey *mmkey, u32 key)
{
	if (!mmkey)
		return false;
	if (mmkey->type == MLX5_MKEY_MW)
		return mlx5_base_mkey(mmkey->key) == mlx5_base_mkey(key);
	return mmkey->key == key;
}

static int get_indirect_num_descs(struct mlx5_core_mkey *mmkey)
{
	struct mlx5_ib_mw *mw;
	struct mlx5_ib_devx_mr *devx_mr;

	if (mmkey->type == MLX5_MKEY_MW) {
		mw = container_of(mmkey, struct mlx5_ib_mw, mmkey);
		return mw->ndescs;
	}

	devx_mr = container_of(mmkey, struct mlx5_ib_devx_mr,
			       mmkey);
	return devx_mr->ndescs;
}

/*
 * Handle a single data segment in a page-fault WQE or RDMA region.
 *
 * Returns number of OS pages retrieved on success. The caller may continue to
 * the next data segment.
 * Can return the following error codes:
 * -EAGAIN to designate a temporary error. The caller will abort handling the
 *  page fault and resolve it.
 * -EFAULT when there's an error mapping the requested pages. The caller will
 *  abort the page fault handling.
 */
static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
					 struct ib_pd *pd, u32 key,
					 u64 io_virt, size_t bcnt,
					 u32 *bytes_committed,
					 u32 *bytes_mapped)
{
	int npages = 0, srcu_key, ret, i, outlen, cur_outlen = 0, depth = 0;
	struct pf_frame *head = NULL, *frame;
	struct mlx5_core_mkey *mmkey;
	struct mlx5_ib_mr *mr;
	struct mlx5_klm *pklm;
	u32 *out = NULL;
	size_t offset;
	int ndescs;

	srcu_key = srcu_read_lock(&dev->odp_srcu);

	io_virt += *bytes_committed;
	bcnt -= *bytes_committed;

next_mr:
	mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(key));
	if (!mmkey) {
		mlx5_ib_dbg(
			dev,
			"skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
			key);
		if (bytes_mapped)
			*bytes_mapped += bcnt;
		/*
		 * The user could specify an SGL with multiple lkeys and only
		 * some of them are ODP. Treat the non-ODP ones as fully
		 * faulted.
		 */
		ret = 0;
		goto srcu_unlock;
	}
	if (!mkey_is_eq(mmkey, key)) {
		mlx5_ib_dbg(dev, "failed to find mkey %x\n", key);
		ret = -EFAULT;
		goto srcu_unlock;
	}

	switch (mmkey->type) {
	case MLX5_MKEY_MR:
		mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);

		ret = pagefault_mr(mr, io_virt, bcnt, bytes_mapped, 0);
		if (ret < 0)
			goto srcu_unlock;

		mlx5_update_odp_stats(mr, faults, ret);

		npages += ret;
		ret = 0;
		break;

	case MLX5_MKEY_MW:
	case MLX5_MKEY_INDIRECT_DEVX:
		ndescs = get_indirect_num_descs(mmkey);

		if (depth >= MLX5_CAP_GEN(dev->mdev, max_indirection)) {
			mlx5_ib_dbg(dev, "indirection level exceeded\n");
			ret = -EFAULT;
			goto srcu_unlock;
		}

		outlen = MLX5_ST_SZ_BYTES(query_mkey_out) +
			sizeof(*pklm) * (ndescs - 2);

		if (outlen > cur_outlen) {
			kfree(out);
			out = kzalloc(outlen, GFP_KERNEL);
			if (!out) {
				ret = -ENOMEM;
				goto srcu_unlock;
			}
			cur_outlen = outlen;
		}

		pklm = (struct mlx5_klm *)MLX5_ADDR_OF(query_mkey_out, out,
						       bsf0_klm0_pas_mtt0_1);

		ret = mlx5_core_query_mkey(dev->mdev, mmkey, out, outlen);
		if (ret)
			goto srcu_unlock;

		offset = io_virt - MLX5_GET64(query_mkey_out, out,
					      memory_key_mkey_entry.start_addr);

		for (i = 0; bcnt && i < ndescs; i++, pklm++) {
			if (offset >= be32_to_cpu(pklm->bcount)) {
				offset -= be32_to_cpu(pklm->bcount);
				continue;
			}

			frame = kzalloc(sizeof(*frame), GFP_KERNEL);
			if (!frame) {
				ret = -ENOMEM;
				goto srcu_unlock;
			}

			frame->key = be32_to_cpu(pklm->key);
			frame->io_virt = be64_to_cpu(pklm->va) + offset;
			frame->bcnt = min_t(size_t, bcnt,
					    be32_to_cpu(pklm->bcount) - offset);
			frame->depth = depth + 1;
			frame->next = head;
			head = frame;

			bcnt -= frame->bcnt;
			offset = 0;
		}
		break;

	default:
		mlx5_ib_dbg(dev, "wrong mkey type %d\n", mmkey->type);
		ret = -EFAULT;
		goto srcu_unlock;
	}

	if (head) {
		frame = head;
		head = frame->next;

		key = frame->key;
		io_virt = frame->io_virt;
		bcnt = frame->bcnt;
		depth = frame->depth;
		kfree(frame);

		goto next_mr;
	}

srcu_unlock:
	while (head) {
		frame = head;
		head = frame->next;
		kfree(frame);
	}
	kfree(out);

	srcu_read_unlock(&dev->odp_srcu, srcu_key);
	*bytes_committed = 0;
	return ret ? ret : npages;
}

/**
 * Parse a series of data segments for page fault handling.
 *
 * @pfault contains page fault information.
 * @wqe points at the first data segment in the WQE.
 * @wqe_end points after the end of the WQE.
 * @bytes_mapped receives the number of bytes that the function was able to
 *               map. This allows the caller to decide intelligently whether
 *               enough memory was mapped to resolve the page fault
 *               successfully (e.g. enough for the next MTU, or the entire
 *               WQE).
 * @total_wqe_bytes receives the total data size of this WQE in bytes (minus
 *                  the committed bytes).
 *
 * Returns the number of pages loaded if positive, zero for an empty WQE, or a
 * negative error code.
 */
static int pagefault_data_segments(struct mlx5_ib_dev *dev,
				   struct mlx5_pagefault *pfault,
				   void *wqe,
				   void *wqe_end, u32 *bytes_mapped,
				   u32 *total_wqe_bytes, bool receive_queue)
{
	int ret = 0, npages = 0;
	u64 io_virt;
	u32 key;
	u32 byte_count;
	size_t bcnt;
	int inline_segment;

	if (bytes_mapped)
		*bytes_mapped = 0;
	if (total_wqe_bytes)
		*total_wqe_bytes = 0;

	while (wqe < wqe_end) {
		struct mlx5_wqe_data_seg *dseg = wqe;

		io_virt = be64_to_cpu(dseg->addr);
		key = be32_to_cpu(dseg->lkey);
		byte_count = be32_to_cpu(dseg->byte_count);
		inline_segment = !!(byte_count & MLX5_INLINE_SEG);
		bcnt = byte_count & ~MLX5_INLINE_SEG;

		if (inline_segment) {
			bcnt = bcnt & MLX5_WQE_INLINE_SEG_BYTE_COUNT_MASK;
			wqe += ALIGN(sizeof(struct mlx5_wqe_inline_seg) + bcnt,
				     16);
		} else {
			wqe += sizeof(*dseg);
		}

		/* Receive WQE end of SG list. */
		if (receive_queue && bcnt == 0 && key == MLX5_INVALID_LKEY &&
		    io_virt == 0)
			break;

		if (!inline_segment && total_wqe_bytes) {
			*total_wqe_bytes += bcnt - min_t(size_t, bcnt,
					pfault->bytes_committed);
		}

		/* A zero-length data segment designates a length of 2 GB. */
		if (bcnt == 0)
			bcnt = 1U << 31;

		if (inline_segment || bcnt <= pfault->bytes_committed) {
			pfault->bytes_committed -=
				min_t(size_t, bcnt,
				      pfault->bytes_committed);
			continue;
		}

		ret = pagefault_single_data_segment(dev, NULL, key,
						    io_virt, bcnt,
						    &pfault->bytes_committed,
						    bytes_mapped);
		if (ret < 0)
			break;
		npages += ret;
	}

	return ret < 0 ? ret : npages;
}

/*
 * Parse initiator WQE. Advances the wqe pointer to point at the
 * scatter-gather list, and sets wqe_end to the end of the WQE.
 */
static int mlx5_ib_mr_initiator_pfault_handler(
	struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault,
	struct mlx5_ib_qp *qp, void **wqe, void **wqe_end, int wqe_length)
{
	struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
	u16 wqe_index = pfault->wqe.wqe_index;
	struct mlx5_base_av *av;
	unsigned ds, opcode;
	u32 qpn = qp->trans_qp.base.mqp.qpn;

	ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
	if (ds * MLX5_WQE_DS_UNITS > wqe_length) {
		mlx5_ib_err(dev, "Unable to read the complete WQE. ds = 0x%x, ret = 0x%x\n",
			    ds, wqe_length);
		return -EFAULT;
	}

	if (ds == 0) {
		mlx5_ib_err(dev, "Got WQE with zero DS. wqe_index=%x, qpn=%x\n",
			    wqe_index, qpn);
		return -EFAULT;
	}

	*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
	*wqe += sizeof(*ctrl);

	opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
		 MLX5_WQE_CTRL_OPCODE_MASK;

	if (qp->ibqp.qp_type == IB_QPT_XRC_INI)
		*wqe += sizeof(struct mlx5_wqe_xrc_seg);

	if (qp->type == IB_QPT_UD || qp->type == MLX5_IB_QPT_DCI) {
		av = *wqe;
		if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
			*wqe += sizeof(struct mlx5_av);
		else
			*wqe += sizeof(struct mlx5_base_av);
	}

	switch (opcode) {
	case MLX5_OPCODE_RDMA_WRITE:
	case MLX5_OPCODE_RDMA_WRITE_IMM:
	case MLX5_OPCODE_RDMA_READ:
		*wqe += sizeof(struct mlx5_wqe_raddr_seg);
		break;
	case MLX5_OPCODE_ATOMIC_CS:
	case MLX5_OPCODE_ATOMIC_FA:
		*wqe += sizeof(struct mlx5_wqe_raddr_seg);
		*wqe += sizeof(struct mlx5_wqe_atomic_seg);
		break;
	}

	return 0;
}
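
/*
 * The initiator parser above walks the fixed-size segments that precede the
 * scatter-gather list in a send WQE: the ctrl segment, an XRC segment for
 * XRC initiator QPs, an address vector for UD/DCI QPs (extended or base,
 * depending on MLX5_EXTENDED_UD_AV), and a raddr segment for RDMA opcodes,
 * optionally followed by an atomic segment for atomic opcodes. Opcodes not
 * listed in the switch carry their data segments immediately after the ctrl
 * (and optional AV) segments.
 */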

/*
 * Parse responder WQE and set wqe_end to the end of the WQE.
 */
static int mlx5_ib_mr_responder_pfault_handler_srq(struct mlx5_ib_dev *dev,
						   struct mlx5_ib_srq *srq,
						   void **wqe, void **wqe_end,
						   int wqe_length)
{
	int wqe_size = 1 << srq->msrq.wqe_shift;

	if (wqe_size > wqe_length) {
		mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
		return -EFAULT;
	}

	*wqe_end = *wqe + wqe_size;
	*wqe += sizeof(struct mlx5_wqe_srq_next_seg);

	return 0;
}

static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
						  struct mlx5_ib_qp *qp,
						  void *wqe, void **wqe_end,
						  int wqe_length)
{
	struct mlx5_ib_wq *wq = &qp->rq;
	int wqe_size = 1 << wq->wqe_shift;

	if (qp->flags_en & MLX5_QP_FLAG_SIGNATURE) {
		mlx5_ib_err(dev, "ODP fault with WQE signatures is not supported\n");
		return -EFAULT;
	}

	if (wqe_size > wqe_length) {
		mlx5_ib_err(dev, "Couldn't read all of the receive WQE's content\n");
		return -EFAULT;
	}

	*wqe_end = wqe + wqe_size;

	return 0;
}

static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
						       u32 wq_num, int pf_type)
{
	struct mlx5_core_rsc_common *common = NULL;
	struct mlx5_core_srq *srq;

	switch (pf_type) {
	case MLX5_WQE_PF_TYPE_RMP:
		srq = mlx5_cmd_get_srq(dev, wq_num);
		if (srq)
			common = &srq->common;
		break;
	case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
	case MLX5_WQE_PF_TYPE_RESP:
	case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
		common = mlx5_core_res_hold(dev, wq_num, MLX5_RES_QP);
		break;
	default:
		break;
	}

	return common;
}
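
/*
 * odp_get_rsc() returns the resource with a reference held (taken by
 * mlx5_cmd_get_srq() or mlx5_core_res_hold()); the WQE page fault handler
 * below releases it with mlx5_core_res_put() once the fault has been
 * handled.
 */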

static inline struct mlx5_ib_qp *res_to_qp(struct mlx5_core_rsc_common *res)
{
	struct mlx5_core_qp *mqp = (struct mlx5_core_qp *)res;

	return to_mibqp(mqp);
}

static inline struct mlx5_ib_srq *res_to_srq(struct mlx5_core_rsc_common *res)
{
	struct mlx5_core_srq *msrq =
		container_of(res, struct mlx5_core_srq, common);

	return to_mibsrq(msrq);
}

static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
					  struct mlx5_pagefault *pfault)
{
	bool sq = pfault->type & MLX5_PFAULT_REQUESTOR;
	u16 wqe_index = pfault->wqe.wqe_index;
	void *wqe, *wqe_start = NULL, *wqe_end = NULL;
	u32 bytes_mapped, total_wqe_bytes;
	struct mlx5_core_rsc_common *res;
	int resume_with_error = 1;
	struct mlx5_ib_qp *qp;
	size_t bytes_copied;
	int ret = 0;

	res = odp_get_rsc(dev, pfault->wqe.wq_num, pfault->type);
	if (!res) {
		mlx5_ib_dbg(dev, "wqe page fault for missing resource %d\n", pfault->wqe.wq_num);
		return;
	}

	if (res->res != MLX5_RES_QP && res->res != MLX5_RES_SRQ &&
	    res->res != MLX5_RES_XSRQ) {
		mlx5_ib_err(dev, "wqe page fault for unsupported type %d\n",
			    pfault->type);
		goto resolve_page_fault;
	}

	wqe_start = (void *)__get_free_page(GFP_KERNEL);
	if (!wqe_start) {
		mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
		goto resolve_page_fault;
	}

	wqe = wqe_start;
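
	/*
	 * The faulting WQE is copied out of the user's queue buffer into the
	 * page allocated above and then parsed according to where the fault
	 * happened: the send queue of a QP, the receive queue of a QP, or an
	 * SRQ. Parsing leaves wqe pointing at the data segments and wqe_end
	 * just past the end of the WQE.
	 */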
	qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL;
	if (qp && sq) {
		ret = mlx5_ib_read_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
					  &bytes_copied);
		if (ret)
			goto read_user;
		ret = mlx5_ib_mr_initiator_pfault_handler(
			dev, pfault, qp, &wqe, &wqe_end, bytes_copied);
	} else if (qp && !sq) {
		ret = mlx5_ib_read_wqe_rq(qp, wqe_index, wqe, PAGE_SIZE,
					  &bytes_copied);
		if (ret)
			goto read_user;
		ret = mlx5_ib_mr_responder_pfault_handler_rq(
			dev, qp, wqe, &wqe_end, bytes_copied);
	} else if (!qp) {
		struct mlx5_ib_srq *srq = res_to_srq(res);

		ret = mlx5_ib_read_wqe_srq(srq, wqe_index, wqe, PAGE_SIZE,
					   &bytes_copied);
		if (ret)
			goto read_user;
		ret = mlx5_ib_mr_responder_pfault_handler_srq(
			dev, srq, &wqe, &wqe_end, bytes_copied);
	}

	if (ret < 0 || wqe >= wqe_end)
		goto resolve_page_fault;

	ret = pagefault_data_segments(dev, pfault, wqe, wqe_end, &bytes_mapped,
				      &total_wqe_bytes, !sq);
	if (ret == -EAGAIN)
		goto out;

	if (ret < 0 || total_wqe_bytes > bytes_mapped)
		goto resolve_page_fault;

out:
	ret = 0;
	resume_with_error = 0;

read_user:
	if (ret)
		mlx5_ib_err(
			dev,
			"Failed reading a WQE following page fault, error %d, wqe_index %x, qpn %x\n",
			ret, wqe_index, pfault->token);

resolve_page_fault:
	mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
	mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
		    pfault->wqe.wq_num, resume_with_error,
		    pfault->type);
	mlx5_core_res_put(res);
	free_page((unsigned long)wqe_start);
}

static int pages_in_range(u64 address, u32 length)
{
	return (ALIGN(address + length, PAGE_SIZE) -
		(address & PAGE_MASK)) >> PAGE_SHIFT;
}

static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
					   struct mlx5_pagefault *pfault)
{
	u64 address;
	u32 length;
	u32 prefetch_len = pfault->bytes_committed;
	int prefetch_activated = 0;
	u32 rkey = pfault->rdma.r_key;
	int ret;

	/* The RDMA responder handler handles the page fault in two parts.
	 * First it brings in the pages needed for the current packet
	 * (using the pfault context), and then (after resuming the QP)
	 * prefetches more pages. The second operation cannot use the pfault
	 * context and therefore uses a local, zeroed bytes_committed counter
	 * instead. */
	pfault->rdma.rdma_va += pfault->bytes_committed;
	pfault->rdma.rdma_op_len -= min(pfault->bytes_committed,
					pfault->rdma.rdma_op_len);
	pfault->bytes_committed = 0;

	address = pfault->rdma.rdma_va;
	length  = pfault->rdma.rdma_op_len;

	/* For some operations, the hardware cannot tell the exact message
	 * length, and in those cases it reports zero. Use prefetch
	 * logic. */
	if (length == 0) {
		prefetch_activated = 1;
		length = pfault->rdma.packet_size;
		prefetch_len = min(MAX_PREFETCH_LEN, prefetch_len);
	}

	ret = pagefault_single_data_segment(dev, NULL, rkey, address, length,
					    &pfault->bytes_committed, NULL);
	if (ret == -EAGAIN) {
		/* We're racing with an invalidation, don't prefetch */
		prefetch_activated = 0;
	} else if (ret < 0 || pages_in_range(address, length) > ret) {
		mlx5_ib_page_fault_resume(dev, pfault, 1);
		if (ret != -ENOENT)
			mlx5_ib_dbg(dev, "PAGE FAULT error %d. QP 0x%x, type: 0x%x\n",
				    ret, pfault->token, pfault->type);
		return;
	}

	mlx5_ib_page_fault_resume(dev, pfault, 0);
	mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x, type: 0x%x, prefetch_activated: %d\n",
		    pfault->token, pfault->type,
		    prefetch_activated);

	/* At this point, there might be a new pagefault already arriving in
	 * the eq; the prefetch below therefore uses its own local
	 * bytes_committed counter rather than the pfault context. We're
	 * still OK with the objects being alive as the work-queue is being
	 * fenced. */

	if (prefetch_activated) {
		u32 bytes_committed = 0;

		ret = pagefault_single_data_segment(dev, NULL, rkey, address,
						    prefetch_len,
						    &bytes_committed, NULL);
		if (ret < 0 && ret != -EAGAIN) {
			mlx5_ib_dbg(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
				    ret, pfault->token, address, prefetch_len);
		}
	}
}

static void mlx5_ib_pfault(struct mlx5_ib_dev *dev, struct mlx5_pagefault *pfault)
{
	u8 event_subtype = pfault->event_subtype;

	switch (event_subtype) {
	case MLX5_PFAULT_SUBTYPE_WQE:
		mlx5_ib_mr_wqe_pfault_handler(dev, pfault);
		break;
	case MLX5_PFAULT_SUBTYPE_RDMA:
		mlx5_ib_mr_rdma_pfault_handler(dev, pfault);
		break;
	default:
		mlx5_ib_err(dev, "Invalid page fault event subtype: 0x%x\n",
			    event_subtype);
		mlx5_ib_page_fault_resume(dev, pfault, 1);
	}
}

static void mlx5_ib_eqe_pf_action(struct work_struct *work)
{
	struct mlx5_pagefault *pfault = container_of(work,
						     struct mlx5_pagefault,
						     work);
	struct mlx5_ib_pf_eq *eq = pfault->eq;

	mlx5_ib_pfault(eq->dev, pfault);
	mempool_free(pfault, eq->pool);
}
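
/*
 * Drain the page-fault EQ: every EQE is decoded into a struct mlx5_pagefault
 * taken from the mempool and handed off to the workqueue, so the actual
 * fault handling (which may sleep) runs in process context. If the mempool
 * is exhausted, the remaining EQEs are left on the queue and eq->work is
 * scheduled, which refills the pool and resumes processing.
 */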

static void mlx5_ib_eq_pf_process(struct mlx5_ib_pf_eq *eq)
{
	struct mlx5_eqe_page_fault *pf_eqe;
	struct mlx5_pagefault *pfault;
	struct mlx5_eqe *eqe;
	int cc = 0;

	while ((eqe = mlx5_eq_get_eqe(eq->core, cc))) {
		pfault = mempool_alloc(eq->pool, GFP_ATOMIC);
		if (!pfault) {
			schedule_work(&eq->work);
			break;
		}

		pf_eqe = &eqe->data.page_fault;
		pfault->event_subtype = eqe->sub_type;
		pfault->bytes_committed = be32_to_cpu(pf_eqe->bytes_committed);

		mlx5_ib_dbg(eq->dev,
			    "PAGE_FAULT: subtype: 0x%02x, bytes_committed: 0x%06x\n",
			    eqe->sub_type, pfault->bytes_committed);

		switch (eqe->sub_type) {
		case MLX5_PFAULT_SUBTYPE_RDMA:
			/* RDMA based event */
			pfault->type =
				be32_to_cpu(pf_eqe->rdma.pftype_token) >> 24;
			pfault->token =
				be32_to_cpu(pf_eqe->rdma.pftype_token) &
				MLX5_24BIT_MASK;
			pfault->rdma.r_key =
				be32_to_cpu(pf_eqe->rdma.r_key);
			pfault->rdma.packet_size =
				be16_to_cpu(pf_eqe->rdma.packet_length);
			pfault->rdma.rdma_op_len =
				be32_to_cpu(pf_eqe->rdma.rdma_op_len);
			pfault->rdma.rdma_va =
				be64_to_cpu(pf_eqe->rdma.rdma_va);
			mlx5_ib_dbg(eq->dev,
				    "PAGE_FAULT: type:0x%x, token: 0x%06x, r_key: 0x%08x\n",
				    pfault->type, pfault->token,
				    pfault->rdma.r_key);
			mlx5_ib_dbg(eq->dev,
				    "PAGE_FAULT: rdma_op_len: 0x%08x, rdma_va: 0x%016llx\n",
				    pfault->rdma.rdma_op_len,
				    pfault->rdma.rdma_va);
			break;

		case MLX5_PFAULT_SUBTYPE_WQE:
			/* WQE based event */
			pfault->type =
				(be32_to_cpu(pf_eqe->wqe.pftype_wq) >> 24) & 0x7;
			pfault->token =
				be32_to_cpu(pf_eqe->wqe.token);
			pfault->wqe.wq_num =
				be32_to_cpu(pf_eqe->wqe.pftype_wq) &
				MLX5_24BIT_MASK;
			pfault->wqe.wqe_index =
				be16_to_cpu(pf_eqe->wqe.wqe_index);
			pfault->wqe.packet_size =
				be16_to_cpu(pf_eqe->wqe.packet_length);
			mlx5_ib_dbg(eq->dev,
				    "PAGE_FAULT: type:0x%x, token: 0x%06x, wq_num: 0x%06x, wqe_index: 0x%04x\n",
				    pfault->type, pfault->token,
				    pfault->wqe.wq_num,
				    pfault->wqe.wqe_index);
			break;

		default:
			mlx5_ib_warn(eq->dev,
				     "Unsupported page fault event sub-type: 0x%02hhx\n",
				     eqe->sub_type);
			/* Unsupported page faults should still be
			 * resolved by the page fault handler
			 */
		}

		pfault->eq = eq;
		INIT_WORK(&pfault->work, mlx5_ib_eqe_pf_action);
		queue_work(eq->wq, &pfault->work);

		cc = mlx5_eq_update_cc(eq->core, ++cc);
	}

	mlx5_eq_update_ci(eq->core, cc, 1);
}

static int mlx5_ib_eq_pf_int(struct notifier_block *nb, unsigned long type,
			     void *data)
{
	struct mlx5_ib_pf_eq *eq =
		container_of(nb, struct mlx5_ib_pf_eq, irq_nb);
	unsigned long flags;

	if (spin_trylock_irqsave(&eq->lock, flags)) {
		mlx5_ib_eq_pf_process(eq);
		spin_unlock_irqrestore(&eq->lock, flags);
	} else {
		schedule_work(&eq->work);
	}

	return IRQ_HANDLED;
}

/* mempool_refill() was proposed but unfortunately wasn't accepted
 * http://lkml.iu.edu/hypermail/linux/kernel/1512.1/05073.html
 * Cheap workaround.
 */
static void mempool_refill(mempool_t *pool)
{
	while (pool->curr_nr < pool->min_nr)
		mempool_free(mempool_alloc(pool, GFP_KERNEL), pool);
}

static void mlx5_ib_eq_pf_action(struct work_struct *work)
{
	struct mlx5_ib_pf_eq *eq =
		container_of(work, struct mlx5_ib_pf_eq, work);

	mempool_refill(eq->pool);

	spin_lock_irq(&eq->lock);
	mlx5_ib_eq_pf_process(eq);
	spin_unlock_irq(&eq->lock);
}

enum {
	MLX5_IB_NUM_PF_EQE	= 0x1000,
	MLX5_IB_NUM_PF_DRAIN	= 64,
};

static int
mlx5_ib_create_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
	struct mlx5_eq_param param = {};
	int err;

	INIT_WORK(&eq->work, mlx5_ib_eq_pf_action);
	spin_lock_init(&eq->lock);
	eq->dev = dev;

	eq->pool = mempool_create_kmalloc_pool(MLX5_IB_NUM_PF_DRAIN,
					       sizeof(struct mlx5_pagefault));
	if (!eq->pool)
		return -ENOMEM;
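
	/*
	 * Fault handling runs on a dedicated high-priority, unbound
	 * workqueue. WQ_MEM_RECLAIM is presumably needed so that fault
	 * resolution can still make progress under memory pressure, since
	 * resolving the fault is what lets the HCA complete outstanding I/O.
	 */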
	eq->wq = alloc_workqueue("mlx5_ib_page_fault",
				 WQ_HIGHPRI | WQ_UNBOUND | WQ_MEM_RECLAIM,
				 MLX5_NUM_CMD_EQE);
	if (!eq->wq) {
		err = -ENOMEM;
		goto err_mempool;
	}

	eq->irq_nb.notifier_call = mlx5_ib_eq_pf_int;
	param = (struct mlx5_eq_param) {
		.irq_index = 0,
		.nent = MLX5_IB_NUM_PF_EQE,
	};
	param.mask[0] = 1ull << MLX5_EVENT_TYPE_PAGE_FAULT;
	eq->core = mlx5_eq_create_generic(dev->mdev, &param);
	if (IS_ERR(eq->core)) {
		err = PTR_ERR(eq->core);
		goto err_wq;
	}
	err = mlx5_eq_enable(dev->mdev, eq->core, &eq->irq_nb);
	if (err) {
		mlx5_ib_err(dev, "failed to enable odp EQ %d\n", err);
		goto err_eq;
	}

	return 0;
err_eq:
	mlx5_eq_destroy_generic(dev->mdev, eq->core);
err_wq:
	destroy_workqueue(eq->wq);
err_mempool:
	mempool_destroy(eq->pool);
	return err;
}

static int
mlx5_ib_destroy_pf_eq(struct mlx5_ib_dev *dev, struct mlx5_ib_pf_eq *eq)
{
	int err;

	mlx5_eq_disable(dev->mdev, eq->core, &eq->irq_nb);
	err = mlx5_eq_destroy_generic(dev->mdev, eq->core);
	cancel_work_sync(&eq->work);
	destroy_workqueue(eq->wq);
	mempool_destroy(eq->pool);

	return err;
}

void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
{
	if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
		return;

	switch (ent->order - 2) {
	case MLX5_IMR_MTT_CACHE_ENTRY:
		ent->page = PAGE_SHIFT;
		ent->xlt = MLX5_IMR_MTT_ENTRIES *
			   sizeof(struct mlx5_mtt) /
			   MLX5_IB_UMR_OCTOWORD;
		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		ent->limit = 0;
		break;

	case MLX5_IMR_KSM_CACHE_ENTRY:
		ent->page = MLX5_KSM_PAGE_SHIFT;
		ent->xlt = mlx5_imr_ksm_entries *
			   sizeof(struct mlx5_klm) /
			   MLX5_IB_UMR_OCTOWORD;
		ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
		ent->limit = 0;
		break;
	}
}

static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
	.advise_mr = mlx5_ib_advise_mr,
};

int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
	int ret = 0;

	if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
		return ret;

	ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);

	if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
		ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
		if (ret) {
			mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
			return ret;
		}
	}

	ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);

	return ret;
}

void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
{
	if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
		return;

	mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);
}

int mlx5_ib_odp_init(void)
{
	mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
				       MLX5_IMR_MTT_BITS);

	return 0;
}

struct prefetch_mr_work {
	struct work_struct work;
	u32 pf_flags;
	u32 num_sge;
	struct {
		u64 io_virt;
		struct mlx5_ib_mr *mr;
		size_t length;
	} frags[];
};

static void destroy_prefetch_work(struct prefetch_mr_work *work)
{
	u32 i;

	for (i = 0; i < work->num_sge; ++i)
		if (atomic_dec_and_test(&work->frags[i].mr->num_deferred_work))
			wake_up(&work->frags[i].mr->q_deferred_work);
	kvfree(work);
}
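
/*
 * An lkey may be prefetched only if it resolves to an ODP-capable MR in
 * dev->odp_mkeys, belongs to the PD the caller passed in and, for write
 * prefetch, maps a writable umem; otherwise get_prefetchable_mr() returns
 * NULL. It must be called under the odp_srcu read lock.
 */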

static struct mlx5_ib_mr *
get_prefetchable_mr(struct ib_pd *pd, enum ib_uverbs_advise_mr_advice advice,
		    u32 lkey)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_mkey *mmkey;
	struct ib_umem_odp *odp;
	struct mlx5_ib_mr *mr;

	lockdep_assert_held(&dev->odp_srcu);

	mmkey = xa_load(&dev->odp_mkeys, mlx5_base_mkey(lkey));
	if (!mmkey || mmkey->key != lkey || mmkey->type != MLX5_MKEY_MR)
		return NULL;

	mr = container_of(mmkey, struct mlx5_ib_mr, mmkey);

	if (mr->ibmr.pd != pd)
		return NULL;

	odp = to_ib_umem_odp(mr->umem);

	/* prefetch with write-access must be supported by the MR */
	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE &&
	    !odp->umem.writable)
		return NULL;

	return mr;
}

static void mlx5_ib_prefetch_mr_work(struct work_struct *w)
{
	struct prefetch_mr_work *work =
		container_of(w, struct prefetch_mr_work, work);
	struct mlx5_ib_dev *dev;
	u32 bytes_mapped = 0;
	int srcu_key;
	int ret;
	u32 i;

	/* We rely on IB/core executing this work only when num_sge != 0. */
	WARN_ON(!work->num_sge);
	dev = work->frags[0].mr->dev;
	/* SRCU should be held when calling mlx5_odp_populate_xlt() */
	srcu_key = srcu_read_lock(&dev->odp_srcu);
	for (i = 0; i < work->num_sge; ++i) {
		ret = pagefault_mr(work->frags[i].mr, work->frags[i].io_virt,
				   work->frags[i].length, &bytes_mapped,
				   work->pf_flags);
		if (ret <= 0)
			continue;
		mlx5_update_odp_stats(work->frags[i].mr, prefetch, ret);
	}
	srcu_read_unlock(&dev->odp_srcu, srcu_key);

	destroy_prefetch_work(work);
}
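
/*
 * init_prefetch_work() resolves every SGE to an MR while the caller still
 * holds the odp_srcu read lock and takes a num_deferred_work reference on
 * each one, so the MRs stay usable until mlx5_ib_prefetch_mr_work() and
 * destroy_prefetch_work() have run. On failure, num_sge records how many
 * frags were set up so that only the references actually taken are dropped.
 */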

static bool init_prefetch_work(struct ib_pd *pd,
			       enum ib_uverbs_advise_mr_advice advice,
			       u32 pf_flags, struct prefetch_mr_work *work,
			       struct ib_sge *sg_list, u32 num_sge)
{
	u32 i;

	INIT_WORK(&work->work, mlx5_ib_prefetch_mr_work);
	work->pf_flags = pf_flags;

	for (i = 0; i < num_sge; ++i) {
		work->frags[i].io_virt = sg_list[i].addr;
		work->frags[i].length = sg_list[i].length;
		work->frags[i].mr =
			get_prefetchable_mr(pd, advice, sg_list[i].lkey);
		if (!work->frags[i].mr) {
			work->num_sge = i;
			return false;
		}

		/* Keep the MR pointer valid outside the SRCU */
		atomic_inc(&work->frags[i].mr->num_deferred_work);
	}
	work->num_sge = num_sge;
	return true;
}

static int mlx5_ib_prefetch_sg_list(struct ib_pd *pd,
				    enum ib_uverbs_advise_mr_advice advice,
				    u32 pf_flags, struct ib_sge *sg_list,
				    u32 num_sge)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	u32 bytes_mapped = 0;
	int srcu_key;
	int ret = 0;
	u32 i;

	srcu_key = srcu_read_lock(&dev->odp_srcu);
	for (i = 0; i < num_sge; ++i) {
		struct mlx5_ib_mr *mr;

		mr = get_prefetchable_mr(pd, advice, sg_list[i].lkey);
		if (!mr) {
			ret = -ENOENT;
			goto out;
		}
		ret = pagefault_mr(mr, sg_list[i].addr, sg_list[i].length,
				   &bytes_mapped, pf_flags);
		if (ret < 0)
			goto out;
		mlx5_update_odp_stats(mr, prefetch, ret);
	}
	ret = 0;

out:
	srcu_read_unlock(&dev->odp_srcu, srcu_key);
	return ret;
}

int mlx5_ib_advise_mr_prefetch(struct ib_pd *pd,
			       enum ib_uverbs_advise_mr_advice advice,
			       u32 flags, struct ib_sge *sg_list, u32 num_sge)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	u32 pf_flags = 0;
	struct prefetch_mr_work *work;
	int srcu_key;

	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH)
		pf_flags |= MLX5_PF_FLAGS_DOWNGRADE;

	if (advice == IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_NO_FAULT)
		pf_flags |= MLX5_PF_FLAGS_SNAPSHOT;

	if (flags & IB_UVERBS_ADVISE_MR_FLAG_FLUSH)
		return mlx5_ib_prefetch_sg_list(pd, advice, pf_flags, sg_list,
						num_sge);

	work = kvzalloc(struct_size(work, frags, num_sge), GFP_KERNEL);
	if (!work)
		return -ENOMEM;

	srcu_key = srcu_read_lock(&dev->odp_srcu);
	if (!init_prefetch_work(pd, advice, pf_flags, work, sg_list, num_sge)) {
		srcu_read_unlock(&dev->odp_srcu, srcu_key);
		destroy_prefetch_work(work);
		return -EINVAL;
	}
	queue_work(system_unbound_wq, &work->work);
	srcu_read_unlock(&dev->odp_srcu, srcu_key);
	return 0;
}
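
/*
 * mlx5_ib_advise_mr_prefetch() is reached from userspace through the uverbs
 * advise_mr method. As a rough sketch only (the exact spelling of the
 * rdma-core names should be checked against the libibverbs headers), an
 * application would prefetch an ODP-registered buffer like this:
 *
 *	struct ibv_sge sge = {
 *		.addr = (uintptr_t)buf,
 *		.length = len,
 *		.lkey = mr->lkey,
 *	};
 *
 *	ibv_advise_mr(pd, IBV_ADVISE_MR_ADVICE_PREFETCH_WRITE,
 *		      IBV_ADVISE_MR_FLAG_FLUSH, &sge, 1);
 *
 * With the FLUSH flag the pages are faulted in synchronously via
 * mlx5_ib_prefetch_sg_list(); without it the request is queued and handled
 * asynchronously by mlx5_ib_prefetch_mr_work().
 */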