18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright (c) 2016 Oracle. All rights reserved. 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * This software is available to you under a choice of one of two 58c2ecf20Sopenharmony_ci * licenses. You may choose to be licensed under the terms of the GNU 68c2ecf20Sopenharmony_ci * General Public License (GPL) Version 2, available from the file 78c2ecf20Sopenharmony_ci * COPYING in the main directory of this source tree, or the 88c2ecf20Sopenharmony_ci * OpenIB.org BSD license below: 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or 118c2ecf20Sopenharmony_ci * without modification, are permitted provided that the following 128c2ecf20Sopenharmony_ci * conditions are met: 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * - Redistributions of source code must retain the above 158c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 168c2ecf20Sopenharmony_ci * disclaimer. 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * - Redistributions in binary form must reproduce the above 198c2ecf20Sopenharmony_ci * copyright notice, this list of conditions and the following 208c2ecf20Sopenharmony_ci * disclaimer in the documentation and/or other materials 218c2ecf20Sopenharmony_ci * provided with the distribution. 228c2ecf20Sopenharmony_ci * 238c2ecf20Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 248c2ecf20Sopenharmony_ci * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 258c2ecf20Sopenharmony_ci * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 268c2ecf20Sopenharmony_ci * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 278c2ecf20Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 288c2ecf20Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 298c2ecf20Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 308c2ecf20Sopenharmony_ci * SOFTWARE. 318c2ecf20Sopenharmony_ci */ 328c2ecf20Sopenharmony_ci 338c2ecf20Sopenharmony_ci#include "ib_mr.h" 348c2ecf20Sopenharmony_ci 358c2ecf20Sopenharmony_cistatic inline void 368c2ecf20Sopenharmony_cirds_transition_frwr_state(struct rds_ib_mr *ibmr, 378c2ecf20Sopenharmony_ci enum rds_ib_fr_state old_state, 388c2ecf20Sopenharmony_ci enum rds_ib_fr_state new_state) 398c2ecf20Sopenharmony_ci{ 408c2ecf20Sopenharmony_ci if (cmpxchg(&ibmr->u.frmr.fr_state, 418c2ecf20Sopenharmony_ci old_state, new_state) == old_state && 428c2ecf20Sopenharmony_ci old_state == FRMR_IS_INUSE) { 438c2ecf20Sopenharmony_ci /* enforce order of ibmr->u.frmr.fr_state update 448c2ecf20Sopenharmony_ci * before decrementing i_fastreg_inuse_count 458c2ecf20Sopenharmony_ci */ 468c2ecf20Sopenharmony_ci smp_mb__before_atomic(); 478c2ecf20Sopenharmony_ci atomic_dec(&ibmr->ic->i_fastreg_inuse_count); 488c2ecf20Sopenharmony_ci if (waitqueue_active(&rds_ib_ring_empty_wait)) 498c2ecf20Sopenharmony_ci wake_up(&rds_ib_ring_empty_wait); 508c2ecf20Sopenharmony_ci } 518c2ecf20Sopenharmony_ci} 528c2ecf20Sopenharmony_ci 538c2ecf20Sopenharmony_cistatic struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev, 548c2ecf20Sopenharmony_ci int npages) 558c2ecf20Sopenharmony_ci{ 568c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool; 578c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = NULL; 588c2ecf20Sopenharmony_ci struct rds_ib_frmr *frmr; 598c2ecf20Sopenharmony_ci int err = 0; 608c2ecf20Sopenharmony_ci 618c2ecf20Sopenharmony_ci if (npages <= RDS_MR_8K_MSG_SIZE) 628c2ecf20Sopenharmony_ci pool = rds_ibdev->mr_8k_pool; 638c2ecf20Sopenharmony_ci else 648c2ecf20Sopenharmony_ci pool = rds_ibdev->mr_1m_pool; 658c2ecf20Sopenharmony_ci 668c2ecf20Sopenharmony_ci ibmr = rds_ib_try_reuse_ibmr(pool); 678c2ecf20Sopenharmony_ci if (ibmr) 688c2ecf20Sopenharmony_ci return ibmr; 698c2ecf20Sopenharmony_ci 708c2ecf20Sopenharmony_ci ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL, 718c2ecf20Sopenharmony_ci rdsibdev_to_node(rds_ibdev)); 728c2ecf20Sopenharmony_ci if (!ibmr) { 738c2ecf20Sopenharmony_ci err = -ENOMEM; 748c2ecf20Sopenharmony_ci goto out_no_cigar; 758c2ecf20Sopenharmony_ci } 768c2ecf20Sopenharmony_ci 778c2ecf20Sopenharmony_ci frmr = &ibmr->u.frmr; 788c2ecf20Sopenharmony_ci frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG, 798c2ecf20Sopenharmony_ci pool->max_pages); 808c2ecf20Sopenharmony_ci if (IS_ERR(frmr->mr)) { 818c2ecf20Sopenharmony_ci pr_warn("RDS/IB: %s failed to allocate MR", __func__); 828c2ecf20Sopenharmony_ci err = PTR_ERR(frmr->mr); 838c2ecf20Sopenharmony_ci goto out_no_cigar; 848c2ecf20Sopenharmony_ci } 858c2ecf20Sopenharmony_ci 868c2ecf20Sopenharmony_ci ibmr->pool = pool; 878c2ecf20Sopenharmony_ci if (pool->pool_type == RDS_IB_MR_8K_POOL) 888c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc); 898c2ecf20Sopenharmony_ci else 908c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc); 918c2ecf20Sopenharmony_ci 928c2ecf20Sopenharmony_ci if (atomic_read(&pool->item_count) > pool->max_items_soft) 938c2ecf20Sopenharmony_ci pool->max_items_soft = pool->max_items; 948c2ecf20Sopenharmony_ci 958c2ecf20Sopenharmony_ci frmr->fr_state = FRMR_IS_FREE; 968c2ecf20Sopenharmony_ci init_waitqueue_head(&frmr->fr_inv_done); 978c2ecf20Sopenharmony_ci init_waitqueue_head(&frmr->fr_reg_done); 988c2ecf20Sopenharmony_ci return ibmr; 998c2ecf20Sopenharmony_ci 1008c2ecf20Sopenharmony_ciout_no_cigar: 1018c2ecf20Sopenharmony_ci kfree(ibmr); 1028c2ecf20Sopenharmony_ci atomic_dec(&pool->item_count); 1038c2ecf20Sopenharmony_ci return ERR_PTR(err); 1048c2ecf20Sopenharmony_ci} 1058c2ecf20Sopenharmony_ci 1068c2ecf20Sopenharmony_cistatic void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop) 1078c2ecf20Sopenharmony_ci{ 1088c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool = ibmr->pool; 1098c2ecf20Sopenharmony_ci 1108c2ecf20Sopenharmony_ci if (drop) 1118c2ecf20Sopenharmony_ci llist_add(&ibmr->llnode, &pool->drop_list); 1128c2ecf20Sopenharmony_ci else 1138c2ecf20Sopenharmony_ci llist_add(&ibmr->llnode, &pool->free_list); 1148c2ecf20Sopenharmony_ci atomic_add(ibmr->sg_len, &pool->free_pinned); 1158c2ecf20Sopenharmony_ci atomic_inc(&pool->dirty_count); 1168c2ecf20Sopenharmony_ci 1178c2ecf20Sopenharmony_ci /* If we've pinned too many pages, request a flush */ 1188c2ecf20Sopenharmony_ci if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned || 1198c2ecf20Sopenharmony_ci atomic_read(&pool->dirty_count) >= pool->max_items / 5) 1208c2ecf20Sopenharmony_ci queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10); 1218c2ecf20Sopenharmony_ci} 1228c2ecf20Sopenharmony_ci 1238c2ecf20Sopenharmony_cistatic int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr) 1248c2ecf20Sopenharmony_ci{ 1258c2ecf20Sopenharmony_ci struct rds_ib_frmr *frmr = &ibmr->u.frmr; 1268c2ecf20Sopenharmony_ci struct ib_reg_wr reg_wr; 1278c2ecf20Sopenharmony_ci int ret, off = 0; 1288c2ecf20Sopenharmony_ci 1298c2ecf20Sopenharmony_ci while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) { 1308c2ecf20Sopenharmony_ci atomic_inc(&ibmr->ic->i_fastreg_wrs); 1318c2ecf20Sopenharmony_ci cpu_relax(); 1328c2ecf20Sopenharmony_ci } 1338c2ecf20Sopenharmony_ci 1348c2ecf20Sopenharmony_ci ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len, 1358c2ecf20Sopenharmony_ci &off, PAGE_SIZE); 1368c2ecf20Sopenharmony_ci if (unlikely(ret != ibmr->sg_dma_len)) 1378c2ecf20Sopenharmony_ci return ret < 0 ? ret : -EINVAL; 1388c2ecf20Sopenharmony_ci 1398c2ecf20Sopenharmony_ci if (cmpxchg(&frmr->fr_state, 1408c2ecf20Sopenharmony_ci FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE) 1418c2ecf20Sopenharmony_ci return -EBUSY; 1428c2ecf20Sopenharmony_ci 1438c2ecf20Sopenharmony_ci atomic_inc(&ibmr->ic->i_fastreg_inuse_count); 1448c2ecf20Sopenharmony_ci 1458c2ecf20Sopenharmony_ci /* Perform a WR for the fast_reg_mr. Each individual page 1468c2ecf20Sopenharmony_ci * in the sg list is added to the fast reg page list and placed 1478c2ecf20Sopenharmony_ci * inside the fast_reg_mr WR. The key used is a rolling 8bit 1488c2ecf20Sopenharmony_ci * counter, which should guarantee uniqueness. 1498c2ecf20Sopenharmony_ci */ 1508c2ecf20Sopenharmony_ci ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++); 1518c2ecf20Sopenharmony_ci frmr->fr_reg = true; 1528c2ecf20Sopenharmony_ci 1538c2ecf20Sopenharmony_ci memset(®_wr, 0, sizeof(reg_wr)); 1548c2ecf20Sopenharmony_ci reg_wr.wr.wr_id = (unsigned long)(void *)ibmr; 1558c2ecf20Sopenharmony_ci reg_wr.wr.opcode = IB_WR_REG_MR; 1568c2ecf20Sopenharmony_ci reg_wr.wr.num_sge = 0; 1578c2ecf20Sopenharmony_ci reg_wr.mr = frmr->mr; 1588c2ecf20Sopenharmony_ci reg_wr.key = frmr->mr->rkey; 1598c2ecf20Sopenharmony_ci reg_wr.access = IB_ACCESS_LOCAL_WRITE | 1608c2ecf20Sopenharmony_ci IB_ACCESS_REMOTE_READ | 1618c2ecf20Sopenharmony_ci IB_ACCESS_REMOTE_WRITE; 1628c2ecf20Sopenharmony_ci reg_wr.wr.send_flags = IB_SEND_SIGNALED; 1638c2ecf20Sopenharmony_ci 1648c2ecf20Sopenharmony_ci ret = ib_post_send(ibmr->ic->i_cm_id->qp, ®_wr.wr, NULL); 1658c2ecf20Sopenharmony_ci if (unlikely(ret)) { 1668c2ecf20Sopenharmony_ci /* Failure here can be because of -ENOMEM as well */ 1678c2ecf20Sopenharmony_ci rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); 1688c2ecf20Sopenharmony_ci 1698c2ecf20Sopenharmony_ci atomic_inc(&ibmr->ic->i_fastreg_wrs); 1708c2ecf20Sopenharmony_ci if (printk_ratelimit()) 1718c2ecf20Sopenharmony_ci pr_warn("RDS/IB: %s returned error(%d)\n", 1728c2ecf20Sopenharmony_ci __func__, ret); 1738c2ecf20Sopenharmony_ci goto out; 1748c2ecf20Sopenharmony_ci } 1758c2ecf20Sopenharmony_ci 1768c2ecf20Sopenharmony_ci /* Wait for the registration to complete in order to prevent an invalid 1778c2ecf20Sopenharmony_ci * access error resulting from a race between the memory region already 1788c2ecf20Sopenharmony_ci * being accessed while registration is still pending. 1798c2ecf20Sopenharmony_ci */ 1808c2ecf20Sopenharmony_ci wait_event(frmr->fr_reg_done, !frmr->fr_reg); 1818c2ecf20Sopenharmony_ci 1828c2ecf20Sopenharmony_ciout: 1838c2ecf20Sopenharmony_ci 1848c2ecf20Sopenharmony_ci return ret; 1858c2ecf20Sopenharmony_ci} 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_cistatic int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev, 1888c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool, 1898c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr, 1908c2ecf20Sopenharmony_ci struct scatterlist *sg, unsigned int sg_len) 1918c2ecf20Sopenharmony_ci{ 1928c2ecf20Sopenharmony_ci struct ib_device *dev = rds_ibdev->dev; 1938c2ecf20Sopenharmony_ci struct rds_ib_frmr *frmr = &ibmr->u.frmr; 1948c2ecf20Sopenharmony_ci int i; 1958c2ecf20Sopenharmony_ci u32 len; 1968c2ecf20Sopenharmony_ci int ret = 0; 1978c2ecf20Sopenharmony_ci 1988c2ecf20Sopenharmony_ci /* We want to teardown old ibmr values here and fill it up with 1998c2ecf20Sopenharmony_ci * new sg values 2008c2ecf20Sopenharmony_ci */ 2018c2ecf20Sopenharmony_ci rds_ib_teardown_mr(ibmr); 2028c2ecf20Sopenharmony_ci 2038c2ecf20Sopenharmony_ci ibmr->sg = sg; 2048c2ecf20Sopenharmony_ci ibmr->sg_len = sg_len; 2058c2ecf20Sopenharmony_ci ibmr->sg_dma_len = 0; 2068c2ecf20Sopenharmony_ci frmr->sg_byte_len = 0; 2078c2ecf20Sopenharmony_ci WARN_ON(ibmr->sg_dma_len); 2088c2ecf20Sopenharmony_ci ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len, 2098c2ecf20Sopenharmony_ci DMA_BIDIRECTIONAL); 2108c2ecf20Sopenharmony_ci if (unlikely(!ibmr->sg_dma_len)) { 2118c2ecf20Sopenharmony_ci pr_warn("RDS/IB: %s failed!\n", __func__); 2128c2ecf20Sopenharmony_ci return -EBUSY; 2138c2ecf20Sopenharmony_ci } 2148c2ecf20Sopenharmony_ci 2158c2ecf20Sopenharmony_ci frmr->sg_byte_len = 0; 2168c2ecf20Sopenharmony_ci frmr->dma_npages = 0; 2178c2ecf20Sopenharmony_ci len = 0; 2188c2ecf20Sopenharmony_ci 2198c2ecf20Sopenharmony_ci ret = -EINVAL; 2208c2ecf20Sopenharmony_ci for (i = 0; i < ibmr->sg_dma_len; ++i) { 2218c2ecf20Sopenharmony_ci unsigned int dma_len = sg_dma_len(&ibmr->sg[i]); 2228c2ecf20Sopenharmony_ci u64 dma_addr = sg_dma_address(&ibmr->sg[i]); 2238c2ecf20Sopenharmony_ci 2248c2ecf20Sopenharmony_ci frmr->sg_byte_len += dma_len; 2258c2ecf20Sopenharmony_ci if (dma_addr & ~PAGE_MASK) { 2268c2ecf20Sopenharmony_ci if (i > 0) 2278c2ecf20Sopenharmony_ci goto out_unmap; 2288c2ecf20Sopenharmony_ci else 2298c2ecf20Sopenharmony_ci ++frmr->dma_npages; 2308c2ecf20Sopenharmony_ci } 2318c2ecf20Sopenharmony_ci 2328c2ecf20Sopenharmony_ci if ((dma_addr + dma_len) & ~PAGE_MASK) { 2338c2ecf20Sopenharmony_ci if (i < ibmr->sg_dma_len - 1) 2348c2ecf20Sopenharmony_ci goto out_unmap; 2358c2ecf20Sopenharmony_ci else 2368c2ecf20Sopenharmony_ci ++frmr->dma_npages; 2378c2ecf20Sopenharmony_ci } 2388c2ecf20Sopenharmony_ci 2398c2ecf20Sopenharmony_ci len += dma_len; 2408c2ecf20Sopenharmony_ci } 2418c2ecf20Sopenharmony_ci frmr->dma_npages += len >> PAGE_SHIFT; 2428c2ecf20Sopenharmony_ci 2438c2ecf20Sopenharmony_ci if (frmr->dma_npages > ibmr->pool->max_pages) { 2448c2ecf20Sopenharmony_ci ret = -EMSGSIZE; 2458c2ecf20Sopenharmony_ci goto out_unmap; 2468c2ecf20Sopenharmony_ci } 2478c2ecf20Sopenharmony_ci 2488c2ecf20Sopenharmony_ci ret = rds_ib_post_reg_frmr(ibmr); 2498c2ecf20Sopenharmony_ci if (ret) 2508c2ecf20Sopenharmony_ci goto out_unmap; 2518c2ecf20Sopenharmony_ci 2528c2ecf20Sopenharmony_ci if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) 2538c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_used); 2548c2ecf20Sopenharmony_ci else 2558c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_used); 2568c2ecf20Sopenharmony_ci 2578c2ecf20Sopenharmony_ci return ret; 2588c2ecf20Sopenharmony_ci 2598c2ecf20Sopenharmony_ciout_unmap: 2608c2ecf20Sopenharmony_ci ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len, 2618c2ecf20Sopenharmony_ci DMA_BIDIRECTIONAL); 2628c2ecf20Sopenharmony_ci ibmr->sg_dma_len = 0; 2638c2ecf20Sopenharmony_ci return ret; 2648c2ecf20Sopenharmony_ci} 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_cistatic int rds_ib_post_inv(struct rds_ib_mr *ibmr) 2678c2ecf20Sopenharmony_ci{ 2688c2ecf20Sopenharmony_ci struct ib_send_wr *s_wr; 2698c2ecf20Sopenharmony_ci struct rds_ib_frmr *frmr = &ibmr->u.frmr; 2708c2ecf20Sopenharmony_ci struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id; 2718c2ecf20Sopenharmony_ci int ret = -EINVAL; 2728c2ecf20Sopenharmony_ci 2738c2ecf20Sopenharmony_ci if (!i_cm_id || !i_cm_id->qp || !frmr->mr) 2748c2ecf20Sopenharmony_ci goto out; 2758c2ecf20Sopenharmony_ci 2768c2ecf20Sopenharmony_ci if (frmr->fr_state != FRMR_IS_INUSE) 2778c2ecf20Sopenharmony_ci goto out; 2788c2ecf20Sopenharmony_ci 2798c2ecf20Sopenharmony_ci while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) { 2808c2ecf20Sopenharmony_ci atomic_inc(&ibmr->ic->i_fastreg_wrs); 2818c2ecf20Sopenharmony_ci cpu_relax(); 2828c2ecf20Sopenharmony_ci } 2838c2ecf20Sopenharmony_ci 2848c2ecf20Sopenharmony_ci frmr->fr_inv = true; 2858c2ecf20Sopenharmony_ci s_wr = &frmr->fr_wr; 2868c2ecf20Sopenharmony_ci 2878c2ecf20Sopenharmony_ci memset(s_wr, 0, sizeof(*s_wr)); 2888c2ecf20Sopenharmony_ci s_wr->wr_id = (unsigned long)(void *)ibmr; 2898c2ecf20Sopenharmony_ci s_wr->opcode = IB_WR_LOCAL_INV; 2908c2ecf20Sopenharmony_ci s_wr->ex.invalidate_rkey = frmr->mr->rkey; 2918c2ecf20Sopenharmony_ci s_wr->send_flags = IB_SEND_SIGNALED; 2928c2ecf20Sopenharmony_ci 2938c2ecf20Sopenharmony_ci ret = ib_post_send(i_cm_id->qp, s_wr, NULL); 2948c2ecf20Sopenharmony_ci if (unlikely(ret)) { 2958c2ecf20Sopenharmony_ci rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); 2968c2ecf20Sopenharmony_ci frmr->fr_inv = false; 2978c2ecf20Sopenharmony_ci /* enforce order of frmr->fr_inv update 2988c2ecf20Sopenharmony_ci * before incrementing i_fastreg_wrs 2998c2ecf20Sopenharmony_ci */ 3008c2ecf20Sopenharmony_ci smp_mb__before_atomic(); 3018c2ecf20Sopenharmony_ci atomic_inc(&ibmr->ic->i_fastreg_wrs); 3028c2ecf20Sopenharmony_ci pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret); 3038c2ecf20Sopenharmony_ci goto out; 3048c2ecf20Sopenharmony_ci } 3058c2ecf20Sopenharmony_ci 3068c2ecf20Sopenharmony_ci /* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to 3078c2ecf20Sopenharmony_ci * 1) avoid a silly bouncing between "clean_list" and "drop_list" 3088c2ecf20Sopenharmony_ci * triggered by function "rds_ib_reg_frmr" as it is releases frmr 3098c2ecf20Sopenharmony_ci * regions whose state is not "FRMR_IS_FREE" right away. 3108c2ecf20Sopenharmony_ci * 2) prevents an invalid access error in a race 3118c2ecf20Sopenharmony_ci * from a pending "IB_WR_LOCAL_INV" operation 3128c2ecf20Sopenharmony_ci * with a teardown ("dma_unmap_sg", "put_page") 3138c2ecf20Sopenharmony_ci * and de-registration ("ib_dereg_mr") of the corresponding 3148c2ecf20Sopenharmony_ci * memory region. 3158c2ecf20Sopenharmony_ci */ 3168c2ecf20Sopenharmony_ci wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE); 3178c2ecf20Sopenharmony_ci 3188c2ecf20Sopenharmony_ciout: 3198c2ecf20Sopenharmony_ci return ret; 3208c2ecf20Sopenharmony_ci} 3218c2ecf20Sopenharmony_ci 3228c2ecf20Sopenharmony_civoid rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc) 3238c2ecf20Sopenharmony_ci{ 3248c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id; 3258c2ecf20Sopenharmony_ci struct rds_ib_frmr *frmr = &ibmr->u.frmr; 3268c2ecf20Sopenharmony_ci 3278c2ecf20Sopenharmony_ci if (wc->status != IB_WC_SUCCESS) { 3288c2ecf20Sopenharmony_ci rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE); 3298c2ecf20Sopenharmony_ci if (rds_conn_up(ic->conn)) 3308c2ecf20Sopenharmony_ci rds_ib_conn_error(ic->conn, 3318c2ecf20Sopenharmony_ci "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n", 3328c2ecf20Sopenharmony_ci &ic->conn->c_laddr, 3338c2ecf20Sopenharmony_ci &ic->conn->c_faddr, 3348c2ecf20Sopenharmony_ci wc->status, 3358c2ecf20Sopenharmony_ci ib_wc_status_msg(wc->status), 3368c2ecf20Sopenharmony_ci wc->vendor_err); 3378c2ecf20Sopenharmony_ci } 3388c2ecf20Sopenharmony_ci 3398c2ecf20Sopenharmony_ci if (frmr->fr_inv) { 3408c2ecf20Sopenharmony_ci rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE); 3418c2ecf20Sopenharmony_ci frmr->fr_inv = false; 3428c2ecf20Sopenharmony_ci wake_up(&frmr->fr_inv_done); 3438c2ecf20Sopenharmony_ci } 3448c2ecf20Sopenharmony_ci 3458c2ecf20Sopenharmony_ci if (frmr->fr_reg) { 3468c2ecf20Sopenharmony_ci frmr->fr_reg = false; 3478c2ecf20Sopenharmony_ci wake_up(&frmr->fr_reg_done); 3488c2ecf20Sopenharmony_ci } 3498c2ecf20Sopenharmony_ci 3508c2ecf20Sopenharmony_ci /* enforce order of frmr->{fr_reg,fr_inv} update 3518c2ecf20Sopenharmony_ci * before incrementing i_fastreg_wrs 3528c2ecf20Sopenharmony_ci */ 3538c2ecf20Sopenharmony_ci smp_mb__before_atomic(); 3548c2ecf20Sopenharmony_ci atomic_inc(&ic->i_fastreg_wrs); 3558c2ecf20Sopenharmony_ci} 3568c2ecf20Sopenharmony_ci 3578c2ecf20Sopenharmony_civoid rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed, 3588c2ecf20Sopenharmony_ci unsigned long *unpinned, unsigned int goal) 3598c2ecf20Sopenharmony_ci{ 3608c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr, *next; 3618c2ecf20Sopenharmony_ci struct rds_ib_frmr *frmr; 3628c2ecf20Sopenharmony_ci int ret = 0, ret2; 3638c2ecf20Sopenharmony_ci unsigned int freed = *nfreed; 3648c2ecf20Sopenharmony_ci 3658c2ecf20Sopenharmony_ci /* String all ib_mr's onto one list and hand them to ib_unmap_fmr */ 3668c2ecf20Sopenharmony_ci list_for_each_entry(ibmr, list, unmap_list) { 3678c2ecf20Sopenharmony_ci if (ibmr->sg_dma_len) { 3688c2ecf20Sopenharmony_ci ret2 = rds_ib_post_inv(ibmr); 3698c2ecf20Sopenharmony_ci if (ret2 && !ret) 3708c2ecf20Sopenharmony_ci ret = ret2; 3718c2ecf20Sopenharmony_ci } 3728c2ecf20Sopenharmony_ci } 3738c2ecf20Sopenharmony_ci 3748c2ecf20Sopenharmony_ci if (ret) 3758c2ecf20Sopenharmony_ci pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret); 3768c2ecf20Sopenharmony_ci 3778c2ecf20Sopenharmony_ci /* Now we can destroy the DMA mapping and unpin any pages */ 3788c2ecf20Sopenharmony_ci list_for_each_entry_safe(ibmr, next, list, unmap_list) { 3798c2ecf20Sopenharmony_ci *unpinned += ibmr->sg_len; 3808c2ecf20Sopenharmony_ci frmr = &ibmr->u.frmr; 3818c2ecf20Sopenharmony_ci __rds_ib_teardown_mr(ibmr); 3828c2ecf20Sopenharmony_ci if (freed < goal || frmr->fr_state == FRMR_IS_STALE) { 3838c2ecf20Sopenharmony_ci /* Don't de-allocate if the MR is not free yet */ 3848c2ecf20Sopenharmony_ci if (frmr->fr_state == FRMR_IS_INUSE) 3858c2ecf20Sopenharmony_ci continue; 3868c2ecf20Sopenharmony_ci 3878c2ecf20Sopenharmony_ci if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL) 3888c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_8k_free); 3898c2ecf20Sopenharmony_ci else 3908c2ecf20Sopenharmony_ci rds_ib_stats_inc(s_ib_rdma_mr_1m_free); 3918c2ecf20Sopenharmony_ci list_del(&ibmr->unmap_list); 3928c2ecf20Sopenharmony_ci if (frmr->mr) 3938c2ecf20Sopenharmony_ci ib_dereg_mr(frmr->mr); 3948c2ecf20Sopenharmony_ci kfree(ibmr); 3958c2ecf20Sopenharmony_ci freed++; 3968c2ecf20Sopenharmony_ci } 3978c2ecf20Sopenharmony_ci } 3988c2ecf20Sopenharmony_ci *nfreed = freed; 3998c2ecf20Sopenharmony_ci} 4008c2ecf20Sopenharmony_ci 4018c2ecf20Sopenharmony_cistruct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev, 4028c2ecf20Sopenharmony_ci struct rds_ib_connection *ic, 4038c2ecf20Sopenharmony_ci struct scatterlist *sg, 4048c2ecf20Sopenharmony_ci unsigned long nents, u32 *key) 4058c2ecf20Sopenharmony_ci{ 4068c2ecf20Sopenharmony_ci struct rds_ib_mr *ibmr = NULL; 4078c2ecf20Sopenharmony_ci struct rds_ib_frmr *frmr; 4088c2ecf20Sopenharmony_ci int ret; 4098c2ecf20Sopenharmony_ci 4108c2ecf20Sopenharmony_ci if (!ic) { 4118c2ecf20Sopenharmony_ci /* TODO: Add FRWR support for RDS_GET_MR using proxy qp*/ 4128c2ecf20Sopenharmony_ci return ERR_PTR(-EOPNOTSUPP); 4138c2ecf20Sopenharmony_ci } 4148c2ecf20Sopenharmony_ci 4158c2ecf20Sopenharmony_ci do { 4168c2ecf20Sopenharmony_ci if (ibmr) 4178c2ecf20Sopenharmony_ci rds_ib_free_frmr(ibmr, true); 4188c2ecf20Sopenharmony_ci ibmr = rds_ib_alloc_frmr(rds_ibdev, nents); 4198c2ecf20Sopenharmony_ci if (IS_ERR(ibmr)) 4208c2ecf20Sopenharmony_ci return ibmr; 4218c2ecf20Sopenharmony_ci frmr = &ibmr->u.frmr; 4228c2ecf20Sopenharmony_ci } while (frmr->fr_state != FRMR_IS_FREE); 4238c2ecf20Sopenharmony_ci 4248c2ecf20Sopenharmony_ci ibmr->ic = ic; 4258c2ecf20Sopenharmony_ci ibmr->device = rds_ibdev; 4268c2ecf20Sopenharmony_ci ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents); 4278c2ecf20Sopenharmony_ci if (ret == 0) { 4288c2ecf20Sopenharmony_ci *key = frmr->mr->rkey; 4298c2ecf20Sopenharmony_ci } else { 4308c2ecf20Sopenharmony_ci rds_ib_free_frmr(ibmr, false); 4318c2ecf20Sopenharmony_ci ibmr = ERR_PTR(ret); 4328c2ecf20Sopenharmony_ci } 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci return ibmr; 4358c2ecf20Sopenharmony_ci} 4368c2ecf20Sopenharmony_ci 4378c2ecf20Sopenharmony_civoid rds_ib_free_frmr_list(struct rds_ib_mr *ibmr) 4388c2ecf20Sopenharmony_ci{ 4398c2ecf20Sopenharmony_ci struct rds_ib_mr_pool *pool = ibmr->pool; 4408c2ecf20Sopenharmony_ci struct rds_ib_frmr *frmr = &ibmr->u.frmr; 4418c2ecf20Sopenharmony_ci 4428c2ecf20Sopenharmony_ci if (frmr->fr_state == FRMR_IS_STALE) 4438c2ecf20Sopenharmony_ci llist_add(&ibmr->llnode, &pool->drop_list); 4448c2ecf20Sopenharmony_ci else 4458c2ecf20Sopenharmony_ci llist_add(&ibmr->llnode, &pool->free_list); 4468c2ecf20Sopenharmony_ci} 447