/*
 * Copyright (c) 2016 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci#include "ib_mr.h"
348c2ecf20Sopenharmony_ci
358c2ecf20Sopenharmony_cistatic inline void
368c2ecf20Sopenharmony_cirds_transition_frwr_state(struct rds_ib_mr *ibmr,
378c2ecf20Sopenharmony_ci			  enum rds_ib_fr_state old_state,
388c2ecf20Sopenharmony_ci			  enum rds_ib_fr_state new_state)
398c2ecf20Sopenharmony_ci{
408c2ecf20Sopenharmony_ci	if (cmpxchg(&ibmr->u.frmr.fr_state,
418c2ecf20Sopenharmony_ci		    old_state, new_state) == old_state &&
428c2ecf20Sopenharmony_ci	    old_state == FRMR_IS_INUSE) {
438c2ecf20Sopenharmony_ci		/* enforce order of ibmr->u.frmr.fr_state update
448c2ecf20Sopenharmony_ci		 * before decrementing i_fastreg_inuse_count
458c2ecf20Sopenharmony_ci		 */
468c2ecf20Sopenharmony_ci		smp_mb__before_atomic();
478c2ecf20Sopenharmony_ci		atomic_dec(&ibmr->ic->i_fastreg_inuse_count);
488c2ecf20Sopenharmony_ci		if (waitqueue_active(&rds_ib_ring_empty_wait))
498c2ecf20Sopenharmony_ci			wake_up(&rds_ib_ring_empty_wait);
508c2ecf20Sopenharmony_ci	}
518c2ecf20Sopenharmony_ci}
528c2ecf20Sopenharmony_ci
538c2ecf20Sopenharmony_cistatic struct rds_ib_mr *rds_ib_alloc_frmr(struct rds_ib_device *rds_ibdev,
548c2ecf20Sopenharmony_ci					   int npages)
558c2ecf20Sopenharmony_ci{
568c2ecf20Sopenharmony_ci	struct rds_ib_mr_pool *pool;
578c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = NULL;
588c2ecf20Sopenharmony_ci	struct rds_ib_frmr *frmr;
598c2ecf20Sopenharmony_ci	int err = 0;
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_ci	if (npages <= RDS_MR_8K_MSG_SIZE)
628c2ecf20Sopenharmony_ci		pool = rds_ibdev->mr_8k_pool;
638c2ecf20Sopenharmony_ci	else
648c2ecf20Sopenharmony_ci		pool = rds_ibdev->mr_1m_pool;
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_ci	ibmr = rds_ib_try_reuse_ibmr(pool);
678c2ecf20Sopenharmony_ci	if (ibmr)
688c2ecf20Sopenharmony_ci		return ibmr;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	ibmr = kzalloc_node(sizeof(*ibmr), GFP_KERNEL,
718c2ecf20Sopenharmony_ci			    rdsibdev_to_node(rds_ibdev));
728c2ecf20Sopenharmony_ci	if (!ibmr) {
738c2ecf20Sopenharmony_ci		err = -ENOMEM;
748c2ecf20Sopenharmony_ci		goto out_no_cigar;
758c2ecf20Sopenharmony_ci	}
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	frmr = &ibmr->u.frmr;
788c2ecf20Sopenharmony_ci	frmr->mr = ib_alloc_mr(rds_ibdev->pd, IB_MR_TYPE_MEM_REG,
798c2ecf20Sopenharmony_ci			 pool->max_pages);
808c2ecf20Sopenharmony_ci	if (IS_ERR(frmr->mr)) {
818c2ecf20Sopenharmony_ci		pr_warn("RDS/IB: %s failed to allocate MR", __func__);
828c2ecf20Sopenharmony_ci		err = PTR_ERR(frmr->mr);
838c2ecf20Sopenharmony_ci		goto out_no_cigar;
848c2ecf20Sopenharmony_ci	}
858c2ecf20Sopenharmony_ci
868c2ecf20Sopenharmony_ci	ibmr->pool = pool;
878c2ecf20Sopenharmony_ci	if (pool->pool_type == RDS_IB_MR_8K_POOL)
888c2ecf20Sopenharmony_ci		rds_ib_stats_inc(s_ib_rdma_mr_8k_alloc);
898c2ecf20Sopenharmony_ci	else
908c2ecf20Sopenharmony_ci		rds_ib_stats_inc(s_ib_rdma_mr_1m_alloc);
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci	if (atomic_read(&pool->item_count) > pool->max_items_soft)
938c2ecf20Sopenharmony_ci		pool->max_items_soft = pool->max_items;
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	frmr->fr_state = FRMR_IS_FREE;
968c2ecf20Sopenharmony_ci	init_waitqueue_head(&frmr->fr_inv_done);
978c2ecf20Sopenharmony_ci	init_waitqueue_head(&frmr->fr_reg_done);
988c2ecf20Sopenharmony_ci	return ibmr;
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ciout_no_cigar:
1018c2ecf20Sopenharmony_ci	kfree(ibmr);
1028c2ecf20Sopenharmony_ci	atomic_dec(&pool->item_count);
1038c2ecf20Sopenharmony_ci	return ERR_PTR(err);
1048c2ecf20Sopenharmony_ci}
1058c2ecf20Sopenharmony_ci
1068c2ecf20Sopenharmony_cistatic void rds_ib_free_frmr(struct rds_ib_mr *ibmr, bool drop)
1078c2ecf20Sopenharmony_ci{
1088c2ecf20Sopenharmony_ci	struct rds_ib_mr_pool *pool = ibmr->pool;
1098c2ecf20Sopenharmony_ci
1108c2ecf20Sopenharmony_ci	if (drop)
1118c2ecf20Sopenharmony_ci		llist_add(&ibmr->llnode, &pool->drop_list);
1128c2ecf20Sopenharmony_ci	else
1138c2ecf20Sopenharmony_ci		llist_add(&ibmr->llnode, &pool->free_list);
1148c2ecf20Sopenharmony_ci	atomic_add(ibmr->sg_len, &pool->free_pinned);
1158c2ecf20Sopenharmony_ci	atomic_inc(&pool->dirty_count);
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_ci	/* If we've pinned too many pages, request a flush */
1188c2ecf20Sopenharmony_ci	if (atomic_read(&pool->free_pinned) >= pool->max_free_pinned ||
1198c2ecf20Sopenharmony_ci	    atomic_read(&pool->dirty_count) >= pool->max_items / 5)
1208c2ecf20Sopenharmony_ci		queue_delayed_work(rds_ib_mr_wq, &pool->flush_worker, 10);
1218c2ecf20Sopenharmony_ci}
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_cistatic int rds_ib_post_reg_frmr(struct rds_ib_mr *ibmr)
1248c2ecf20Sopenharmony_ci{
1258c2ecf20Sopenharmony_ci	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
1268c2ecf20Sopenharmony_ci	struct ib_reg_wr reg_wr;
1278c2ecf20Sopenharmony_ci	int ret, off = 0;
1288c2ecf20Sopenharmony_ci
1298c2ecf20Sopenharmony_ci	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
1308c2ecf20Sopenharmony_ci		atomic_inc(&ibmr->ic->i_fastreg_wrs);
1318c2ecf20Sopenharmony_ci		cpu_relax();
1328c2ecf20Sopenharmony_ci	}
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ci	ret = ib_map_mr_sg_zbva(frmr->mr, ibmr->sg, ibmr->sg_dma_len,
1358c2ecf20Sopenharmony_ci				&off, PAGE_SIZE);
1368c2ecf20Sopenharmony_ci	if (unlikely(ret != ibmr->sg_dma_len))
1378c2ecf20Sopenharmony_ci		return ret < 0 ? ret : -EINVAL;
1388c2ecf20Sopenharmony_ci
1398c2ecf20Sopenharmony_ci	if (cmpxchg(&frmr->fr_state,
1408c2ecf20Sopenharmony_ci		    FRMR_IS_FREE, FRMR_IS_INUSE) != FRMR_IS_FREE)
1418c2ecf20Sopenharmony_ci		return -EBUSY;
1428c2ecf20Sopenharmony_ci
1438c2ecf20Sopenharmony_ci	atomic_inc(&ibmr->ic->i_fastreg_inuse_count);
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci	/* Perform a WR for the fast_reg_mr. Each individual page
1468c2ecf20Sopenharmony_ci	 * in the sg list is added to the fast reg page list and placed
1478c2ecf20Sopenharmony_ci	 * inside the fast_reg_mr WR.  The key used is a rolling 8bit
1488c2ecf20Sopenharmony_ci	 * counter, which should guarantee uniqueness.
1498c2ecf20Sopenharmony_ci	 */
1508c2ecf20Sopenharmony_ci	ib_update_fast_reg_key(frmr->mr, ibmr->remap_count++);
1518c2ecf20Sopenharmony_ci	frmr->fr_reg = true;
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	memset(&reg_wr, 0, sizeof(reg_wr));
1548c2ecf20Sopenharmony_ci	reg_wr.wr.wr_id = (unsigned long)(void *)ibmr;
1558c2ecf20Sopenharmony_ci	reg_wr.wr.opcode = IB_WR_REG_MR;
1568c2ecf20Sopenharmony_ci	reg_wr.wr.num_sge = 0;
1578c2ecf20Sopenharmony_ci	reg_wr.mr = frmr->mr;
1588c2ecf20Sopenharmony_ci	reg_wr.key = frmr->mr->rkey;
1598c2ecf20Sopenharmony_ci	reg_wr.access = IB_ACCESS_LOCAL_WRITE |
1608c2ecf20Sopenharmony_ci			IB_ACCESS_REMOTE_READ |
1618c2ecf20Sopenharmony_ci			IB_ACCESS_REMOTE_WRITE;
1628c2ecf20Sopenharmony_ci	reg_wr.wr.send_flags = IB_SEND_SIGNALED;
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci	ret = ib_post_send(ibmr->ic->i_cm_id->qp, &reg_wr.wr, NULL);
1658c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
1668c2ecf20Sopenharmony_ci		/* Failure here can be because of -ENOMEM as well */
1678c2ecf20Sopenharmony_ci		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
1688c2ecf20Sopenharmony_ci
1698c2ecf20Sopenharmony_ci		atomic_inc(&ibmr->ic->i_fastreg_wrs);
1708c2ecf20Sopenharmony_ci		if (printk_ratelimit())
1718c2ecf20Sopenharmony_ci			pr_warn("RDS/IB: %s returned error(%d)\n",
1728c2ecf20Sopenharmony_ci				__func__, ret);
1738c2ecf20Sopenharmony_ci		goto out;
1748c2ecf20Sopenharmony_ci	}
1758c2ecf20Sopenharmony_ci
1768c2ecf20Sopenharmony_ci	/* Wait for the registration to complete in order to prevent an invalid
1778c2ecf20Sopenharmony_ci	 * access error resulting from a race between the memory region already
1788c2ecf20Sopenharmony_ci	 * being accessed while registration is still pending.
1798c2ecf20Sopenharmony_ci	 */
1808c2ecf20Sopenharmony_ci	wait_event(frmr->fr_reg_done, !frmr->fr_reg);
1818c2ecf20Sopenharmony_ci
1828c2ecf20Sopenharmony_ciout:
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	return ret;
1858c2ecf20Sopenharmony_ci}
1868c2ecf20Sopenharmony_ci
1878c2ecf20Sopenharmony_cistatic int rds_ib_map_frmr(struct rds_ib_device *rds_ibdev,
1888c2ecf20Sopenharmony_ci			   struct rds_ib_mr_pool *pool,
1898c2ecf20Sopenharmony_ci			   struct rds_ib_mr *ibmr,
1908c2ecf20Sopenharmony_ci			   struct scatterlist *sg, unsigned int sg_len)
1918c2ecf20Sopenharmony_ci{
1928c2ecf20Sopenharmony_ci	struct ib_device *dev = rds_ibdev->dev;
1938c2ecf20Sopenharmony_ci	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
1948c2ecf20Sopenharmony_ci	int i;
1958c2ecf20Sopenharmony_ci	u32 len;
1968c2ecf20Sopenharmony_ci	int ret = 0;
1978c2ecf20Sopenharmony_ci
1988c2ecf20Sopenharmony_ci	/* We want to teardown old ibmr values here and fill it up with
1998c2ecf20Sopenharmony_ci	 * new sg values
2008c2ecf20Sopenharmony_ci	 */
2018c2ecf20Sopenharmony_ci	rds_ib_teardown_mr(ibmr);
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci	ibmr->sg = sg;
2048c2ecf20Sopenharmony_ci	ibmr->sg_len = sg_len;
2058c2ecf20Sopenharmony_ci	ibmr->sg_dma_len = 0;
2068c2ecf20Sopenharmony_ci	frmr->sg_byte_len = 0;
2078c2ecf20Sopenharmony_ci	WARN_ON(ibmr->sg_dma_len);
2088c2ecf20Sopenharmony_ci	ibmr->sg_dma_len = ib_dma_map_sg(dev, ibmr->sg, ibmr->sg_len,
2098c2ecf20Sopenharmony_ci					 DMA_BIDIRECTIONAL);
2108c2ecf20Sopenharmony_ci	if (unlikely(!ibmr->sg_dma_len)) {
2118c2ecf20Sopenharmony_ci		pr_warn("RDS/IB: %s failed!\n", __func__);
2128c2ecf20Sopenharmony_ci		return -EBUSY;
2138c2ecf20Sopenharmony_ci	}
2148c2ecf20Sopenharmony_ci
2158c2ecf20Sopenharmony_ci	frmr->sg_byte_len = 0;
2168c2ecf20Sopenharmony_ci	frmr->dma_npages = 0;
2178c2ecf20Sopenharmony_ci	len = 0;
2188c2ecf20Sopenharmony_ci
2198c2ecf20Sopenharmony_ci	ret = -EINVAL;
2208c2ecf20Sopenharmony_ci	for (i = 0; i < ibmr->sg_dma_len; ++i) {
2218c2ecf20Sopenharmony_ci		unsigned int dma_len = sg_dma_len(&ibmr->sg[i]);
2228c2ecf20Sopenharmony_ci		u64 dma_addr = sg_dma_address(&ibmr->sg[i]);
2238c2ecf20Sopenharmony_ci
2248c2ecf20Sopenharmony_ci		frmr->sg_byte_len += dma_len;
2258c2ecf20Sopenharmony_ci		if (dma_addr & ~PAGE_MASK) {
2268c2ecf20Sopenharmony_ci			if (i > 0)
2278c2ecf20Sopenharmony_ci				goto out_unmap;
2288c2ecf20Sopenharmony_ci			else
2298c2ecf20Sopenharmony_ci				++frmr->dma_npages;
2308c2ecf20Sopenharmony_ci		}
2318c2ecf20Sopenharmony_ci
2328c2ecf20Sopenharmony_ci		if ((dma_addr + dma_len) & ~PAGE_MASK) {
2338c2ecf20Sopenharmony_ci			if (i < ibmr->sg_dma_len - 1)
2348c2ecf20Sopenharmony_ci				goto out_unmap;
2358c2ecf20Sopenharmony_ci			else
2368c2ecf20Sopenharmony_ci				++frmr->dma_npages;
2378c2ecf20Sopenharmony_ci		}
2388c2ecf20Sopenharmony_ci
2398c2ecf20Sopenharmony_ci		len += dma_len;
2408c2ecf20Sopenharmony_ci	}
2418c2ecf20Sopenharmony_ci	frmr->dma_npages += len >> PAGE_SHIFT;
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci	if (frmr->dma_npages > ibmr->pool->max_pages) {
2448c2ecf20Sopenharmony_ci		ret = -EMSGSIZE;
2458c2ecf20Sopenharmony_ci		goto out_unmap;
2468c2ecf20Sopenharmony_ci	}
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	ret = rds_ib_post_reg_frmr(ibmr);
2498c2ecf20Sopenharmony_ci	if (ret)
2508c2ecf20Sopenharmony_ci		goto out_unmap;
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_ci	if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
2538c2ecf20Sopenharmony_ci		rds_ib_stats_inc(s_ib_rdma_mr_8k_used);
2548c2ecf20Sopenharmony_ci	else
2558c2ecf20Sopenharmony_ci		rds_ib_stats_inc(s_ib_rdma_mr_1m_used);
2568c2ecf20Sopenharmony_ci
2578c2ecf20Sopenharmony_ci	return ret;
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_ciout_unmap:
2608c2ecf20Sopenharmony_ci	ib_dma_unmap_sg(rds_ibdev->dev, ibmr->sg, ibmr->sg_len,
2618c2ecf20Sopenharmony_ci			DMA_BIDIRECTIONAL);
2628c2ecf20Sopenharmony_ci	ibmr->sg_dma_len = 0;
2638c2ecf20Sopenharmony_ci	return ret;
2648c2ecf20Sopenharmony_ci}
2658c2ecf20Sopenharmony_ci
2668c2ecf20Sopenharmony_cistatic int rds_ib_post_inv(struct rds_ib_mr *ibmr)
2678c2ecf20Sopenharmony_ci{
2688c2ecf20Sopenharmony_ci	struct ib_send_wr *s_wr;
2698c2ecf20Sopenharmony_ci	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
2708c2ecf20Sopenharmony_ci	struct rdma_cm_id *i_cm_id = ibmr->ic->i_cm_id;
2718c2ecf20Sopenharmony_ci	int ret = -EINVAL;
2728c2ecf20Sopenharmony_ci
2738c2ecf20Sopenharmony_ci	if (!i_cm_id || !i_cm_id->qp || !frmr->mr)
2748c2ecf20Sopenharmony_ci		goto out;
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_ci	if (frmr->fr_state != FRMR_IS_INUSE)
2778c2ecf20Sopenharmony_ci		goto out;
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci	while (atomic_dec_return(&ibmr->ic->i_fastreg_wrs) <= 0) {
2808c2ecf20Sopenharmony_ci		atomic_inc(&ibmr->ic->i_fastreg_wrs);
2818c2ecf20Sopenharmony_ci		cpu_relax();
2828c2ecf20Sopenharmony_ci	}
2838c2ecf20Sopenharmony_ci
2848c2ecf20Sopenharmony_ci	frmr->fr_inv = true;
2858c2ecf20Sopenharmony_ci	s_wr = &frmr->fr_wr;
2868c2ecf20Sopenharmony_ci
2878c2ecf20Sopenharmony_ci	memset(s_wr, 0, sizeof(*s_wr));
2888c2ecf20Sopenharmony_ci	s_wr->wr_id = (unsigned long)(void *)ibmr;
2898c2ecf20Sopenharmony_ci	s_wr->opcode = IB_WR_LOCAL_INV;
2908c2ecf20Sopenharmony_ci	s_wr->ex.invalidate_rkey = frmr->mr->rkey;
2918c2ecf20Sopenharmony_ci	s_wr->send_flags = IB_SEND_SIGNALED;
2928c2ecf20Sopenharmony_ci
2938c2ecf20Sopenharmony_ci	ret = ib_post_send(i_cm_id->qp, s_wr, NULL);
2948c2ecf20Sopenharmony_ci	if (unlikely(ret)) {
2958c2ecf20Sopenharmony_ci		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
2968c2ecf20Sopenharmony_ci		frmr->fr_inv = false;
2978c2ecf20Sopenharmony_ci		/* enforce order of frmr->fr_inv update
2988c2ecf20Sopenharmony_ci		 * before incrementing i_fastreg_wrs
2998c2ecf20Sopenharmony_ci		 */
3008c2ecf20Sopenharmony_ci		smp_mb__before_atomic();
3018c2ecf20Sopenharmony_ci		atomic_inc(&ibmr->ic->i_fastreg_wrs);
3028c2ecf20Sopenharmony_ci		pr_err("RDS/IB: %s returned error(%d)\n", __func__, ret);
3038c2ecf20Sopenharmony_ci		goto out;
3048c2ecf20Sopenharmony_ci	}
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_ci	/* Wait for the FRMR_IS_FREE (or FRMR_IS_STALE) transition in order to
3078c2ecf20Sopenharmony_ci	 * 1) avoid a silly bouncing between "clean_list" and "drop_list"
3088c2ecf20Sopenharmony_ci	 *    triggered by function "rds_ib_reg_frmr" as it is releases frmr
3098c2ecf20Sopenharmony_ci	 *    regions whose state is not "FRMR_IS_FREE" right away.
3108c2ecf20Sopenharmony_ci	 * 2) prevents an invalid access error in a race
3118c2ecf20Sopenharmony_ci	 *    from a pending "IB_WR_LOCAL_INV" operation
3128c2ecf20Sopenharmony_ci	 *    with a teardown ("dma_unmap_sg", "put_page")
3138c2ecf20Sopenharmony_ci	 *    and de-registration ("ib_dereg_mr") of the corresponding
3148c2ecf20Sopenharmony_ci	 *    memory region.
3158c2ecf20Sopenharmony_ci	 */
3168c2ecf20Sopenharmony_ci	wait_event(frmr->fr_inv_done, frmr->fr_state != FRMR_IS_INUSE);
3178c2ecf20Sopenharmony_ci
3188c2ecf20Sopenharmony_ciout:
3198c2ecf20Sopenharmony_ci	return ret;
3208c2ecf20Sopenharmony_ci}
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_civoid rds_ib_mr_cqe_handler(struct rds_ib_connection *ic, struct ib_wc *wc)
3238c2ecf20Sopenharmony_ci{
3248c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = (void *)(unsigned long)wc->wr_id;
3258c2ecf20Sopenharmony_ci	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
3268c2ecf20Sopenharmony_ci
3278c2ecf20Sopenharmony_ci	if (wc->status != IB_WC_SUCCESS) {
3288c2ecf20Sopenharmony_ci		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_STALE);
3298c2ecf20Sopenharmony_ci		if (rds_conn_up(ic->conn))
3308c2ecf20Sopenharmony_ci			rds_ib_conn_error(ic->conn,
3318c2ecf20Sopenharmony_ci					  "frmr completion <%pI4,%pI4> status %u(%s), vendor_err 0x%x, disconnecting and reconnecting\n",
3328c2ecf20Sopenharmony_ci					  &ic->conn->c_laddr,
3338c2ecf20Sopenharmony_ci					  &ic->conn->c_faddr,
3348c2ecf20Sopenharmony_ci					  wc->status,
3358c2ecf20Sopenharmony_ci					  ib_wc_status_msg(wc->status),
3368c2ecf20Sopenharmony_ci					  wc->vendor_err);
3378c2ecf20Sopenharmony_ci	}
3388c2ecf20Sopenharmony_ci
3398c2ecf20Sopenharmony_ci	if (frmr->fr_inv) {
3408c2ecf20Sopenharmony_ci		rds_transition_frwr_state(ibmr, FRMR_IS_INUSE, FRMR_IS_FREE);
3418c2ecf20Sopenharmony_ci		frmr->fr_inv = false;
3428c2ecf20Sopenharmony_ci		wake_up(&frmr->fr_inv_done);
3438c2ecf20Sopenharmony_ci	}
3448c2ecf20Sopenharmony_ci
3458c2ecf20Sopenharmony_ci	if (frmr->fr_reg) {
3468c2ecf20Sopenharmony_ci		frmr->fr_reg = false;
3478c2ecf20Sopenharmony_ci		wake_up(&frmr->fr_reg_done);
3488c2ecf20Sopenharmony_ci	}
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_ci	/* enforce order of frmr->{fr_reg,fr_inv} update
3518c2ecf20Sopenharmony_ci	 * before incrementing i_fastreg_wrs
3528c2ecf20Sopenharmony_ci	 */
3538c2ecf20Sopenharmony_ci	smp_mb__before_atomic();
3548c2ecf20Sopenharmony_ci	atomic_inc(&ic->i_fastreg_wrs);
3558c2ecf20Sopenharmony_ci}
3568c2ecf20Sopenharmony_ci
3578c2ecf20Sopenharmony_civoid rds_ib_unreg_frmr(struct list_head *list, unsigned int *nfreed,
3588c2ecf20Sopenharmony_ci		       unsigned long *unpinned, unsigned int goal)
3598c2ecf20Sopenharmony_ci{
3608c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr, *next;
3618c2ecf20Sopenharmony_ci	struct rds_ib_frmr *frmr;
3628c2ecf20Sopenharmony_ci	int ret = 0, ret2;
3638c2ecf20Sopenharmony_ci	unsigned int freed = *nfreed;
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_ci	/* String all ib_mr's onto one list and hand them to ib_unmap_fmr */
3668c2ecf20Sopenharmony_ci	list_for_each_entry(ibmr, list, unmap_list) {
3678c2ecf20Sopenharmony_ci		if (ibmr->sg_dma_len) {
3688c2ecf20Sopenharmony_ci			ret2 = rds_ib_post_inv(ibmr);
3698c2ecf20Sopenharmony_ci			if (ret2 && !ret)
3708c2ecf20Sopenharmony_ci				ret = ret2;
3718c2ecf20Sopenharmony_ci		}
3728c2ecf20Sopenharmony_ci	}
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci	if (ret)
3758c2ecf20Sopenharmony_ci		pr_warn("RDS/IB: %s failed (err=%d)\n", __func__, ret);
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	/* Now we can destroy the DMA mapping and unpin any pages */
3788c2ecf20Sopenharmony_ci	list_for_each_entry_safe(ibmr, next, list, unmap_list) {
3798c2ecf20Sopenharmony_ci		*unpinned += ibmr->sg_len;
3808c2ecf20Sopenharmony_ci		frmr = &ibmr->u.frmr;
3818c2ecf20Sopenharmony_ci		__rds_ib_teardown_mr(ibmr);
3828c2ecf20Sopenharmony_ci		if (freed < goal || frmr->fr_state == FRMR_IS_STALE) {
3838c2ecf20Sopenharmony_ci			/* Don't de-allocate if the MR is not free yet */
3848c2ecf20Sopenharmony_ci			if (frmr->fr_state == FRMR_IS_INUSE)
3858c2ecf20Sopenharmony_ci				continue;
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci			if (ibmr->pool->pool_type == RDS_IB_MR_8K_POOL)
3888c2ecf20Sopenharmony_ci				rds_ib_stats_inc(s_ib_rdma_mr_8k_free);
3898c2ecf20Sopenharmony_ci			else
3908c2ecf20Sopenharmony_ci				rds_ib_stats_inc(s_ib_rdma_mr_1m_free);
3918c2ecf20Sopenharmony_ci			list_del(&ibmr->unmap_list);
3928c2ecf20Sopenharmony_ci			if (frmr->mr)
3938c2ecf20Sopenharmony_ci				ib_dereg_mr(frmr->mr);
3948c2ecf20Sopenharmony_ci			kfree(ibmr);
3958c2ecf20Sopenharmony_ci			freed++;
3968c2ecf20Sopenharmony_ci		}
3978c2ecf20Sopenharmony_ci	}
3988c2ecf20Sopenharmony_ci	*nfreed = freed;
3998c2ecf20Sopenharmony_ci}
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_cistruct rds_ib_mr *rds_ib_reg_frmr(struct rds_ib_device *rds_ibdev,
4028c2ecf20Sopenharmony_ci				  struct rds_ib_connection *ic,
4038c2ecf20Sopenharmony_ci				  struct scatterlist *sg,
4048c2ecf20Sopenharmony_ci				  unsigned long nents, u32 *key)
4058c2ecf20Sopenharmony_ci{
4068c2ecf20Sopenharmony_ci	struct rds_ib_mr *ibmr = NULL;
4078c2ecf20Sopenharmony_ci	struct rds_ib_frmr *frmr;
4088c2ecf20Sopenharmony_ci	int ret;
4098c2ecf20Sopenharmony_ci
4108c2ecf20Sopenharmony_ci	if (!ic) {
4118c2ecf20Sopenharmony_ci		/* TODO: Add FRWR support for RDS_GET_MR using proxy qp*/
4128c2ecf20Sopenharmony_ci		return ERR_PTR(-EOPNOTSUPP);
4138c2ecf20Sopenharmony_ci	}
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ci	do {
4168c2ecf20Sopenharmony_ci		if (ibmr)
4178c2ecf20Sopenharmony_ci			rds_ib_free_frmr(ibmr, true);
4188c2ecf20Sopenharmony_ci		ibmr = rds_ib_alloc_frmr(rds_ibdev, nents);
4198c2ecf20Sopenharmony_ci		if (IS_ERR(ibmr))
4208c2ecf20Sopenharmony_ci			return ibmr;
4218c2ecf20Sopenharmony_ci		frmr = &ibmr->u.frmr;
4228c2ecf20Sopenharmony_ci	} while (frmr->fr_state != FRMR_IS_FREE);
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci	ibmr->ic = ic;
4258c2ecf20Sopenharmony_ci	ibmr->device = rds_ibdev;
4268c2ecf20Sopenharmony_ci	ret = rds_ib_map_frmr(rds_ibdev, ibmr->pool, ibmr, sg, nents);
4278c2ecf20Sopenharmony_ci	if (ret == 0) {
4288c2ecf20Sopenharmony_ci		*key = frmr->mr->rkey;
4298c2ecf20Sopenharmony_ci	} else {
4308c2ecf20Sopenharmony_ci		rds_ib_free_frmr(ibmr, false);
4318c2ecf20Sopenharmony_ci		ibmr = ERR_PTR(ret);
4328c2ecf20Sopenharmony_ci	}
4338c2ecf20Sopenharmony_ci
4348c2ecf20Sopenharmony_ci	return ibmr;
4358c2ecf20Sopenharmony_ci}
4368c2ecf20Sopenharmony_ci
4378c2ecf20Sopenharmony_civoid rds_ib_free_frmr_list(struct rds_ib_mr *ibmr)
4388c2ecf20Sopenharmony_ci{
4398c2ecf20Sopenharmony_ci	struct rds_ib_mr_pool *pool = ibmr->pool;
4408c2ecf20Sopenharmony_ci	struct rds_ib_frmr *frmr = &ibmr->u.frmr;
4418c2ecf20Sopenharmony_ci
4428c2ecf20Sopenharmony_ci	if (frmr->fr_state == FRMR_IS_STALE)
4438c2ecf20Sopenharmony_ci		llist_add(&ibmr->llnode, &pool->drop_list);
4448c2ecf20Sopenharmony_ci	else
4458c2ecf20Sopenharmony_ci		llist_add(&ibmr->llnode, &pool->free_list);
4468c2ecf20Sopenharmony_ci}
447