18c2ecf20Sopenharmony_ci/*
28c2ecf20Sopenharmony_ci * Copyright(c) 2016 Intel Corporation.
38c2ecf20Sopenharmony_ci *
48c2ecf20Sopenharmony_ci * This file is provided under a dual BSD/GPLv2 license.  When using or
58c2ecf20Sopenharmony_ci * redistributing this file, you may do so under either license.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci * GPL LICENSE SUMMARY
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify
108c2ecf20Sopenharmony_ci * it under the terms of version 2 of the GNU General Public License as
118c2ecf20Sopenharmony_ci * published by the Free Software Foundation.
128c2ecf20Sopenharmony_ci *
138c2ecf20Sopenharmony_ci * This program is distributed in the hope that it will be useful, but
148c2ecf20Sopenharmony_ci * WITHOUT ANY WARRANTY; without even the implied warranty of
158c2ecf20Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
168c2ecf20Sopenharmony_ci * General Public License for more details.
178c2ecf20Sopenharmony_ci *
188c2ecf20Sopenharmony_ci * BSD LICENSE
198c2ecf20Sopenharmony_ci *
208c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or without
218c2ecf20Sopenharmony_ci * modification, are permitted provided that the following conditions
228c2ecf20Sopenharmony_ci * are met:
238c2ecf20Sopenharmony_ci *
248c2ecf20Sopenharmony_ci *  - Redistributions of source code must retain the above copyright
258c2ecf20Sopenharmony_ci *    notice, this list of conditions and the following disclaimer.
268c2ecf20Sopenharmony_ci *  - Redistributions in binary form must reproduce the above copyright
278c2ecf20Sopenharmony_ci *    notice, this list of conditions and the following disclaimer in
288c2ecf20Sopenharmony_ci *    the documentation and/or other materials provided with the
298c2ecf20Sopenharmony_ci *    distribution.
308c2ecf20Sopenharmony_ci *  - Neither the name of Intel Corporation nor the names of its
318c2ecf20Sopenharmony_ci *    contributors may be used to endorse or promote products derived
328c2ecf20Sopenharmony_ci *    from this software without specific prior written permission.
338c2ecf20Sopenharmony_ci *
348c2ecf20Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
358c2ecf20Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
368c2ecf20Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
378c2ecf20Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
388c2ecf20Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
398c2ecf20Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
408c2ecf20Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
418c2ecf20Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
428c2ecf20Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
438c2ecf20Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
448c2ecf20Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
458c2ecf20Sopenharmony_ci *
468c2ecf20Sopenharmony_ci */
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci#include <linux/slab.h>
498c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
508c2ecf20Sopenharmony_ci#include <rdma/ib_umem.h>
518c2ecf20Sopenharmony_ci#include <rdma/rdma_vt.h>
528c2ecf20Sopenharmony_ci#include "vt.h"
538c2ecf20Sopenharmony_ci#include "mr.h"
548c2ecf20Sopenharmony_ci#include "trace.h"
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_ci/**
578c2ecf20Sopenharmony_ci * rvt_driver_mr_init - Init MR resources per driver
588c2ecf20Sopenharmony_ci * @rdi: rvt dev struct
598c2ecf20Sopenharmony_ci *
608c2ecf20Sopenharmony_ci * Do any intilization needed when a driver registers with rdmavt.
618c2ecf20Sopenharmony_ci *
628c2ecf20Sopenharmony_ci * Return: 0 on success or errno on failure
638c2ecf20Sopenharmony_ci */
648c2ecf20Sopenharmony_ciint rvt_driver_mr_init(struct rvt_dev_info *rdi)
658c2ecf20Sopenharmony_ci{
668c2ecf20Sopenharmony_ci	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
678c2ecf20Sopenharmony_ci	unsigned lk_tab_size;
688c2ecf20Sopenharmony_ci	int i;
698c2ecf20Sopenharmony_ci
708c2ecf20Sopenharmony_ci	/*
718c2ecf20Sopenharmony_ci	 * The top hfi1_lkey_table_size bits are used to index the
728c2ecf20Sopenharmony_ci	 * table.  The lower 8 bits can be owned by the user (copied from
738c2ecf20Sopenharmony_ci	 * the LKEY).  The remaining bits act as a generation number or tag.
748c2ecf20Sopenharmony_ci	 */
758c2ecf20Sopenharmony_ci	if (!lkey_table_size)
768c2ecf20Sopenharmony_ci		return -EINVAL;
778c2ecf20Sopenharmony_ci
788c2ecf20Sopenharmony_ci	spin_lock_init(&rdi->lkey_table.lock);
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	/* ensure generation is at least 4 bits */
818c2ecf20Sopenharmony_ci	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
828c2ecf20Sopenharmony_ci		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
838c2ecf20Sopenharmony_ci			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
848c2ecf20Sopenharmony_ci		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
858c2ecf20Sopenharmony_ci		lkey_table_size = rdi->dparms.lkey_table_size;
868c2ecf20Sopenharmony_ci	}
878c2ecf20Sopenharmony_ci	rdi->lkey_table.max = 1 << lkey_table_size;
888c2ecf20Sopenharmony_ci	rdi->lkey_table.shift = 32 - lkey_table_size;
898c2ecf20Sopenharmony_ci	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
908c2ecf20Sopenharmony_ci	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
918c2ecf20Sopenharmony_ci			       vmalloc_node(lk_tab_size, rdi->dparms.node);
928c2ecf20Sopenharmony_ci	if (!rdi->lkey_table.table)
938c2ecf20Sopenharmony_ci		return -ENOMEM;
948c2ecf20Sopenharmony_ci
958c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(rdi->dma_mr, NULL);
968c2ecf20Sopenharmony_ci	for (i = 0; i < rdi->lkey_table.max; i++)
978c2ecf20Sopenharmony_ci		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);
988c2ecf20Sopenharmony_ci
998c2ecf20Sopenharmony_ci	rdi->dparms.props.max_mr = rdi->lkey_table.max;
1008c2ecf20Sopenharmony_ci	return 0;
1018c2ecf20Sopenharmony_ci}
1028c2ecf20Sopenharmony_ci
1038c2ecf20Sopenharmony_ci/**
1048c2ecf20Sopenharmony_ci *rvt_mr_exit: clean up MR
1058c2ecf20Sopenharmony_ci *@rdi: rvt dev structure
1068c2ecf20Sopenharmony_ci *
1078c2ecf20Sopenharmony_ci * called when drivers have unregistered or perhaps failed to register with us
1088c2ecf20Sopenharmony_ci */
1098c2ecf20Sopenharmony_civoid rvt_mr_exit(struct rvt_dev_info *rdi)
1108c2ecf20Sopenharmony_ci{
1118c2ecf20Sopenharmony_ci	if (rdi->dma_mr)
1128c2ecf20Sopenharmony_ci		rvt_pr_err(rdi, "DMA MR not null!\n");
1138c2ecf20Sopenharmony_ci
1148c2ecf20Sopenharmony_ci	vfree(rdi->lkey_table.table);
1158c2ecf20Sopenharmony_ci}
1168c2ecf20Sopenharmony_ci
1178c2ecf20Sopenharmony_cistatic void rvt_deinit_mregion(struct rvt_mregion *mr)
1188c2ecf20Sopenharmony_ci{
1198c2ecf20Sopenharmony_ci	int i = mr->mapsz;
1208c2ecf20Sopenharmony_ci
1218c2ecf20Sopenharmony_ci	mr->mapsz = 0;
1228c2ecf20Sopenharmony_ci	while (i)
1238c2ecf20Sopenharmony_ci		kfree(mr->map[--i]);
1248c2ecf20Sopenharmony_ci	percpu_ref_exit(&mr->refcount);
1258c2ecf20Sopenharmony_ci}
1268c2ecf20Sopenharmony_ci
1278c2ecf20Sopenharmony_cistatic void __rvt_mregion_complete(struct percpu_ref *ref)
1288c2ecf20Sopenharmony_ci{
1298c2ecf20Sopenharmony_ci	struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
1308c2ecf20Sopenharmony_ci					      refcount);
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	complete(&mr->comp);
1338c2ecf20Sopenharmony_ci}
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_cistatic int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
1368c2ecf20Sopenharmony_ci			    int count, unsigned int percpu_flags)
1378c2ecf20Sopenharmony_ci{
1388c2ecf20Sopenharmony_ci	int m, i = 0;
1398c2ecf20Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_ci	mr->mapsz = 0;
1428c2ecf20Sopenharmony_ci	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
1438c2ecf20Sopenharmony_ci	for (; i < m; i++) {
1448c2ecf20Sopenharmony_ci		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
1458c2ecf20Sopenharmony_ci					  dev->dparms.node);
1468c2ecf20Sopenharmony_ci		if (!mr->map[i])
1478c2ecf20Sopenharmony_ci			goto bail;
1488c2ecf20Sopenharmony_ci		mr->mapsz++;
1498c2ecf20Sopenharmony_ci	}
1508c2ecf20Sopenharmony_ci	init_completion(&mr->comp);
1518c2ecf20Sopenharmony_ci	/* count returning the ptr to user */
1528c2ecf20Sopenharmony_ci	if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
1538c2ecf20Sopenharmony_ci			    percpu_flags, GFP_KERNEL))
1548c2ecf20Sopenharmony_ci		goto bail;
1558c2ecf20Sopenharmony_ci
1568c2ecf20Sopenharmony_ci	atomic_set(&mr->lkey_invalid, 0);
1578c2ecf20Sopenharmony_ci	mr->pd = pd;
1588c2ecf20Sopenharmony_ci	mr->max_segs = count;
1598c2ecf20Sopenharmony_ci	return 0;
1608c2ecf20Sopenharmony_cibail:
1618c2ecf20Sopenharmony_ci	rvt_deinit_mregion(mr);
1628c2ecf20Sopenharmony_ci	return -ENOMEM;
1638c2ecf20Sopenharmony_ci}
1648c2ecf20Sopenharmony_ci
1658c2ecf20Sopenharmony_ci/**
1668c2ecf20Sopenharmony_ci * rvt_alloc_lkey - allocate an lkey
1678c2ecf20Sopenharmony_ci * @mr: memory region that this lkey protects
1688c2ecf20Sopenharmony_ci * @dma_region: 0->normal key, 1->restricted DMA key
1698c2ecf20Sopenharmony_ci *
1708c2ecf20Sopenharmony_ci * Returns 0 if successful, otherwise returns -errno.
1718c2ecf20Sopenharmony_ci *
1728c2ecf20Sopenharmony_ci * Increments mr reference count as required.
1738c2ecf20Sopenharmony_ci *
1748c2ecf20Sopenharmony_ci * Sets the lkey field mr for non-dma regions.
1758c2ecf20Sopenharmony_ci *
1768c2ecf20Sopenharmony_ci */
1778c2ecf20Sopenharmony_cistatic int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
1788c2ecf20Sopenharmony_ci{
1798c2ecf20Sopenharmony_ci	unsigned long flags;
1808c2ecf20Sopenharmony_ci	u32 r;
1818c2ecf20Sopenharmony_ci	u32 n;
1828c2ecf20Sopenharmony_ci	int ret = 0;
1838c2ecf20Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
1848c2ecf20Sopenharmony_ci	struct rvt_lkey_table *rkt = &dev->lkey_table;
1858c2ecf20Sopenharmony_ci
1868c2ecf20Sopenharmony_ci	rvt_get_mr(mr);
1878c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rkt->lock, flags);
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_ci	/* special case for dma_mr lkey == 0 */
1908c2ecf20Sopenharmony_ci	if (dma_region) {
1918c2ecf20Sopenharmony_ci		struct rvt_mregion *tmr;
1928c2ecf20Sopenharmony_ci
1938c2ecf20Sopenharmony_ci		tmr = rcu_access_pointer(dev->dma_mr);
1948c2ecf20Sopenharmony_ci		if (!tmr) {
1958c2ecf20Sopenharmony_ci			mr->lkey_published = 1;
1968c2ecf20Sopenharmony_ci			/* Insure published written first */
1978c2ecf20Sopenharmony_ci			rcu_assign_pointer(dev->dma_mr, mr);
1988c2ecf20Sopenharmony_ci			rvt_get_mr(mr);
1998c2ecf20Sopenharmony_ci		}
2008c2ecf20Sopenharmony_ci		goto success;
2018c2ecf20Sopenharmony_ci	}
2028c2ecf20Sopenharmony_ci
2038c2ecf20Sopenharmony_ci	/* Find the next available LKEY */
2048c2ecf20Sopenharmony_ci	r = rkt->next;
2058c2ecf20Sopenharmony_ci	n = r;
2068c2ecf20Sopenharmony_ci	for (;;) {
2078c2ecf20Sopenharmony_ci		if (!rcu_access_pointer(rkt->table[r]))
2088c2ecf20Sopenharmony_ci			break;
2098c2ecf20Sopenharmony_ci		r = (r + 1) & (rkt->max - 1);
2108c2ecf20Sopenharmony_ci		if (r == n)
2118c2ecf20Sopenharmony_ci			goto bail;
2128c2ecf20Sopenharmony_ci	}
2138c2ecf20Sopenharmony_ci	rkt->next = (r + 1) & (rkt->max - 1);
2148c2ecf20Sopenharmony_ci	/*
2158c2ecf20Sopenharmony_ci	 * Make sure lkey is never zero which is reserved to indicate an
2168c2ecf20Sopenharmony_ci	 * unrestricted LKEY.
2178c2ecf20Sopenharmony_ci	 */
2188c2ecf20Sopenharmony_ci	rkt->gen++;
2198c2ecf20Sopenharmony_ci	/*
2208c2ecf20Sopenharmony_ci	 * bits are capped to ensure enough bits for generation number
2218c2ecf20Sopenharmony_ci	 */
2228c2ecf20Sopenharmony_ci	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
2238c2ecf20Sopenharmony_ci		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
2248c2ecf20Sopenharmony_ci		 << 8);
2258c2ecf20Sopenharmony_ci	if (mr->lkey == 0) {
2268c2ecf20Sopenharmony_ci		mr->lkey |= 1 << 8;
2278c2ecf20Sopenharmony_ci		rkt->gen++;
2288c2ecf20Sopenharmony_ci	}
2298c2ecf20Sopenharmony_ci	mr->lkey_published = 1;
2308c2ecf20Sopenharmony_ci	/* Insure published written first */
2318c2ecf20Sopenharmony_ci	rcu_assign_pointer(rkt->table[r], mr);
2328c2ecf20Sopenharmony_cisuccess:
2338c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rkt->lock, flags);
2348c2ecf20Sopenharmony_ciout:
2358c2ecf20Sopenharmony_ci	return ret;
2368c2ecf20Sopenharmony_cibail:
2378c2ecf20Sopenharmony_ci	rvt_put_mr(mr);
2388c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rkt->lock, flags);
2398c2ecf20Sopenharmony_ci	ret = -ENOMEM;
2408c2ecf20Sopenharmony_ci	goto out;
2418c2ecf20Sopenharmony_ci}
2428c2ecf20Sopenharmony_ci
2438c2ecf20Sopenharmony_ci/**
2448c2ecf20Sopenharmony_ci * rvt_free_lkey - free an lkey
2458c2ecf20Sopenharmony_ci * @mr: mr to free from tables
2468c2ecf20Sopenharmony_ci */
2478c2ecf20Sopenharmony_cistatic void rvt_free_lkey(struct rvt_mregion *mr)
2488c2ecf20Sopenharmony_ci{
2498c2ecf20Sopenharmony_ci	unsigned long flags;
2508c2ecf20Sopenharmony_ci	u32 lkey = mr->lkey;
2518c2ecf20Sopenharmony_ci	u32 r;
2528c2ecf20Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
2538c2ecf20Sopenharmony_ci	struct rvt_lkey_table *rkt = &dev->lkey_table;
2548c2ecf20Sopenharmony_ci	int freed = 0;
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	spin_lock_irqsave(&rkt->lock, flags);
2578c2ecf20Sopenharmony_ci	if (!lkey) {
2588c2ecf20Sopenharmony_ci		if (mr->lkey_published) {
2598c2ecf20Sopenharmony_ci			mr->lkey_published = 0;
2608c2ecf20Sopenharmony_ci			/* insure published is written before pointer */
2618c2ecf20Sopenharmony_ci			rcu_assign_pointer(dev->dma_mr, NULL);
2628c2ecf20Sopenharmony_ci			rvt_put_mr(mr);
2638c2ecf20Sopenharmony_ci		}
2648c2ecf20Sopenharmony_ci	} else {
2658c2ecf20Sopenharmony_ci		if (!mr->lkey_published)
2668c2ecf20Sopenharmony_ci			goto out;
2678c2ecf20Sopenharmony_ci		r = lkey >> (32 - dev->dparms.lkey_table_size);
2688c2ecf20Sopenharmony_ci		mr->lkey_published = 0;
2698c2ecf20Sopenharmony_ci		/* insure published is written before pointer */
2708c2ecf20Sopenharmony_ci		rcu_assign_pointer(rkt->table[r], NULL);
2718c2ecf20Sopenharmony_ci	}
2728c2ecf20Sopenharmony_ci	freed++;
2738c2ecf20Sopenharmony_ciout:
2748c2ecf20Sopenharmony_ci	spin_unlock_irqrestore(&rkt->lock, flags);
2758c2ecf20Sopenharmony_ci	if (freed)
2768c2ecf20Sopenharmony_ci		percpu_ref_kill(&mr->refcount);
2778c2ecf20Sopenharmony_ci}
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_cistatic struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
2808c2ecf20Sopenharmony_ci{
2818c2ecf20Sopenharmony_ci	struct rvt_mr *mr;
2828c2ecf20Sopenharmony_ci	int rval = -ENOMEM;
2838c2ecf20Sopenharmony_ci	int m;
2848c2ecf20Sopenharmony_ci
2858c2ecf20Sopenharmony_ci	/* Allocate struct plus pointers to first level page tables. */
2868c2ecf20Sopenharmony_ci	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
2878c2ecf20Sopenharmony_ci	mr = kzalloc(struct_size(mr, mr.map, m), GFP_KERNEL);
2888c2ecf20Sopenharmony_ci	if (!mr)
2898c2ecf20Sopenharmony_ci		goto bail;
2908c2ecf20Sopenharmony_ci
2918c2ecf20Sopenharmony_ci	rval = rvt_init_mregion(&mr->mr, pd, count, 0);
2928c2ecf20Sopenharmony_ci	if (rval)
2938c2ecf20Sopenharmony_ci		goto bail;
2948c2ecf20Sopenharmony_ci	/*
2958c2ecf20Sopenharmony_ci	 * ib_reg_phys_mr() will initialize mr->ibmr except for
2968c2ecf20Sopenharmony_ci	 * lkey and rkey.
2978c2ecf20Sopenharmony_ci	 */
2988c2ecf20Sopenharmony_ci	rval = rvt_alloc_lkey(&mr->mr, 0);
2998c2ecf20Sopenharmony_ci	if (rval)
3008c2ecf20Sopenharmony_ci		goto bail_mregion;
3018c2ecf20Sopenharmony_ci	mr->ibmr.lkey = mr->mr.lkey;
3028c2ecf20Sopenharmony_ci	mr->ibmr.rkey = mr->mr.lkey;
3038c2ecf20Sopenharmony_cidone:
3048c2ecf20Sopenharmony_ci	return mr;
3058c2ecf20Sopenharmony_ci
3068c2ecf20Sopenharmony_cibail_mregion:
3078c2ecf20Sopenharmony_ci	rvt_deinit_mregion(&mr->mr);
3088c2ecf20Sopenharmony_cibail:
3098c2ecf20Sopenharmony_ci	kfree(mr);
3108c2ecf20Sopenharmony_ci	mr = ERR_PTR(rval);
3118c2ecf20Sopenharmony_ci	goto done;
3128c2ecf20Sopenharmony_ci}
3138c2ecf20Sopenharmony_ci
3148c2ecf20Sopenharmony_cistatic void __rvt_free_mr(struct rvt_mr *mr)
3158c2ecf20Sopenharmony_ci{
3168c2ecf20Sopenharmony_ci	rvt_free_lkey(&mr->mr);
3178c2ecf20Sopenharmony_ci	rvt_deinit_mregion(&mr->mr);
3188c2ecf20Sopenharmony_ci	kfree(mr);
3198c2ecf20Sopenharmony_ci}
3208c2ecf20Sopenharmony_ci
3218c2ecf20Sopenharmony_ci/**
3228c2ecf20Sopenharmony_ci * rvt_get_dma_mr - get a DMA memory region
3238c2ecf20Sopenharmony_ci * @pd: protection domain for this memory region
3248c2ecf20Sopenharmony_ci * @acc: access flags
3258c2ecf20Sopenharmony_ci *
3268c2ecf20Sopenharmony_ci * Return: the memory region on success, otherwise returns an errno.
3278c2ecf20Sopenharmony_ci */
3288c2ecf20Sopenharmony_cistruct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
3298c2ecf20Sopenharmony_ci{
3308c2ecf20Sopenharmony_ci	struct rvt_mr *mr;
3318c2ecf20Sopenharmony_ci	struct ib_mr *ret;
3328c2ecf20Sopenharmony_ci	int rval;
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci	if (ibpd_to_rvtpd(pd)->user)
3358c2ecf20Sopenharmony_ci		return ERR_PTR(-EPERM);
3368c2ecf20Sopenharmony_ci
3378c2ecf20Sopenharmony_ci	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
3388c2ecf20Sopenharmony_ci	if (!mr) {
3398c2ecf20Sopenharmony_ci		ret = ERR_PTR(-ENOMEM);
3408c2ecf20Sopenharmony_ci		goto bail;
3418c2ecf20Sopenharmony_ci	}
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_ci	rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
3448c2ecf20Sopenharmony_ci	if (rval) {
3458c2ecf20Sopenharmony_ci		ret = ERR_PTR(rval);
3468c2ecf20Sopenharmony_ci		goto bail;
3478c2ecf20Sopenharmony_ci	}
3488c2ecf20Sopenharmony_ci
3498c2ecf20Sopenharmony_ci	rval = rvt_alloc_lkey(&mr->mr, 1);
3508c2ecf20Sopenharmony_ci	if (rval) {
3518c2ecf20Sopenharmony_ci		ret = ERR_PTR(rval);
3528c2ecf20Sopenharmony_ci		goto bail_mregion;
3538c2ecf20Sopenharmony_ci	}
3548c2ecf20Sopenharmony_ci
3558c2ecf20Sopenharmony_ci	mr->mr.access_flags = acc;
3568c2ecf20Sopenharmony_ci	ret = &mr->ibmr;
3578c2ecf20Sopenharmony_cidone:
3588c2ecf20Sopenharmony_ci	return ret;
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_cibail_mregion:
3618c2ecf20Sopenharmony_ci	rvt_deinit_mregion(&mr->mr);
3628c2ecf20Sopenharmony_cibail:
3638c2ecf20Sopenharmony_ci	kfree(mr);
3648c2ecf20Sopenharmony_ci	goto done;
3658c2ecf20Sopenharmony_ci}
3668c2ecf20Sopenharmony_ci
3678c2ecf20Sopenharmony_ci/**
3688c2ecf20Sopenharmony_ci * rvt_reg_user_mr - register a userspace memory region
3698c2ecf20Sopenharmony_ci * @pd: protection domain for this memory region
3708c2ecf20Sopenharmony_ci * @start: starting userspace address
3718c2ecf20Sopenharmony_ci * @length: length of region to register
3728c2ecf20Sopenharmony_ci * @mr_access_flags: access flags for this memory region
3738c2ecf20Sopenharmony_ci * @udata: unused by the driver
3748c2ecf20Sopenharmony_ci *
3758c2ecf20Sopenharmony_ci * Return: the memory region on success, otherwise returns an errno.
3768c2ecf20Sopenharmony_ci */
3778c2ecf20Sopenharmony_cistruct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
3788c2ecf20Sopenharmony_ci			      u64 virt_addr, int mr_access_flags,
3798c2ecf20Sopenharmony_ci			      struct ib_udata *udata)
3808c2ecf20Sopenharmony_ci{
3818c2ecf20Sopenharmony_ci	struct rvt_mr *mr;
3828c2ecf20Sopenharmony_ci	struct ib_umem *umem;
3838c2ecf20Sopenharmony_ci	struct sg_page_iter sg_iter;
3848c2ecf20Sopenharmony_ci	int n, m;
3858c2ecf20Sopenharmony_ci	struct ib_mr *ret;
3868c2ecf20Sopenharmony_ci
3878c2ecf20Sopenharmony_ci	if (length == 0)
3888c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	umem = ib_umem_get(pd->device, start, length, mr_access_flags);
3918c2ecf20Sopenharmony_ci	if (IS_ERR(umem))
3928c2ecf20Sopenharmony_ci		return (void *)umem;
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	n = ib_umem_num_pages(umem);
3958c2ecf20Sopenharmony_ci
3968c2ecf20Sopenharmony_ci	mr = __rvt_alloc_mr(n, pd);
3978c2ecf20Sopenharmony_ci	if (IS_ERR(mr)) {
3988c2ecf20Sopenharmony_ci		ret = (struct ib_mr *)mr;
3998c2ecf20Sopenharmony_ci		goto bail_umem;
4008c2ecf20Sopenharmony_ci	}
4018c2ecf20Sopenharmony_ci
4028c2ecf20Sopenharmony_ci	mr->mr.user_base = start;
4038c2ecf20Sopenharmony_ci	mr->mr.iova = virt_addr;
4048c2ecf20Sopenharmony_ci	mr->mr.length = length;
4058c2ecf20Sopenharmony_ci	mr->mr.offset = ib_umem_offset(umem);
4068c2ecf20Sopenharmony_ci	mr->mr.access_flags = mr_access_flags;
4078c2ecf20Sopenharmony_ci	mr->umem = umem;
4088c2ecf20Sopenharmony_ci
4098c2ecf20Sopenharmony_ci	mr->mr.page_shift = PAGE_SHIFT;
4108c2ecf20Sopenharmony_ci	m = 0;
4118c2ecf20Sopenharmony_ci	n = 0;
4128c2ecf20Sopenharmony_ci	for_each_sg_page (umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
4138c2ecf20Sopenharmony_ci		void *vaddr;
4148c2ecf20Sopenharmony_ci
4158c2ecf20Sopenharmony_ci		vaddr = page_address(sg_page_iter_page(&sg_iter));
4168c2ecf20Sopenharmony_ci		if (!vaddr) {
4178c2ecf20Sopenharmony_ci			ret = ERR_PTR(-EINVAL);
4188c2ecf20Sopenharmony_ci			goto bail_inval;
4198c2ecf20Sopenharmony_ci		}
4208c2ecf20Sopenharmony_ci		mr->mr.map[m]->segs[n].vaddr = vaddr;
4218c2ecf20Sopenharmony_ci		mr->mr.map[m]->segs[n].length = PAGE_SIZE;
4228c2ecf20Sopenharmony_ci		trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, PAGE_SIZE);
4238c2ecf20Sopenharmony_ci		if (++n == RVT_SEGSZ) {
4248c2ecf20Sopenharmony_ci			m++;
4258c2ecf20Sopenharmony_ci			n = 0;
4268c2ecf20Sopenharmony_ci		}
4278c2ecf20Sopenharmony_ci	}
4288c2ecf20Sopenharmony_ci	return &mr->ibmr;
4298c2ecf20Sopenharmony_ci
4308c2ecf20Sopenharmony_cibail_inval:
4318c2ecf20Sopenharmony_ci	__rvt_free_mr(mr);
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_cibail_umem:
4348c2ecf20Sopenharmony_ci	ib_umem_release(umem);
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_ci	return ret;
4378c2ecf20Sopenharmony_ci}
4388c2ecf20Sopenharmony_ci
4398c2ecf20Sopenharmony_ci/**
4408c2ecf20Sopenharmony_ci * rvt_dereg_clean_qp_cb - callback from iterator
4418c2ecf20Sopenharmony_ci * @qp - the qp
4428c2ecf20Sopenharmony_ci * @v - the mregion (as u64)
4438c2ecf20Sopenharmony_ci *
4448c2ecf20Sopenharmony_ci * This routine fields the callback for all QPs and
4458c2ecf20Sopenharmony_ci * for QPs in the same PD as the MR will call the
4468c2ecf20Sopenharmony_ci * rvt_qp_mr_clean() to potentially cleanup references.
4478c2ecf20Sopenharmony_ci */
4488c2ecf20Sopenharmony_cistatic void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v)
4498c2ecf20Sopenharmony_ci{
4508c2ecf20Sopenharmony_ci	struct rvt_mregion *mr = (struct rvt_mregion *)v;
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci	/* skip PDs that are not ours */
4538c2ecf20Sopenharmony_ci	if (mr->pd != qp->ibqp.pd)
4548c2ecf20Sopenharmony_ci		return;
4558c2ecf20Sopenharmony_ci	rvt_qp_mr_clean(qp, mr->lkey);
4568c2ecf20Sopenharmony_ci}
4578c2ecf20Sopenharmony_ci
4588c2ecf20Sopenharmony_ci/**
4598c2ecf20Sopenharmony_ci * rvt_dereg_clean_qps - find QPs for reference cleanup
4608c2ecf20Sopenharmony_ci * @mr - the MR that is being deregistered
4618c2ecf20Sopenharmony_ci *
4628c2ecf20Sopenharmony_ci * This routine iterates RC QPs looking for references
4638c2ecf20Sopenharmony_ci * to the lkey noted in mr.
4648c2ecf20Sopenharmony_ci */
4658c2ecf20Sopenharmony_cistatic void rvt_dereg_clean_qps(struct rvt_mregion *mr)
4668c2ecf20Sopenharmony_ci{
4678c2ecf20Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
4688c2ecf20Sopenharmony_ci
4698c2ecf20Sopenharmony_ci	rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb);
4708c2ecf20Sopenharmony_ci}
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_ci/**
4738c2ecf20Sopenharmony_ci * rvt_check_refs - check references
4748c2ecf20Sopenharmony_ci * @mr - the megion
4758c2ecf20Sopenharmony_ci * @t - the caller identification
4768c2ecf20Sopenharmony_ci *
4778c2ecf20Sopenharmony_ci * This routine checks MRs holding a reference during
4788c2ecf20Sopenharmony_ci * when being de-registered.
4798c2ecf20Sopenharmony_ci *
4808c2ecf20Sopenharmony_ci * If the count is non-zero, the code calls a clean routine then
4818c2ecf20Sopenharmony_ci * waits for the timeout for the count to zero.
4828c2ecf20Sopenharmony_ci */
4838c2ecf20Sopenharmony_cistatic int rvt_check_refs(struct rvt_mregion *mr, const char *t)
4848c2ecf20Sopenharmony_ci{
4858c2ecf20Sopenharmony_ci	unsigned long timeout;
4868c2ecf20Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
4878c2ecf20Sopenharmony_ci
4888c2ecf20Sopenharmony_ci	if (mr->lkey) {
4898c2ecf20Sopenharmony_ci		/* avoid dma mr */
4908c2ecf20Sopenharmony_ci		rvt_dereg_clean_qps(mr);
4918c2ecf20Sopenharmony_ci		/* @mr was indexed on rcu protected @lkey_table */
4928c2ecf20Sopenharmony_ci		synchronize_rcu();
4938c2ecf20Sopenharmony_ci	}
4948c2ecf20Sopenharmony_ci
4958c2ecf20Sopenharmony_ci	timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
4968c2ecf20Sopenharmony_ci	if (!timeout) {
4978c2ecf20Sopenharmony_ci		rvt_pr_err(rdi,
4988c2ecf20Sopenharmony_ci			   "%s timeout mr %p pd %p lkey %x refcount %ld\n",
4998c2ecf20Sopenharmony_ci			   t, mr, mr->pd, mr->lkey,
5008c2ecf20Sopenharmony_ci			   atomic_long_read(&mr->refcount.data->count));
5018c2ecf20Sopenharmony_ci		rvt_get_mr(mr);
5028c2ecf20Sopenharmony_ci		return -EBUSY;
5038c2ecf20Sopenharmony_ci	}
5048c2ecf20Sopenharmony_ci	return 0;
5058c2ecf20Sopenharmony_ci}
5068c2ecf20Sopenharmony_ci
5078c2ecf20Sopenharmony_ci/**
5088c2ecf20Sopenharmony_ci * rvt_mr_has_lkey - is MR
5098c2ecf20Sopenharmony_ci * @mr - the mregion
5108c2ecf20Sopenharmony_ci * @lkey - the lkey
5118c2ecf20Sopenharmony_ci */
5128c2ecf20Sopenharmony_cibool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
5138c2ecf20Sopenharmony_ci{
5148c2ecf20Sopenharmony_ci	return mr && lkey == mr->lkey;
5158c2ecf20Sopenharmony_ci}
5168c2ecf20Sopenharmony_ci
5178c2ecf20Sopenharmony_ci/**
5188c2ecf20Sopenharmony_ci * rvt_ss_has_lkey - is mr in sge tests
5198c2ecf20Sopenharmony_ci * @ss - the sge state
5208c2ecf20Sopenharmony_ci * @lkey
5218c2ecf20Sopenharmony_ci *
5228c2ecf20Sopenharmony_ci * This code tests for an MR in the indicated
5238c2ecf20Sopenharmony_ci * sge state.
5248c2ecf20Sopenharmony_ci */
5258c2ecf20Sopenharmony_cibool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey)
5268c2ecf20Sopenharmony_ci{
5278c2ecf20Sopenharmony_ci	int i;
5288c2ecf20Sopenharmony_ci	bool rval = false;
5298c2ecf20Sopenharmony_ci
5308c2ecf20Sopenharmony_ci	if (!ss->num_sge)
5318c2ecf20Sopenharmony_ci		return rval;
5328c2ecf20Sopenharmony_ci	/* first one */
5338c2ecf20Sopenharmony_ci	rval = rvt_mr_has_lkey(ss->sge.mr, lkey);
5348c2ecf20Sopenharmony_ci	/* any others */
5358c2ecf20Sopenharmony_ci	for (i = 0; !rval && i < ss->num_sge - 1; i++)
5368c2ecf20Sopenharmony_ci		rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey);
5378c2ecf20Sopenharmony_ci	return rval;
5388c2ecf20Sopenharmony_ci}
5398c2ecf20Sopenharmony_ci
5408c2ecf20Sopenharmony_ci/**
5418c2ecf20Sopenharmony_ci * rvt_dereg_mr - unregister and free a memory region
5428c2ecf20Sopenharmony_ci * @ibmr: the memory region to free
5438c2ecf20Sopenharmony_ci *
5448c2ecf20Sopenharmony_ci *
5458c2ecf20Sopenharmony_ci * Note that this is called to free MRs created by rvt_get_dma_mr()
5468c2ecf20Sopenharmony_ci * or rvt_reg_user_mr().
5478c2ecf20Sopenharmony_ci *
5488c2ecf20Sopenharmony_ci * Returns 0 on success.
5498c2ecf20Sopenharmony_ci */
5508c2ecf20Sopenharmony_ciint rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
5518c2ecf20Sopenharmony_ci{
5528c2ecf20Sopenharmony_ci	struct rvt_mr *mr = to_imr(ibmr);
5538c2ecf20Sopenharmony_ci	int ret;
5548c2ecf20Sopenharmony_ci
5558c2ecf20Sopenharmony_ci	rvt_free_lkey(&mr->mr);
5568c2ecf20Sopenharmony_ci
5578c2ecf20Sopenharmony_ci	rvt_put_mr(&mr->mr); /* will set completion if last */
5588c2ecf20Sopenharmony_ci	ret = rvt_check_refs(&mr->mr, __func__);
5598c2ecf20Sopenharmony_ci	if (ret)
5608c2ecf20Sopenharmony_ci		goto out;
5618c2ecf20Sopenharmony_ci	rvt_deinit_mregion(&mr->mr);
5628c2ecf20Sopenharmony_ci	ib_umem_release(mr->umem);
5638c2ecf20Sopenharmony_ci	kfree(mr);
5648c2ecf20Sopenharmony_ciout:
5658c2ecf20Sopenharmony_ci	return ret;
5668c2ecf20Sopenharmony_ci}
5678c2ecf20Sopenharmony_ci
5688c2ecf20Sopenharmony_ci/**
5698c2ecf20Sopenharmony_ci * rvt_alloc_mr - Allocate a memory region usable with the
5708c2ecf20Sopenharmony_ci * @pd: protection domain for this memory region
5718c2ecf20Sopenharmony_ci * @mr_type: mem region type
5728c2ecf20Sopenharmony_ci * @max_num_sg: Max number of segments allowed
5738c2ecf20Sopenharmony_ci *
5748c2ecf20Sopenharmony_ci * Return: the memory region on success, otherwise return an errno.
5758c2ecf20Sopenharmony_ci */
5768c2ecf20Sopenharmony_cistruct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
5778c2ecf20Sopenharmony_ci			   u32 max_num_sg)
5788c2ecf20Sopenharmony_ci{
5798c2ecf20Sopenharmony_ci	struct rvt_mr *mr;
5808c2ecf20Sopenharmony_ci
5818c2ecf20Sopenharmony_ci	if (mr_type != IB_MR_TYPE_MEM_REG)
5828c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci	mr = __rvt_alloc_mr(max_num_sg, pd);
5858c2ecf20Sopenharmony_ci	if (IS_ERR(mr))
5868c2ecf20Sopenharmony_ci		return (struct ib_mr *)mr;
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_ci	return &mr->ibmr;
5898c2ecf20Sopenharmony_ci}
5908c2ecf20Sopenharmony_ci
5918c2ecf20Sopenharmony_ci/**
5928c2ecf20Sopenharmony_ci * rvt_set_page - page assignment function called by ib_sg_to_pages
5938c2ecf20Sopenharmony_ci * @ibmr: memory region
5948c2ecf20Sopenharmony_ci * @addr: dma address of mapped page
5958c2ecf20Sopenharmony_ci *
5968c2ecf20Sopenharmony_ci * Return: 0 on success
5978c2ecf20Sopenharmony_ci */
5988c2ecf20Sopenharmony_cistatic int rvt_set_page(struct ib_mr *ibmr, u64 addr)
5998c2ecf20Sopenharmony_ci{
6008c2ecf20Sopenharmony_ci	struct rvt_mr *mr = to_imr(ibmr);
6018c2ecf20Sopenharmony_ci	u32 ps = 1 << mr->mr.page_shift;
6028c2ecf20Sopenharmony_ci	u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
6038c2ecf20Sopenharmony_ci	int m, n;
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	if (unlikely(mapped_segs == mr->mr.max_segs))
6068c2ecf20Sopenharmony_ci		return -ENOMEM;
6078c2ecf20Sopenharmony_ci
6088c2ecf20Sopenharmony_ci	m = mapped_segs / RVT_SEGSZ;
6098c2ecf20Sopenharmony_ci	n = mapped_segs % RVT_SEGSZ;
6108c2ecf20Sopenharmony_ci	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
6118c2ecf20Sopenharmony_ci	mr->mr.map[m]->segs[n].length = ps;
6128c2ecf20Sopenharmony_ci	mr->mr.length += ps;
6138c2ecf20Sopenharmony_ci	trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
6148c2ecf20Sopenharmony_ci
6158c2ecf20Sopenharmony_ci	return 0;
6168c2ecf20Sopenharmony_ci}
6178c2ecf20Sopenharmony_ci
6188c2ecf20Sopenharmony_ci/**
6198c2ecf20Sopenharmony_ci * rvt_map_mr_sg - map sg list and set it the memory region
6208c2ecf20Sopenharmony_ci * @ibmr: memory region
6218c2ecf20Sopenharmony_ci * @sg: dma mapped scatterlist
6228c2ecf20Sopenharmony_ci * @sg_nents: number of entries in sg
6238c2ecf20Sopenharmony_ci * @sg_offset: offset in bytes into sg
6248c2ecf20Sopenharmony_ci *
6258c2ecf20Sopenharmony_ci * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
6268c2ecf20Sopenharmony_ci *
6278c2ecf20Sopenharmony_ci * Return: number of sg elements mapped to the memory region
6288c2ecf20Sopenharmony_ci */
6298c2ecf20Sopenharmony_ciint rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
6308c2ecf20Sopenharmony_ci		  int sg_nents, unsigned int *sg_offset)
6318c2ecf20Sopenharmony_ci{
6328c2ecf20Sopenharmony_ci	struct rvt_mr *mr = to_imr(ibmr);
6338c2ecf20Sopenharmony_ci	int ret;
6348c2ecf20Sopenharmony_ci
6358c2ecf20Sopenharmony_ci	mr->mr.length = 0;
6368c2ecf20Sopenharmony_ci	mr->mr.page_shift = PAGE_SHIFT;
6378c2ecf20Sopenharmony_ci	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
6388c2ecf20Sopenharmony_ci	mr->mr.user_base = ibmr->iova;
6398c2ecf20Sopenharmony_ci	mr->mr.iova = ibmr->iova;
6408c2ecf20Sopenharmony_ci	mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
6418c2ecf20Sopenharmony_ci	mr->mr.length = (size_t)ibmr->length;
6428c2ecf20Sopenharmony_ci	trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset);
6438c2ecf20Sopenharmony_ci	return ret;
6448c2ecf20Sopenharmony_ci}
6458c2ecf20Sopenharmony_ci
6468c2ecf20Sopenharmony_ci/**
6478c2ecf20Sopenharmony_ci * rvt_fast_reg_mr - fast register physical MR
6488c2ecf20Sopenharmony_ci * @qp: the queue pair where the work request comes from
6498c2ecf20Sopenharmony_ci * @ibmr: the memory region to be registered
6508c2ecf20Sopenharmony_ci * @key: updated key for this memory region
6518c2ecf20Sopenharmony_ci * @access: access flags for this memory region
6528c2ecf20Sopenharmony_ci *
6538c2ecf20Sopenharmony_ci * Returns 0 on success.
6548c2ecf20Sopenharmony_ci */
6558c2ecf20Sopenharmony_ciint rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
6568c2ecf20Sopenharmony_ci		    int access)
6578c2ecf20Sopenharmony_ci{
6588c2ecf20Sopenharmony_ci	struct rvt_mr *mr = to_imr(ibmr);
6598c2ecf20Sopenharmony_ci
6608c2ecf20Sopenharmony_ci	if (qp->ibqp.pd != mr->mr.pd)
6618c2ecf20Sopenharmony_ci		return -EACCES;
6628c2ecf20Sopenharmony_ci
6638c2ecf20Sopenharmony_ci	/* not applicable to dma MR or user MR */
6648c2ecf20Sopenharmony_ci	if (!mr->mr.lkey || mr->umem)
6658c2ecf20Sopenharmony_ci		return -EINVAL;
6668c2ecf20Sopenharmony_ci
6678c2ecf20Sopenharmony_ci	if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
6688c2ecf20Sopenharmony_ci		return -EINVAL;
6698c2ecf20Sopenharmony_ci
6708c2ecf20Sopenharmony_ci	ibmr->lkey = key;
6718c2ecf20Sopenharmony_ci	ibmr->rkey = key;
6728c2ecf20Sopenharmony_ci	mr->mr.lkey = key;
6738c2ecf20Sopenharmony_ci	mr->mr.access_flags = access;
6748c2ecf20Sopenharmony_ci	mr->mr.iova = ibmr->iova;
6758c2ecf20Sopenharmony_ci	atomic_set(&mr->mr.lkey_invalid, 0);
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci	return 0;
6788c2ecf20Sopenharmony_ci}
6798c2ecf20Sopenharmony_ciEXPORT_SYMBOL(rvt_fast_reg_mr);
6808c2ecf20Sopenharmony_ci
6818c2ecf20Sopenharmony_ci/**
6828c2ecf20Sopenharmony_ci * rvt_invalidate_rkey - invalidate an MR rkey
6838c2ecf20Sopenharmony_ci * @qp: queue pair associated with the invalidate op
6848c2ecf20Sopenharmony_ci * @rkey: rkey to invalidate
6858c2ecf20Sopenharmony_ci *
6868c2ecf20Sopenharmony_ci * Returns 0 on success.
6878c2ecf20Sopenharmony_ci */
6888c2ecf20Sopenharmony_ciint rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
6898c2ecf20Sopenharmony_ci{
6908c2ecf20Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
6918c2ecf20Sopenharmony_ci	struct rvt_lkey_table *rkt = &dev->lkey_table;
6928c2ecf20Sopenharmony_ci	struct rvt_mregion *mr;
6938c2ecf20Sopenharmony_ci
6948c2ecf20Sopenharmony_ci	if (rkey == 0)
6958c2ecf20Sopenharmony_ci		return -EINVAL;
6968c2ecf20Sopenharmony_ci
6978c2ecf20Sopenharmony_ci	rcu_read_lock();
6988c2ecf20Sopenharmony_ci	mr = rcu_dereference(
6998c2ecf20Sopenharmony_ci		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
7008c2ecf20Sopenharmony_ci	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
7018c2ecf20Sopenharmony_ci		goto bail;
7028c2ecf20Sopenharmony_ci
7038c2ecf20Sopenharmony_ci	atomic_set(&mr->lkey_invalid, 1);
7048c2ecf20Sopenharmony_ci	rcu_read_unlock();
7058c2ecf20Sopenharmony_ci	return 0;
7068c2ecf20Sopenharmony_ci
7078c2ecf20Sopenharmony_cibail:
7088c2ecf20Sopenharmony_ci	rcu_read_unlock();
7098c2ecf20Sopenharmony_ci	return -EINVAL;
7108c2ecf20Sopenharmony_ci}
7118c2ecf20Sopenharmony_ciEXPORT_SYMBOL(rvt_invalidate_rkey);
7128c2ecf20Sopenharmony_ci
7138c2ecf20Sopenharmony_ci/**
7148c2ecf20Sopenharmony_ci * rvt_sge_adjacent - is isge compressible
7158c2ecf20Sopenharmony_ci * @last_sge: last outgoing SGE written
7168c2ecf20Sopenharmony_ci * @sge: SGE to check
7178c2ecf20Sopenharmony_ci *
7188c2ecf20Sopenharmony_ci * If adjacent will update last_sge to add length.
7198c2ecf20Sopenharmony_ci *
7208c2ecf20Sopenharmony_ci * Return: true if isge is adjacent to last sge
7218c2ecf20Sopenharmony_ci */
7228c2ecf20Sopenharmony_cistatic inline bool rvt_sge_adjacent(struct rvt_sge *last_sge,
7238c2ecf20Sopenharmony_ci				    struct ib_sge *sge)
7248c2ecf20Sopenharmony_ci{
7258c2ecf20Sopenharmony_ci	if (last_sge && sge->lkey == last_sge->mr->lkey &&
7268c2ecf20Sopenharmony_ci	    ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) {
7278c2ecf20Sopenharmony_ci		if (sge->lkey) {
7288c2ecf20Sopenharmony_ci			if (unlikely((sge->addr - last_sge->mr->user_base +
7298c2ecf20Sopenharmony_ci			      sge->length > last_sge->mr->length)))
7308c2ecf20Sopenharmony_ci				return false; /* overrun, caller will catch */
7318c2ecf20Sopenharmony_ci		} else {
7328c2ecf20Sopenharmony_ci			last_sge->length += sge->length;
7338c2ecf20Sopenharmony_ci		}
7348c2ecf20Sopenharmony_ci		last_sge->sge_length += sge->length;
7358c2ecf20Sopenharmony_ci		trace_rvt_sge_adjacent(last_sge, sge);
7368c2ecf20Sopenharmony_ci		return true;
7378c2ecf20Sopenharmony_ci	}
7388c2ecf20Sopenharmony_ci	return false;
7398c2ecf20Sopenharmony_ci}
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_ci/**
7428c2ecf20Sopenharmony_ci * rvt_lkey_ok - check IB SGE for validity and initialize
7438c2ecf20Sopenharmony_ci * @rkt: table containing lkey to check SGE against
7448c2ecf20Sopenharmony_ci * @pd: protection domain
7458c2ecf20Sopenharmony_ci * @isge: outgoing internal SGE
7468c2ecf20Sopenharmony_ci * @last_sge: last outgoing SGE written
7478c2ecf20Sopenharmony_ci * @sge: SGE to check
7488c2ecf20Sopenharmony_ci * @acc: access flags
7498c2ecf20Sopenharmony_ci *
7508c2ecf20Sopenharmony_ci * Check the IB SGE for validity and initialize our internal version
7518c2ecf20Sopenharmony_ci * of it.
7528c2ecf20Sopenharmony_ci *
7538c2ecf20Sopenharmony_ci * Increments the reference count when a new sge is stored.
7548c2ecf20Sopenharmony_ci *
7558c2ecf20Sopenharmony_ci * Return: 0 if compressed, 1 if added , otherwise returns -errno.
7568c2ecf20Sopenharmony_ci */
7578c2ecf20Sopenharmony_ciint rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
7588c2ecf20Sopenharmony_ci		struct rvt_sge *isge, struct rvt_sge *last_sge,
7598c2ecf20Sopenharmony_ci		struct ib_sge *sge, int acc)
7608c2ecf20Sopenharmony_ci{
7618c2ecf20Sopenharmony_ci	struct rvt_mregion *mr;
7628c2ecf20Sopenharmony_ci	unsigned n, m;
7638c2ecf20Sopenharmony_ci	size_t off;
7648c2ecf20Sopenharmony_ci
7658c2ecf20Sopenharmony_ci	/*
7668c2ecf20Sopenharmony_ci	 * We use LKEY == zero for kernel virtual addresses
7678c2ecf20Sopenharmony_ci	 * (see rvt_get_dma_mr()).
7688c2ecf20Sopenharmony_ci	 */
7698c2ecf20Sopenharmony_ci	if (sge->lkey == 0) {
7708c2ecf20Sopenharmony_ci		struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci		if (pd->user)
7738c2ecf20Sopenharmony_ci			return -EINVAL;
7748c2ecf20Sopenharmony_ci		if (rvt_sge_adjacent(last_sge, sge))
7758c2ecf20Sopenharmony_ci			return 0;
7768c2ecf20Sopenharmony_ci		rcu_read_lock();
7778c2ecf20Sopenharmony_ci		mr = rcu_dereference(dev->dma_mr);
7788c2ecf20Sopenharmony_ci		if (!mr)
7798c2ecf20Sopenharmony_ci			goto bail;
7808c2ecf20Sopenharmony_ci		rvt_get_mr(mr);
7818c2ecf20Sopenharmony_ci		rcu_read_unlock();
7828c2ecf20Sopenharmony_ci
7838c2ecf20Sopenharmony_ci		isge->mr = mr;
7848c2ecf20Sopenharmony_ci		isge->vaddr = (void *)sge->addr;
7858c2ecf20Sopenharmony_ci		isge->length = sge->length;
7868c2ecf20Sopenharmony_ci		isge->sge_length = sge->length;
7878c2ecf20Sopenharmony_ci		isge->m = 0;
7888c2ecf20Sopenharmony_ci		isge->n = 0;
7898c2ecf20Sopenharmony_ci		goto ok;
7908c2ecf20Sopenharmony_ci	}
7918c2ecf20Sopenharmony_ci	if (rvt_sge_adjacent(last_sge, sge))
7928c2ecf20Sopenharmony_ci		return 0;
7938c2ecf20Sopenharmony_ci	rcu_read_lock();
7948c2ecf20Sopenharmony_ci	mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
7958c2ecf20Sopenharmony_ci	if (!mr)
7968c2ecf20Sopenharmony_ci		goto bail;
7978c2ecf20Sopenharmony_ci	rvt_get_mr(mr);
7988c2ecf20Sopenharmony_ci	if (!READ_ONCE(mr->lkey_published))
7998c2ecf20Sopenharmony_ci		goto bail_unref;
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci	if (unlikely(atomic_read(&mr->lkey_invalid) ||
8028c2ecf20Sopenharmony_ci		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
8038c2ecf20Sopenharmony_ci		goto bail_unref;
8048c2ecf20Sopenharmony_ci
8058c2ecf20Sopenharmony_ci	off = sge->addr - mr->user_base;
8068c2ecf20Sopenharmony_ci	if (unlikely(sge->addr < mr->user_base ||
8078c2ecf20Sopenharmony_ci		     off + sge->length > mr->length ||
8088c2ecf20Sopenharmony_ci		     (mr->access_flags & acc) != acc))
8098c2ecf20Sopenharmony_ci		goto bail_unref;
8108c2ecf20Sopenharmony_ci	rcu_read_unlock();
8118c2ecf20Sopenharmony_ci
8128c2ecf20Sopenharmony_ci	off += mr->offset;
8138c2ecf20Sopenharmony_ci	if (mr->page_shift) {
8148c2ecf20Sopenharmony_ci		/*
8158c2ecf20Sopenharmony_ci		 * page sizes are uniform power of 2 so no loop is necessary
8168c2ecf20Sopenharmony_ci		 * entries_spanned_by_off is the number of times the loop below
8178c2ecf20Sopenharmony_ci		 * would have executed.
8188c2ecf20Sopenharmony_ci		*/
8198c2ecf20Sopenharmony_ci		size_t entries_spanned_by_off;
8208c2ecf20Sopenharmony_ci
8218c2ecf20Sopenharmony_ci		entries_spanned_by_off = off >> mr->page_shift;
8228c2ecf20Sopenharmony_ci		off -= (entries_spanned_by_off << mr->page_shift);
8238c2ecf20Sopenharmony_ci		m = entries_spanned_by_off / RVT_SEGSZ;
8248c2ecf20Sopenharmony_ci		n = entries_spanned_by_off % RVT_SEGSZ;
8258c2ecf20Sopenharmony_ci	} else {
8268c2ecf20Sopenharmony_ci		m = 0;
8278c2ecf20Sopenharmony_ci		n = 0;
8288c2ecf20Sopenharmony_ci		while (off >= mr->map[m]->segs[n].length) {
8298c2ecf20Sopenharmony_ci			off -= mr->map[m]->segs[n].length;
8308c2ecf20Sopenharmony_ci			n++;
8318c2ecf20Sopenharmony_ci			if (n >= RVT_SEGSZ) {
8328c2ecf20Sopenharmony_ci				m++;
8338c2ecf20Sopenharmony_ci				n = 0;
8348c2ecf20Sopenharmony_ci			}
8358c2ecf20Sopenharmony_ci		}
8368c2ecf20Sopenharmony_ci	}
8378c2ecf20Sopenharmony_ci	isge->mr = mr;
8388c2ecf20Sopenharmony_ci	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
8398c2ecf20Sopenharmony_ci	isge->length = mr->map[m]->segs[n].length - off;
8408c2ecf20Sopenharmony_ci	isge->sge_length = sge->length;
8418c2ecf20Sopenharmony_ci	isge->m = m;
8428c2ecf20Sopenharmony_ci	isge->n = n;
8438c2ecf20Sopenharmony_ciok:
8448c2ecf20Sopenharmony_ci	trace_rvt_sge_new(isge, sge);
8458c2ecf20Sopenharmony_ci	return 1;
8468c2ecf20Sopenharmony_cibail_unref:
8478c2ecf20Sopenharmony_ci	rvt_put_mr(mr);
8488c2ecf20Sopenharmony_cibail:
8498c2ecf20Sopenharmony_ci	rcu_read_unlock();
8508c2ecf20Sopenharmony_ci	return -EINVAL;
8518c2ecf20Sopenharmony_ci}
8528c2ecf20Sopenharmony_ciEXPORT_SYMBOL(rvt_lkey_ok);
8538c2ecf20Sopenharmony_ci
8548c2ecf20Sopenharmony_ci/**
8558c2ecf20Sopenharmony_ci * rvt_rkey_ok - check the IB virtual address, length, and RKEY
8568c2ecf20Sopenharmony_ci * @qp: qp for validation
8578c2ecf20Sopenharmony_ci * @sge: SGE state
8588c2ecf20Sopenharmony_ci * @len: length of data
8598c2ecf20Sopenharmony_ci * @vaddr: virtual address to place data
8608c2ecf20Sopenharmony_ci * @rkey: rkey to check
8618c2ecf20Sopenharmony_ci * @acc: access flags
8628c2ecf20Sopenharmony_ci *
8638c2ecf20Sopenharmony_ci * Return: 1 if successful, otherwise 0.
8648c2ecf20Sopenharmony_ci *
8658c2ecf20Sopenharmony_ci * increments the reference count upon success
8668c2ecf20Sopenharmony_ci */
8678c2ecf20Sopenharmony_ciint rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
8688c2ecf20Sopenharmony_ci		u32 len, u64 vaddr, u32 rkey, int acc)
8698c2ecf20Sopenharmony_ci{
8708c2ecf20Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
8718c2ecf20Sopenharmony_ci	struct rvt_lkey_table *rkt = &dev->lkey_table;
8728c2ecf20Sopenharmony_ci	struct rvt_mregion *mr;
8738c2ecf20Sopenharmony_ci	unsigned n, m;
8748c2ecf20Sopenharmony_ci	size_t off;
8758c2ecf20Sopenharmony_ci
8768c2ecf20Sopenharmony_ci	/*
8778c2ecf20Sopenharmony_ci	 * We use RKEY == zero for kernel virtual addresses
8788c2ecf20Sopenharmony_ci	 * (see rvt_get_dma_mr()).
8798c2ecf20Sopenharmony_ci	 */
8808c2ecf20Sopenharmony_ci	rcu_read_lock();
8818c2ecf20Sopenharmony_ci	if (rkey == 0) {
8828c2ecf20Sopenharmony_ci		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
8838c2ecf20Sopenharmony_ci		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci		if (pd->user)
8868c2ecf20Sopenharmony_ci			goto bail;
8878c2ecf20Sopenharmony_ci		mr = rcu_dereference(rdi->dma_mr);
8888c2ecf20Sopenharmony_ci		if (!mr)
8898c2ecf20Sopenharmony_ci			goto bail;
8908c2ecf20Sopenharmony_ci		rvt_get_mr(mr);
8918c2ecf20Sopenharmony_ci		rcu_read_unlock();
8928c2ecf20Sopenharmony_ci
8938c2ecf20Sopenharmony_ci		sge->mr = mr;
8948c2ecf20Sopenharmony_ci		sge->vaddr = (void *)vaddr;
8958c2ecf20Sopenharmony_ci		sge->length = len;
8968c2ecf20Sopenharmony_ci		sge->sge_length = len;
8978c2ecf20Sopenharmony_ci		sge->m = 0;
8988c2ecf20Sopenharmony_ci		sge->n = 0;
8998c2ecf20Sopenharmony_ci		goto ok;
9008c2ecf20Sopenharmony_ci	}
9018c2ecf20Sopenharmony_ci
9028c2ecf20Sopenharmony_ci	mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
9038c2ecf20Sopenharmony_ci	if (!mr)
9048c2ecf20Sopenharmony_ci		goto bail;
9058c2ecf20Sopenharmony_ci	rvt_get_mr(mr);
9068c2ecf20Sopenharmony_ci	/* insure mr read is before test */
9078c2ecf20Sopenharmony_ci	if (!READ_ONCE(mr->lkey_published))
9088c2ecf20Sopenharmony_ci		goto bail_unref;
9098c2ecf20Sopenharmony_ci	if (unlikely(atomic_read(&mr->lkey_invalid) ||
9108c2ecf20Sopenharmony_ci		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
9118c2ecf20Sopenharmony_ci		goto bail_unref;
9128c2ecf20Sopenharmony_ci
9138c2ecf20Sopenharmony_ci	off = vaddr - mr->iova;
9148c2ecf20Sopenharmony_ci	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
9158c2ecf20Sopenharmony_ci		     (mr->access_flags & acc) == 0))
9168c2ecf20Sopenharmony_ci		goto bail_unref;
9178c2ecf20Sopenharmony_ci	rcu_read_unlock();
9188c2ecf20Sopenharmony_ci
9198c2ecf20Sopenharmony_ci	off += mr->offset;
9208c2ecf20Sopenharmony_ci	if (mr->page_shift) {
9218c2ecf20Sopenharmony_ci		/*
9228c2ecf20Sopenharmony_ci		 * page sizes are uniform power of 2 so no loop is necessary
9238c2ecf20Sopenharmony_ci		 * entries_spanned_by_off is the number of times the loop below
9248c2ecf20Sopenharmony_ci		 * would have executed.
9258c2ecf20Sopenharmony_ci		*/
9268c2ecf20Sopenharmony_ci		size_t entries_spanned_by_off;
9278c2ecf20Sopenharmony_ci
9288c2ecf20Sopenharmony_ci		entries_spanned_by_off = off >> mr->page_shift;
9298c2ecf20Sopenharmony_ci		off -= (entries_spanned_by_off << mr->page_shift);
9308c2ecf20Sopenharmony_ci		m = entries_spanned_by_off / RVT_SEGSZ;
9318c2ecf20Sopenharmony_ci		n = entries_spanned_by_off % RVT_SEGSZ;
9328c2ecf20Sopenharmony_ci	} else {
9338c2ecf20Sopenharmony_ci		m = 0;
9348c2ecf20Sopenharmony_ci		n = 0;
9358c2ecf20Sopenharmony_ci		while (off >= mr->map[m]->segs[n].length) {
9368c2ecf20Sopenharmony_ci			off -= mr->map[m]->segs[n].length;
9378c2ecf20Sopenharmony_ci			n++;
9388c2ecf20Sopenharmony_ci			if (n >= RVT_SEGSZ) {
9398c2ecf20Sopenharmony_ci				m++;
9408c2ecf20Sopenharmony_ci				n = 0;
9418c2ecf20Sopenharmony_ci			}
9428c2ecf20Sopenharmony_ci		}
9438c2ecf20Sopenharmony_ci	}
9448c2ecf20Sopenharmony_ci	sge->mr = mr;
9458c2ecf20Sopenharmony_ci	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
9468c2ecf20Sopenharmony_ci	sge->length = mr->map[m]->segs[n].length - off;
9478c2ecf20Sopenharmony_ci	sge->sge_length = len;
9488c2ecf20Sopenharmony_ci	sge->m = m;
9498c2ecf20Sopenharmony_ci	sge->n = n;
9508c2ecf20Sopenharmony_ciok:
9518c2ecf20Sopenharmony_ci	return 1;
9528c2ecf20Sopenharmony_cibail_unref:
9538c2ecf20Sopenharmony_ci	rvt_put_mr(mr);
9548c2ecf20Sopenharmony_cibail:
9558c2ecf20Sopenharmony_ci	rcu_read_unlock();
9568c2ecf20Sopenharmony_ci	return 0;
9578c2ecf20Sopenharmony_ci}
9588c2ecf20Sopenharmony_ciEXPORT_SYMBOL(rvt_rkey_ok);
959