162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * Copyright(c) 2016 Intel Corporation.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/slab.h>
762306a36Sopenharmony_ci#include <linux/vmalloc.h>
862306a36Sopenharmony_ci#include <rdma/ib_umem.h>
962306a36Sopenharmony_ci#include <rdma/rdma_vt.h>
1062306a36Sopenharmony_ci#include "vt.h"
1162306a36Sopenharmony_ci#include "mr.h"
1262306a36Sopenharmony_ci#include "trace.h"
1362306a36Sopenharmony_ci
1462306a36Sopenharmony_ci/**
1562306a36Sopenharmony_ci * rvt_driver_mr_init - Init MR resources per driver
1662306a36Sopenharmony_ci * @rdi: rvt dev struct
1762306a36Sopenharmony_ci *
1862306a36Sopenharmony_ci * Do any intilization needed when a driver registers with rdmavt.
1962306a36Sopenharmony_ci *
2062306a36Sopenharmony_ci * Return: 0 on success or errno on failure
2162306a36Sopenharmony_ci */
2262306a36Sopenharmony_ciint rvt_driver_mr_init(struct rvt_dev_info *rdi)
2362306a36Sopenharmony_ci{
2462306a36Sopenharmony_ci	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
2562306a36Sopenharmony_ci	unsigned lk_tab_size;
2662306a36Sopenharmony_ci	int i;
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci	/*
2962306a36Sopenharmony_ci	 * The top hfi1_lkey_table_size bits are used to index the
3062306a36Sopenharmony_ci	 * table.  The lower 8 bits can be owned by the user (copied from
3162306a36Sopenharmony_ci	 * the LKEY).  The remaining bits act as a generation number or tag.
3262306a36Sopenharmony_ci	 */
3362306a36Sopenharmony_ci	if (!lkey_table_size)
3462306a36Sopenharmony_ci		return -EINVAL;
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci	spin_lock_init(&rdi->lkey_table.lock);
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	/* ensure generation is at least 4 bits */
3962306a36Sopenharmony_ci	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
4062306a36Sopenharmony_ci		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
4162306a36Sopenharmony_ci			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
4262306a36Sopenharmony_ci		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
4362306a36Sopenharmony_ci		lkey_table_size = rdi->dparms.lkey_table_size;
4462306a36Sopenharmony_ci	}
4562306a36Sopenharmony_ci	rdi->lkey_table.max = 1 << lkey_table_size;
4662306a36Sopenharmony_ci	rdi->lkey_table.shift = 32 - lkey_table_size;
4762306a36Sopenharmony_ci	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
4862306a36Sopenharmony_ci	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
4962306a36Sopenharmony_ci			       vmalloc_node(lk_tab_size, rdi->dparms.node);
5062306a36Sopenharmony_ci	if (!rdi->lkey_table.table)
5162306a36Sopenharmony_ci		return -ENOMEM;
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_ci	RCU_INIT_POINTER(rdi->dma_mr, NULL);
5462306a36Sopenharmony_ci	for (i = 0; i < rdi->lkey_table.max; i++)
5562306a36Sopenharmony_ci		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);
5662306a36Sopenharmony_ci
5762306a36Sopenharmony_ci	rdi->dparms.props.max_mr = rdi->lkey_table.max;
5862306a36Sopenharmony_ci	return 0;
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_ci/**
6262306a36Sopenharmony_ci * rvt_mr_exit - clean up MR
6362306a36Sopenharmony_ci * @rdi: rvt dev structure
6462306a36Sopenharmony_ci *
6562306a36Sopenharmony_ci * called when drivers have unregistered or perhaps failed to register with us
6662306a36Sopenharmony_ci */
6762306a36Sopenharmony_civoid rvt_mr_exit(struct rvt_dev_info *rdi)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	if (rdi->dma_mr)
7062306a36Sopenharmony_ci		rvt_pr_err(rdi, "DMA MR not null!\n");
7162306a36Sopenharmony_ci
7262306a36Sopenharmony_ci	vfree(rdi->lkey_table.table);
7362306a36Sopenharmony_ci}
7462306a36Sopenharmony_ci
7562306a36Sopenharmony_cistatic void rvt_deinit_mregion(struct rvt_mregion *mr)
7662306a36Sopenharmony_ci{
7762306a36Sopenharmony_ci	int i = mr->mapsz;
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_ci	mr->mapsz = 0;
8062306a36Sopenharmony_ci	while (i)
8162306a36Sopenharmony_ci		kfree(mr->map[--i]);
8262306a36Sopenharmony_ci	percpu_ref_exit(&mr->refcount);
8362306a36Sopenharmony_ci}
8462306a36Sopenharmony_ci
8562306a36Sopenharmony_cistatic void __rvt_mregion_complete(struct percpu_ref *ref)
8662306a36Sopenharmony_ci{
8762306a36Sopenharmony_ci	struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
8862306a36Sopenharmony_ci					      refcount);
8962306a36Sopenharmony_ci
9062306a36Sopenharmony_ci	complete(&mr->comp);
9162306a36Sopenharmony_ci}
9262306a36Sopenharmony_ci
9362306a36Sopenharmony_cistatic int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
9462306a36Sopenharmony_ci			    int count, unsigned int percpu_flags)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	int m, i = 0;
9762306a36Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(pd->device);
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci	mr->mapsz = 0;
10062306a36Sopenharmony_ci	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
10162306a36Sopenharmony_ci	for (; i < m; i++) {
10262306a36Sopenharmony_ci		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
10362306a36Sopenharmony_ci					  dev->dparms.node);
10462306a36Sopenharmony_ci		if (!mr->map[i])
10562306a36Sopenharmony_ci			goto bail;
10662306a36Sopenharmony_ci		mr->mapsz++;
10762306a36Sopenharmony_ci	}
10862306a36Sopenharmony_ci	init_completion(&mr->comp);
10962306a36Sopenharmony_ci	/* count returning the ptr to user */
11062306a36Sopenharmony_ci	if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
11162306a36Sopenharmony_ci			    percpu_flags, GFP_KERNEL))
11262306a36Sopenharmony_ci		goto bail;
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci	atomic_set(&mr->lkey_invalid, 0);
11562306a36Sopenharmony_ci	mr->pd = pd;
11662306a36Sopenharmony_ci	mr->max_segs = count;
11762306a36Sopenharmony_ci	return 0;
11862306a36Sopenharmony_cibail:
11962306a36Sopenharmony_ci	rvt_deinit_mregion(mr);
12062306a36Sopenharmony_ci	return -ENOMEM;
12162306a36Sopenharmony_ci}
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci/**
12462306a36Sopenharmony_ci * rvt_alloc_lkey - allocate an lkey
12562306a36Sopenharmony_ci * @mr: memory region that this lkey protects
12662306a36Sopenharmony_ci * @dma_region: 0->normal key, 1->restricted DMA key
12762306a36Sopenharmony_ci *
12862306a36Sopenharmony_ci * Returns 0 if successful, otherwise returns -errno.
12962306a36Sopenharmony_ci *
13062306a36Sopenharmony_ci * Increments mr reference count as required.
13162306a36Sopenharmony_ci *
13262306a36Sopenharmony_ci * Sets the lkey field mr for non-dma regions.
13362306a36Sopenharmony_ci *
13462306a36Sopenharmony_ci */
13562306a36Sopenharmony_cistatic int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
13662306a36Sopenharmony_ci{
13762306a36Sopenharmony_ci	unsigned long flags;
13862306a36Sopenharmony_ci	u32 r;
13962306a36Sopenharmony_ci	u32 n;
14062306a36Sopenharmony_ci	int ret = 0;
14162306a36Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
14262306a36Sopenharmony_ci	struct rvt_lkey_table *rkt = &dev->lkey_table;
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	rvt_get_mr(mr);
14562306a36Sopenharmony_ci	spin_lock_irqsave(&rkt->lock, flags);
14662306a36Sopenharmony_ci
14762306a36Sopenharmony_ci	/* special case for dma_mr lkey == 0 */
14862306a36Sopenharmony_ci	if (dma_region) {
14962306a36Sopenharmony_ci		struct rvt_mregion *tmr;
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_ci		tmr = rcu_access_pointer(dev->dma_mr);
15262306a36Sopenharmony_ci		if (!tmr) {
15362306a36Sopenharmony_ci			mr->lkey_published = 1;
15462306a36Sopenharmony_ci			/* Insure published written first */
15562306a36Sopenharmony_ci			rcu_assign_pointer(dev->dma_mr, mr);
15662306a36Sopenharmony_ci			rvt_get_mr(mr);
15762306a36Sopenharmony_ci		}
15862306a36Sopenharmony_ci		goto success;
15962306a36Sopenharmony_ci	}
16062306a36Sopenharmony_ci
16162306a36Sopenharmony_ci	/* Find the next available LKEY */
16262306a36Sopenharmony_ci	r = rkt->next;
16362306a36Sopenharmony_ci	n = r;
16462306a36Sopenharmony_ci	for (;;) {
16562306a36Sopenharmony_ci		if (!rcu_access_pointer(rkt->table[r]))
16662306a36Sopenharmony_ci			break;
16762306a36Sopenharmony_ci		r = (r + 1) & (rkt->max - 1);
16862306a36Sopenharmony_ci		if (r == n)
16962306a36Sopenharmony_ci			goto bail;
17062306a36Sopenharmony_ci	}
17162306a36Sopenharmony_ci	rkt->next = (r + 1) & (rkt->max - 1);
17262306a36Sopenharmony_ci	/*
17362306a36Sopenharmony_ci	 * Make sure lkey is never zero which is reserved to indicate an
17462306a36Sopenharmony_ci	 * unrestricted LKEY.
17562306a36Sopenharmony_ci	 */
17662306a36Sopenharmony_ci	rkt->gen++;
17762306a36Sopenharmony_ci	/*
17862306a36Sopenharmony_ci	 * bits are capped to ensure enough bits for generation number
17962306a36Sopenharmony_ci	 */
18062306a36Sopenharmony_ci	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
18162306a36Sopenharmony_ci		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
18262306a36Sopenharmony_ci		 << 8);
18362306a36Sopenharmony_ci	if (mr->lkey == 0) {
18462306a36Sopenharmony_ci		mr->lkey |= 1 << 8;
18562306a36Sopenharmony_ci		rkt->gen++;
18662306a36Sopenharmony_ci	}
18762306a36Sopenharmony_ci	mr->lkey_published = 1;
18862306a36Sopenharmony_ci	/* Insure published written first */
18962306a36Sopenharmony_ci	rcu_assign_pointer(rkt->table[r], mr);
19062306a36Sopenharmony_cisuccess:
19162306a36Sopenharmony_ci	spin_unlock_irqrestore(&rkt->lock, flags);
19262306a36Sopenharmony_ciout:
19362306a36Sopenharmony_ci	return ret;
19462306a36Sopenharmony_cibail:
19562306a36Sopenharmony_ci	rvt_put_mr(mr);
19662306a36Sopenharmony_ci	spin_unlock_irqrestore(&rkt->lock, flags);
19762306a36Sopenharmony_ci	ret = -ENOMEM;
19862306a36Sopenharmony_ci	goto out;
19962306a36Sopenharmony_ci}
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci/**
20262306a36Sopenharmony_ci * rvt_free_lkey - free an lkey
20362306a36Sopenharmony_ci * @mr: mr to free from tables
20462306a36Sopenharmony_ci */
20562306a36Sopenharmony_cistatic void rvt_free_lkey(struct rvt_mregion *mr)
20662306a36Sopenharmony_ci{
20762306a36Sopenharmony_ci	unsigned long flags;
20862306a36Sopenharmony_ci	u32 lkey = mr->lkey;
20962306a36Sopenharmony_ci	u32 r;
21062306a36Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
21162306a36Sopenharmony_ci	struct rvt_lkey_table *rkt = &dev->lkey_table;
21262306a36Sopenharmony_ci	int freed = 0;
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	spin_lock_irqsave(&rkt->lock, flags);
21562306a36Sopenharmony_ci	if (!lkey) {
21662306a36Sopenharmony_ci		if (mr->lkey_published) {
21762306a36Sopenharmony_ci			mr->lkey_published = 0;
21862306a36Sopenharmony_ci			/* insure published is written before pointer */
21962306a36Sopenharmony_ci			rcu_assign_pointer(dev->dma_mr, NULL);
22062306a36Sopenharmony_ci			rvt_put_mr(mr);
22162306a36Sopenharmony_ci		}
22262306a36Sopenharmony_ci	} else {
22362306a36Sopenharmony_ci		if (!mr->lkey_published)
22462306a36Sopenharmony_ci			goto out;
22562306a36Sopenharmony_ci		r = lkey >> (32 - dev->dparms.lkey_table_size);
22662306a36Sopenharmony_ci		mr->lkey_published = 0;
22762306a36Sopenharmony_ci		/* insure published is written before pointer */
22862306a36Sopenharmony_ci		rcu_assign_pointer(rkt->table[r], NULL);
22962306a36Sopenharmony_ci	}
23062306a36Sopenharmony_ci	freed++;
23162306a36Sopenharmony_ciout:
23262306a36Sopenharmony_ci	spin_unlock_irqrestore(&rkt->lock, flags);
23362306a36Sopenharmony_ci	if (freed)
23462306a36Sopenharmony_ci		percpu_ref_kill(&mr->refcount);
23562306a36Sopenharmony_ci}
23662306a36Sopenharmony_ci
23762306a36Sopenharmony_cistatic struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
23862306a36Sopenharmony_ci{
23962306a36Sopenharmony_ci	struct rvt_mr *mr;
24062306a36Sopenharmony_ci	int rval = -ENOMEM;
24162306a36Sopenharmony_ci	int m;
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_ci	/* Allocate struct plus pointers to first level page tables. */
24462306a36Sopenharmony_ci	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
24562306a36Sopenharmony_ci	mr = kzalloc(struct_size(mr, mr.map, m), GFP_KERNEL);
24662306a36Sopenharmony_ci	if (!mr)
24762306a36Sopenharmony_ci		goto bail;
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci	rval = rvt_init_mregion(&mr->mr, pd, count, 0);
25062306a36Sopenharmony_ci	if (rval)
25162306a36Sopenharmony_ci		goto bail;
25262306a36Sopenharmony_ci	/*
25362306a36Sopenharmony_ci	 * ib_reg_phys_mr() will initialize mr->ibmr except for
25462306a36Sopenharmony_ci	 * lkey and rkey.
25562306a36Sopenharmony_ci	 */
25662306a36Sopenharmony_ci	rval = rvt_alloc_lkey(&mr->mr, 0);
25762306a36Sopenharmony_ci	if (rval)
25862306a36Sopenharmony_ci		goto bail_mregion;
25962306a36Sopenharmony_ci	mr->ibmr.lkey = mr->mr.lkey;
26062306a36Sopenharmony_ci	mr->ibmr.rkey = mr->mr.lkey;
26162306a36Sopenharmony_cidone:
26262306a36Sopenharmony_ci	return mr;
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_cibail_mregion:
26562306a36Sopenharmony_ci	rvt_deinit_mregion(&mr->mr);
26662306a36Sopenharmony_cibail:
26762306a36Sopenharmony_ci	kfree(mr);
26862306a36Sopenharmony_ci	mr = ERR_PTR(rval);
26962306a36Sopenharmony_ci	goto done;
27062306a36Sopenharmony_ci}
27162306a36Sopenharmony_ci
27262306a36Sopenharmony_cistatic void __rvt_free_mr(struct rvt_mr *mr)
27362306a36Sopenharmony_ci{
27462306a36Sopenharmony_ci	rvt_free_lkey(&mr->mr);
27562306a36Sopenharmony_ci	rvt_deinit_mregion(&mr->mr);
27662306a36Sopenharmony_ci	kfree(mr);
27762306a36Sopenharmony_ci}
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci/**
28062306a36Sopenharmony_ci * rvt_get_dma_mr - get a DMA memory region
28162306a36Sopenharmony_ci * @pd: protection domain for this memory region
28262306a36Sopenharmony_ci * @acc: access flags
28362306a36Sopenharmony_ci *
28462306a36Sopenharmony_ci * Return: the memory region on success, otherwise returns an errno.
28562306a36Sopenharmony_ci */
28662306a36Sopenharmony_cistruct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
28762306a36Sopenharmony_ci{
28862306a36Sopenharmony_ci	struct rvt_mr *mr;
28962306a36Sopenharmony_ci	struct ib_mr *ret;
29062306a36Sopenharmony_ci	int rval;
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	if (ibpd_to_rvtpd(pd)->user)
29362306a36Sopenharmony_ci		return ERR_PTR(-EPERM);
29462306a36Sopenharmony_ci
29562306a36Sopenharmony_ci	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
29662306a36Sopenharmony_ci	if (!mr) {
29762306a36Sopenharmony_ci		ret = ERR_PTR(-ENOMEM);
29862306a36Sopenharmony_ci		goto bail;
29962306a36Sopenharmony_ci	}
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
30262306a36Sopenharmony_ci	if (rval) {
30362306a36Sopenharmony_ci		ret = ERR_PTR(rval);
30462306a36Sopenharmony_ci		goto bail;
30562306a36Sopenharmony_ci	}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_ci	rval = rvt_alloc_lkey(&mr->mr, 1);
30862306a36Sopenharmony_ci	if (rval) {
30962306a36Sopenharmony_ci		ret = ERR_PTR(rval);
31062306a36Sopenharmony_ci		goto bail_mregion;
31162306a36Sopenharmony_ci	}
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	mr->mr.access_flags = acc;
31462306a36Sopenharmony_ci	ret = &mr->ibmr;
31562306a36Sopenharmony_cidone:
31662306a36Sopenharmony_ci	return ret;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_cibail_mregion:
31962306a36Sopenharmony_ci	rvt_deinit_mregion(&mr->mr);
32062306a36Sopenharmony_cibail:
32162306a36Sopenharmony_ci	kfree(mr);
32262306a36Sopenharmony_ci	goto done;
32362306a36Sopenharmony_ci}
32462306a36Sopenharmony_ci
32562306a36Sopenharmony_ci/**
32662306a36Sopenharmony_ci * rvt_reg_user_mr - register a userspace memory region
32762306a36Sopenharmony_ci * @pd: protection domain for this memory region
32862306a36Sopenharmony_ci * @start: starting userspace address
32962306a36Sopenharmony_ci * @length: length of region to register
33062306a36Sopenharmony_ci * @virt_addr: associated virtual address
33162306a36Sopenharmony_ci * @mr_access_flags: access flags for this memory region
33262306a36Sopenharmony_ci * @udata: unused by the driver
33362306a36Sopenharmony_ci *
33462306a36Sopenharmony_ci * Return: the memory region on success, otherwise returns an errno.
33562306a36Sopenharmony_ci */
33662306a36Sopenharmony_cistruct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
33762306a36Sopenharmony_ci			      u64 virt_addr, int mr_access_flags,
33862306a36Sopenharmony_ci			      struct ib_udata *udata)
33962306a36Sopenharmony_ci{
34062306a36Sopenharmony_ci	struct rvt_mr *mr;
34162306a36Sopenharmony_ci	struct ib_umem *umem;
34262306a36Sopenharmony_ci	struct sg_page_iter sg_iter;
34362306a36Sopenharmony_ci	int n, m;
34462306a36Sopenharmony_ci	struct ib_mr *ret;
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	if (length == 0)
34762306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_ci	umem = ib_umem_get(pd->device, start, length, mr_access_flags);
35062306a36Sopenharmony_ci	if (IS_ERR(umem))
35162306a36Sopenharmony_ci		return (void *)umem;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	n = ib_umem_num_pages(umem);
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_ci	mr = __rvt_alloc_mr(n, pd);
35662306a36Sopenharmony_ci	if (IS_ERR(mr)) {
35762306a36Sopenharmony_ci		ret = (struct ib_mr *)mr;
35862306a36Sopenharmony_ci		goto bail_umem;
35962306a36Sopenharmony_ci	}
36062306a36Sopenharmony_ci
36162306a36Sopenharmony_ci	mr->mr.user_base = start;
36262306a36Sopenharmony_ci	mr->mr.iova = virt_addr;
36362306a36Sopenharmony_ci	mr->mr.length = length;
36462306a36Sopenharmony_ci	mr->mr.offset = ib_umem_offset(umem);
36562306a36Sopenharmony_ci	mr->mr.access_flags = mr_access_flags;
36662306a36Sopenharmony_ci	mr->umem = umem;
36762306a36Sopenharmony_ci
36862306a36Sopenharmony_ci	mr->mr.page_shift = PAGE_SHIFT;
36962306a36Sopenharmony_ci	m = 0;
37062306a36Sopenharmony_ci	n = 0;
37162306a36Sopenharmony_ci	for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
37262306a36Sopenharmony_ci		void *vaddr;
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci		vaddr = page_address(sg_page_iter_page(&sg_iter));
37562306a36Sopenharmony_ci		if (!vaddr) {
37662306a36Sopenharmony_ci			ret = ERR_PTR(-EINVAL);
37762306a36Sopenharmony_ci			goto bail_inval;
37862306a36Sopenharmony_ci		}
37962306a36Sopenharmony_ci		mr->mr.map[m]->segs[n].vaddr = vaddr;
38062306a36Sopenharmony_ci		mr->mr.map[m]->segs[n].length = PAGE_SIZE;
38162306a36Sopenharmony_ci		trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, PAGE_SIZE);
38262306a36Sopenharmony_ci		if (++n == RVT_SEGSZ) {
38362306a36Sopenharmony_ci			m++;
38462306a36Sopenharmony_ci			n = 0;
38562306a36Sopenharmony_ci		}
38662306a36Sopenharmony_ci	}
38762306a36Sopenharmony_ci	return &mr->ibmr;
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_cibail_inval:
39062306a36Sopenharmony_ci	__rvt_free_mr(mr);
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_cibail_umem:
39362306a36Sopenharmony_ci	ib_umem_release(umem);
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_ci	return ret;
39662306a36Sopenharmony_ci}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci/**
39962306a36Sopenharmony_ci * rvt_dereg_clean_qp_cb - callback from iterator
40062306a36Sopenharmony_ci * @qp: the qp
40162306a36Sopenharmony_ci * @v: the mregion (as u64)
40262306a36Sopenharmony_ci *
40362306a36Sopenharmony_ci * This routine fields the callback for all QPs and
40462306a36Sopenharmony_ci * for QPs in the same PD as the MR will call the
40562306a36Sopenharmony_ci * rvt_qp_mr_clean() to potentially cleanup references.
40662306a36Sopenharmony_ci */
40762306a36Sopenharmony_cistatic void rvt_dereg_clean_qp_cb(struct rvt_qp *qp, u64 v)
40862306a36Sopenharmony_ci{
40962306a36Sopenharmony_ci	struct rvt_mregion *mr = (struct rvt_mregion *)v;
41062306a36Sopenharmony_ci
41162306a36Sopenharmony_ci	/* skip PDs that are not ours */
41262306a36Sopenharmony_ci	if (mr->pd != qp->ibqp.pd)
41362306a36Sopenharmony_ci		return;
41462306a36Sopenharmony_ci	rvt_qp_mr_clean(qp, mr->lkey);
41562306a36Sopenharmony_ci}
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci/**
41862306a36Sopenharmony_ci * rvt_dereg_clean_qps - find QPs for reference cleanup
41962306a36Sopenharmony_ci * @mr: the MR that is being deregistered
42062306a36Sopenharmony_ci *
42162306a36Sopenharmony_ci * This routine iterates RC QPs looking for references
42262306a36Sopenharmony_ci * to the lkey noted in mr.
42362306a36Sopenharmony_ci */
42462306a36Sopenharmony_cistatic void rvt_dereg_clean_qps(struct rvt_mregion *mr)
42562306a36Sopenharmony_ci{
42662306a36Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_ci	rvt_qp_iter(rdi, (u64)mr, rvt_dereg_clean_qp_cb);
42962306a36Sopenharmony_ci}
43062306a36Sopenharmony_ci
43162306a36Sopenharmony_ci/**
43262306a36Sopenharmony_ci * rvt_check_refs - check references
43362306a36Sopenharmony_ci * @mr: the megion
43462306a36Sopenharmony_ci * @t: the caller identification
43562306a36Sopenharmony_ci *
43662306a36Sopenharmony_ci * This routine checks MRs holding a reference during
43762306a36Sopenharmony_ci * when being de-registered.
43862306a36Sopenharmony_ci *
43962306a36Sopenharmony_ci * If the count is non-zero, the code calls a clean routine then
44062306a36Sopenharmony_ci * waits for the timeout for the count to zero.
44162306a36Sopenharmony_ci */
44262306a36Sopenharmony_cistatic int rvt_check_refs(struct rvt_mregion *mr, const char *t)
44362306a36Sopenharmony_ci{
44462306a36Sopenharmony_ci	unsigned long timeout;
44562306a36Sopenharmony_ci	struct rvt_dev_info *rdi = ib_to_rvt(mr->pd->device);
44662306a36Sopenharmony_ci
44762306a36Sopenharmony_ci	if (mr->lkey) {
44862306a36Sopenharmony_ci		/* avoid dma mr */
44962306a36Sopenharmony_ci		rvt_dereg_clean_qps(mr);
45062306a36Sopenharmony_ci		/* @mr was indexed on rcu protected @lkey_table */
45162306a36Sopenharmony_ci		synchronize_rcu();
45262306a36Sopenharmony_ci	}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_ci	timeout = wait_for_completion_timeout(&mr->comp, 5 * HZ);
45562306a36Sopenharmony_ci	if (!timeout) {
45662306a36Sopenharmony_ci		rvt_pr_err(rdi,
45762306a36Sopenharmony_ci			   "%s timeout mr %p pd %p lkey %x refcount %ld\n",
45862306a36Sopenharmony_ci			   t, mr, mr->pd, mr->lkey,
45962306a36Sopenharmony_ci			   atomic_long_read(&mr->refcount.data->count));
46062306a36Sopenharmony_ci		rvt_get_mr(mr);
46162306a36Sopenharmony_ci		return -EBUSY;
46262306a36Sopenharmony_ci	}
46362306a36Sopenharmony_ci	return 0;
46462306a36Sopenharmony_ci}
46562306a36Sopenharmony_ci
46662306a36Sopenharmony_ci/**
46762306a36Sopenharmony_ci * rvt_mr_has_lkey - is MR
46862306a36Sopenharmony_ci * @mr: the mregion
46962306a36Sopenharmony_ci * @lkey: the lkey
47062306a36Sopenharmony_ci */
47162306a36Sopenharmony_cibool rvt_mr_has_lkey(struct rvt_mregion *mr, u32 lkey)
47262306a36Sopenharmony_ci{
47362306a36Sopenharmony_ci	return mr && lkey == mr->lkey;
47462306a36Sopenharmony_ci}
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci/**
47762306a36Sopenharmony_ci * rvt_ss_has_lkey - is mr in sge tests
47862306a36Sopenharmony_ci * @ss: the sge state
47962306a36Sopenharmony_ci * @lkey: the lkey
48062306a36Sopenharmony_ci *
48162306a36Sopenharmony_ci * This code tests for an MR in the indicated
48262306a36Sopenharmony_ci * sge state.
48362306a36Sopenharmony_ci */
48462306a36Sopenharmony_cibool rvt_ss_has_lkey(struct rvt_sge_state *ss, u32 lkey)
48562306a36Sopenharmony_ci{
48662306a36Sopenharmony_ci	int i;
48762306a36Sopenharmony_ci	bool rval = false;
48862306a36Sopenharmony_ci
48962306a36Sopenharmony_ci	if (!ss->num_sge)
49062306a36Sopenharmony_ci		return rval;
49162306a36Sopenharmony_ci	/* first one */
49262306a36Sopenharmony_ci	rval = rvt_mr_has_lkey(ss->sge.mr, lkey);
49362306a36Sopenharmony_ci	/* any others */
49462306a36Sopenharmony_ci	for (i = 0; !rval && i < ss->num_sge - 1; i++)
49562306a36Sopenharmony_ci		rval = rvt_mr_has_lkey(ss->sg_list[i].mr, lkey);
49662306a36Sopenharmony_ci	return rval;
49762306a36Sopenharmony_ci}
49862306a36Sopenharmony_ci
49962306a36Sopenharmony_ci/**
50062306a36Sopenharmony_ci * rvt_dereg_mr - unregister and free a memory region
50162306a36Sopenharmony_ci * @ibmr: the memory region to free
50262306a36Sopenharmony_ci * @udata: unused by the driver
50362306a36Sopenharmony_ci *
50462306a36Sopenharmony_ci * Note that this is called to free MRs created by rvt_get_dma_mr()
50562306a36Sopenharmony_ci * or rvt_reg_user_mr().
50662306a36Sopenharmony_ci *
50762306a36Sopenharmony_ci * Returns 0 on success.
50862306a36Sopenharmony_ci */
50962306a36Sopenharmony_ciint rvt_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
51062306a36Sopenharmony_ci{
51162306a36Sopenharmony_ci	struct rvt_mr *mr = to_imr(ibmr);
51262306a36Sopenharmony_ci	int ret;
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_ci	rvt_free_lkey(&mr->mr);
51562306a36Sopenharmony_ci
51662306a36Sopenharmony_ci	rvt_put_mr(&mr->mr); /* will set completion if last */
51762306a36Sopenharmony_ci	ret = rvt_check_refs(&mr->mr, __func__);
51862306a36Sopenharmony_ci	if (ret)
51962306a36Sopenharmony_ci		goto out;
52062306a36Sopenharmony_ci	rvt_deinit_mregion(&mr->mr);
52162306a36Sopenharmony_ci	ib_umem_release(mr->umem);
52262306a36Sopenharmony_ci	kfree(mr);
52362306a36Sopenharmony_ciout:
52462306a36Sopenharmony_ci	return ret;
52562306a36Sopenharmony_ci}
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci/**
52862306a36Sopenharmony_ci * rvt_alloc_mr - Allocate a memory region usable with the
52962306a36Sopenharmony_ci * @pd: protection domain for this memory region
53062306a36Sopenharmony_ci * @mr_type: mem region type
53162306a36Sopenharmony_ci * @max_num_sg: Max number of segments allowed
53262306a36Sopenharmony_ci *
53362306a36Sopenharmony_ci * Return: the memory region on success, otherwise return an errno.
53462306a36Sopenharmony_ci */
53562306a36Sopenharmony_cistruct ib_mr *rvt_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
53662306a36Sopenharmony_ci			   u32 max_num_sg)
53762306a36Sopenharmony_ci{
53862306a36Sopenharmony_ci	struct rvt_mr *mr;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ci	if (mr_type != IB_MR_TYPE_MEM_REG)
54162306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
54262306a36Sopenharmony_ci
54362306a36Sopenharmony_ci	mr = __rvt_alloc_mr(max_num_sg, pd);
54462306a36Sopenharmony_ci	if (IS_ERR(mr))
54562306a36Sopenharmony_ci		return (struct ib_mr *)mr;
54662306a36Sopenharmony_ci
54762306a36Sopenharmony_ci	return &mr->ibmr;
54862306a36Sopenharmony_ci}
54962306a36Sopenharmony_ci
55062306a36Sopenharmony_ci/**
55162306a36Sopenharmony_ci * rvt_set_page - page assignment function called by ib_sg_to_pages
55262306a36Sopenharmony_ci * @ibmr: memory region
55362306a36Sopenharmony_ci * @addr: dma address of mapped page
55462306a36Sopenharmony_ci *
55562306a36Sopenharmony_ci * Return: 0 on success
55662306a36Sopenharmony_ci */
55762306a36Sopenharmony_cistatic int rvt_set_page(struct ib_mr *ibmr, u64 addr)
55862306a36Sopenharmony_ci{
55962306a36Sopenharmony_ci	struct rvt_mr *mr = to_imr(ibmr);
56062306a36Sopenharmony_ci	u32 ps = 1 << mr->mr.page_shift;
56162306a36Sopenharmony_ci	u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
56262306a36Sopenharmony_ci	int m, n;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	if (unlikely(mapped_segs == mr->mr.max_segs))
56562306a36Sopenharmony_ci		return -ENOMEM;
56662306a36Sopenharmony_ci
56762306a36Sopenharmony_ci	m = mapped_segs / RVT_SEGSZ;
56862306a36Sopenharmony_ci	n = mapped_segs % RVT_SEGSZ;
56962306a36Sopenharmony_ci	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
57062306a36Sopenharmony_ci	mr->mr.map[m]->segs[n].length = ps;
57162306a36Sopenharmony_ci	mr->mr.length += ps;
57262306a36Sopenharmony_ci	trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	return 0;
57562306a36Sopenharmony_ci}
57662306a36Sopenharmony_ci
57762306a36Sopenharmony_ci/**
57862306a36Sopenharmony_ci * rvt_map_mr_sg - map sg list and set it the memory region
57962306a36Sopenharmony_ci * @ibmr: memory region
58062306a36Sopenharmony_ci * @sg: dma mapped scatterlist
58162306a36Sopenharmony_ci * @sg_nents: number of entries in sg
58262306a36Sopenharmony_ci * @sg_offset: offset in bytes into sg
58362306a36Sopenharmony_ci *
58462306a36Sopenharmony_ci * Overwrite rvt_mr length with mr length calculated by ib_sg_to_pages.
58562306a36Sopenharmony_ci *
58662306a36Sopenharmony_ci * Return: number of sg elements mapped to the memory region
58762306a36Sopenharmony_ci */
58862306a36Sopenharmony_ciint rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
58962306a36Sopenharmony_ci		  int sg_nents, unsigned int *sg_offset)
59062306a36Sopenharmony_ci{
59162306a36Sopenharmony_ci	struct rvt_mr *mr = to_imr(ibmr);
59262306a36Sopenharmony_ci	int ret;
59362306a36Sopenharmony_ci
59462306a36Sopenharmony_ci	mr->mr.length = 0;
59562306a36Sopenharmony_ci	mr->mr.page_shift = PAGE_SHIFT;
59662306a36Sopenharmony_ci	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rvt_set_page);
59762306a36Sopenharmony_ci	mr->mr.user_base = ibmr->iova;
59862306a36Sopenharmony_ci	mr->mr.iova = ibmr->iova;
59962306a36Sopenharmony_ci	mr->mr.offset = ibmr->iova - (u64)mr->mr.map[0]->segs[0].vaddr;
60062306a36Sopenharmony_ci	mr->mr.length = (size_t)ibmr->length;
60162306a36Sopenharmony_ci	trace_rvt_map_mr_sg(ibmr, sg_nents, sg_offset);
60262306a36Sopenharmony_ci	return ret;
60362306a36Sopenharmony_ci}
60462306a36Sopenharmony_ci
60562306a36Sopenharmony_ci/**
60662306a36Sopenharmony_ci * rvt_fast_reg_mr - fast register physical MR
60762306a36Sopenharmony_ci * @qp: the queue pair where the work request comes from
60862306a36Sopenharmony_ci * @ibmr: the memory region to be registered
60962306a36Sopenharmony_ci * @key: updated key for this memory region
61062306a36Sopenharmony_ci * @access: access flags for this memory region
61162306a36Sopenharmony_ci *
61262306a36Sopenharmony_ci * Returns 0 on success.
61362306a36Sopenharmony_ci */
61462306a36Sopenharmony_ciint rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
61562306a36Sopenharmony_ci		    int access)
61662306a36Sopenharmony_ci{
61762306a36Sopenharmony_ci	struct rvt_mr *mr = to_imr(ibmr);
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	if (qp->ibqp.pd != mr->mr.pd)
62062306a36Sopenharmony_ci		return -EACCES;
62162306a36Sopenharmony_ci
62262306a36Sopenharmony_ci	/* not applicable to dma MR or user MR */
62362306a36Sopenharmony_ci	if (!mr->mr.lkey || mr->umem)
62462306a36Sopenharmony_ci		return -EINVAL;
62562306a36Sopenharmony_ci
62662306a36Sopenharmony_ci	if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
62762306a36Sopenharmony_ci		return -EINVAL;
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_ci	ibmr->lkey = key;
63062306a36Sopenharmony_ci	ibmr->rkey = key;
63162306a36Sopenharmony_ci	mr->mr.lkey = key;
63262306a36Sopenharmony_ci	mr->mr.access_flags = access;
63362306a36Sopenharmony_ci	mr->mr.iova = ibmr->iova;
63462306a36Sopenharmony_ci	atomic_set(&mr->mr.lkey_invalid, 0);
63562306a36Sopenharmony_ci
63662306a36Sopenharmony_ci	return 0;
63762306a36Sopenharmony_ci}
63862306a36Sopenharmony_ciEXPORT_SYMBOL(rvt_fast_reg_mr);
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_ci/**
64162306a36Sopenharmony_ci * rvt_invalidate_rkey - invalidate an MR rkey
64262306a36Sopenharmony_ci * @qp: queue pair associated with the invalidate op
64362306a36Sopenharmony_ci * @rkey: rkey to invalidate
64462306a36Sopenharmony_ci *
64562306a36Sopenharmony_ci * Returns 0 on success.
64662306a36Sopenharmony_ci */
64762306a36Sopenharmony_ciint rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
64862306a36Sopenharmony_ci{
64962306a36Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
65062306a36Sopenharmony_ci	struct rvt_lkey_table *rkt = &dev->lkey_table;
65162306a36Sopenharmony_ci	struct rvt_mregion *mr;
65262306a36Sopenharmony_ci
65362306a36Sopenharmony_ci	if (rkey == 0)
65462306a36Sopenharmony_ci		return -EINVAL;
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ci	rcu_read_lock();
65762306a36Sopenharmony_ci	mr = rcu_dereference(
65862306a36Sopenharmony_ci		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
65962306a36Sopenharmony_ci	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
66062306a36Sopenharmony_ci		goto bail;
66162306a36Sopenharmony_ci
66262306a36Sopenharmony_ci	atomic_set(&mr->lkey_invalid, 1);
66362306a36Sopenharmony_ci	rcu_read_unlock();
66462306a36Sopenharmony_ci	return 0;
66562306a36Sopenharmony_ci
66662306a36Sopenharmony_cibail:
66762306a36Sopenharmony_ci	rcu_read_unlock();
66862306a36Sopenharmony_ci	return -EINVAL;
66962306a36Sopenharmony_ci}
67062306a36Sopenharmony_ciEXPORT_SYMBOL(rvt_invalidate_rkey);
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci/**
67362306a36Sopenharmony_ci * rvt_sge_adjacent - is isge compressible
67462306a36Sopenharmony_ci * @last_sge: last outgoing SGE written
67562306a36Sopenharmony_ci * @sge: SGE to check
67662306a36Sopenharmony_ci *
67762306a36Sopenharmony_ci * If adjacent will update last_sge to add length.
67862306a36Sopenharmony_ci *
67962306a36Sopenharmony_ci * Return: true if isge is adjacent to last sge
68062306a36Sopenharmony_ci */
68162306a36Sopenharmony_cistatic inline bool rvt_sge_adjacent(struct rvt_sge *last_sge,
68262306a36Sopenharmony_ci				    struct ib_sge *sge)
68362306a36Sopenharmony_ci{
68462306a36Sopenharmony_ci	if (last_sge && sge->lkey == last_sge->mr->lkey &&
68562306a36Sopenharmony_ci	    ((uint64_t)(last_sge->vaddr + last_sge->length) == sge->addr)) {
68662306a36Sopenharmony_ci		if (sge->lkey) {
68762306a36Sopenharmony_ci			if (unlikely((sge->addr - last_sge->mr->user_base +
68862306a36Sopenharmony_ci			      sge->length > last_sge->mr->length)))
68962306a36Sopenharmony_ci				return false; /* overrun, caller will catch */
69062306a36Sopenharmony_ci		} else {
69162306a36Sopenharmony_ci			last_sge->length += sge->length;
69262306a36Sopenharmony_ci		}
69362306a36Sopenharmony_ci		last_sge->sge_length += sge->length;
69462306a36Sopenharmony_ci		trace_rvt_sge_adjacent(last_sge, sge);
69562306a36Sopenharmony_ci		return true;
69662306a36Sopenharmony_ci	}
69762306a36Sopenharmony_ci	return false;
69862306a36Sopenharmony_ci}
69962306a36Sopenharmony_ci
70062306a36Sopenharmony_ci/**
70162306a36Sopenharmony_ci * rvt_lkey_ok - check IB SGE for validity and initialize
70262306a36Sopenharmony_ci * @rkt: table containing lkey to check SGE against
70362306a36Sopenharmony_ci * @pd: protection domain
70462306a36Sopenharmony_ci * @isge: outgoing internal SGE
70562306a36Sopenharmony_ci * @last_sge: last outgoing SGE written
70662306a36Sopenharmony_ci * @sge: SGE to check
70762306a36Sopenharmony_ci * @acc: access flags
70862306a36Sopenharmony_ci *
70962306a36Sopenharmony_ci * Check the IB SGE for validity and initialize our internal version
71062306a36Sopenharmony_ci * of it.
71162306a36Sopenharmony_ci *
71262306a36Sopenharmony_ci * Increments the reference count when a new sge is stored.
71362306a36Sopenharmony_ci *
71462306a36Sopenharmony_ci * Return: 0 if compressed, 1 if added , otherwise returns -errno.
71562306a36Sopenharmony_ci */
71662306a36Sopenharmony_ciint rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
71762306a36Sopenharmony_ci		struct rvt_sge *isge, struct rvt_sge *last_sge,
71862306a36Sopenharmony_ci		struct ib_sge *sge, int acc)
71962306a36Sopenharmony_ci{
72062306a36Sopenharmony_ci	struct rvt_mregion *mr;
72162306a36Sopenharmony_ci	unsigned n, m;
72262306a36Sopenharmony_ci	size_t off;
72362306a36Sopenharmony_ci
72462306a36Sopenharmony_ci	/*
72562306a36Sopenharmony_ci	 * We use LKEY == zero for kernel virtual addresses
72662306a36Sopenharmony_ci	 * (see rvt_get_dma_mr()).
72762306a36Sopenharmony_ci	 */
72862306a36Sopenharmony_ci	if (sge->lkey == 0) {
72962306a36Sopenharmony_ci		struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
73062306a36Sopenharmony_ci
73162306a36Sopenharmony_ci		if (pd->user)
73262306a36Sopenharmony_ci			return -EINVAL;
73362306a36Sopenharmony_ci		if (rvt_sge_adjacent(last_sge, sge))
73462306a36Sopenharmony_ci			return 0;
73562306a36Sopenharmony_ci		rcu_read_lock();
73662306a36Sopenharmony_ci		mr = rcu_dereference(dev->dma_mr);
73762306a36Sopenharmony_ci		if (!mr)
73862306a36Sopenharmony_ci			goto bail;
73962306a36Sopenharmony_ci		rvt_get_mr(mr);
74062306a36Sopenharmony_ci		rcu_read_unlock();
74162306a36Sopenharmony_ci
74262306a36Sopenharmony_ci		isge->mr = mr;
74362306a36Sopenharmony_ci		isge->vaddr = (void *)sge->addr;
74462306a36Sopenharmony_ci		isge->length = sge->length;
74562306a36Sopenharmony_ci		isge->sge_length = sge->length;
74662306a36Sopenharmony_ci		isge->m = 0;
74762306a36Sopenharmony_ci		isge->n = 0;
74862306a36Sopenharmony_ci		goto ok;
74962306a36Sopenharmony_ci	}
75062306a36Sopenharmony_ci	if (rvt_sge_adjacent(last_sge, sge))
75162306a36Sopenharmony_ci		return 0;
75262306a36Sopenharmony_ci	rcu_read_lock();
75362306a36Sopenharmony_ci	mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
75462306a36Sopenharmony_ci	if (!mr)
75562306a36Sopenharmony_ci		goto bail;
75662306a36Sopenharmony_ci	rvt_get_mr(mr);
75762306a36Sopenharmony_ci	if (!READ_ONCE(mr->lkey_published))
75862306a36Sopenharmony_ci		goto bail_unref;
75962306a36Sopenharmony_ci
76062306a36Sopenharmony_ci	if (unlikely(atomic_read(&mr->lkey_invalid) ||
76162306a36Sopenharmony_ci		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
76262306a36Sopenharmony_ci		goto bail_unref;
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci	off = sge->addr - mr->user_base;
76562306a36Sopenharmony_ci	if (unlikely(sge->addr < mr->user_base ||
76662306a36Sopenharmony_ci		     off + sge->length > mr->length ||
76762306a36Sopenharmony_ci		     (mr->access_flags & acc) != acc))
76862306a36Sopenharmony_ci		goto bail_unref;
76962306a36Sopenharmony_ci	rcu_read_unlock();
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci	off += mr->offset;
77262306a36Sopenharmony_ci	if (mr->page_shift) {
77362306a36Sopenharmony_ci		/*
77462306a36Sopenharmony_ci		 * page sizes are uniform power of 2 so no loop is necessary
77562306a36Sopenharmony_ci		 * entries_spanned_by_off is the number of times the loop below
77662306a36Sopenharmony_ci		 * would have executed.
77762306a36Sopenharmony_ci		*/
77862306a36Sopenharmony_ci		size_t entries_spanned_by_off;
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci		entries_spanned_by_off = off >> mr->page_shift;
78162306a36Sopenharmony_ci		off -= (entries_spanned_by_off << mr->page_shift);
78262306a36Sopenharmony_ci		m = entries_spanned_by_off / RVT_SEGSZ;
78362306a36Sopenharmony_ci		n = entries_spanned_by_off % RVT_SEGSZ;
78462306a36Sopenharmony_ci	} else {
78562306a36Sopenharmony_ci		m = 0;
78662306a36Sopenharmony_ci		n = 0;
78762306a36Sopenharmony_ci		while (off >= mr->map[m]->segs[n].length) {
78862306a36Sopenharmony_ci			off -= mr->map[m]->segs[n].length;
78962306a36Sopenharmony_ci			n++;
79062306a36Sopenharmony_ci			if (n >= RVT_SEGSZ) {
79162306a36Sopenharmony_ci				m++;
79262306a36Sopenharmony_ci				n = 0;
79362306a36Sopenharmony_ci			}
79462306a36Sopenharmony_ci		}
79562306a36Sopenharmony_ci	}
79662306a36Sopenharmony_ci	isge->mr = mr;
79762306a36Sopenharmony_ci	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
79862306a36Sopenharmony_ci	isge->length = mr->map[m]->segs[n].length - off;
79962306a36Sopenharmony_ci	isge->sge_length = sge->length;
80062306a36Sopenharmony_ci	isge->m = m;
80162306a36Sopenharmony_ci	isge->n = n;
80262306a36Sopenharmony_ciok:
80362306a36Sopenharmony_ci	trace_rvt_sge_new(isge, sge);
80462306a36Sopenharmony_ci	return 1;
80562306a36Sopenharmony_cibail_unref:
80662306a36Sopenharmony_ci	rvt_put_mr(mr);
80762306a36Sopenharmony_cibail:
80862306a36Sopenharmony_ci	rcu_read_unlock();
80962306a36Sopenharmony_ci	return -EINVAL;
81062306a36Sopenharmony_ci}
81162306a36Sopenharmony_ciEXPORT_SYMBOL(rvt_lkey_ok);
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci/**
81462306a36Sopenharmony_ci * rvt_rkey_ok - check the IB virtual address, length, and RKEY
81562306a36Sopenharmony_ci * @qp: qp for validation
81662306a36Sopenharmony_ci * @sge: SGE state
81762306a36Sopenharmony_ci * @len: length of data
81862306a36Sopenharmony_ci * @vaddr: virtual address to place data
81962306a36Sopenharmony_ci * @rkey: rkey to check
82062306a36Sopenharmony_ci * @acc: access flags
82162306a36Sopenharmony_ci *
82262306a36Sopenharmony_ci * Return: 1 if successful, otherwise 0.
82362306a36Sopenharmony_ci *
82462306a36Sopenharmony_ci * increments the reference count upon success
82562306a36Sopenharmony_ci */
82662306a36Sopenharmony_ciint rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
82762306a36Sopenharmony_ci		u32 len, u64 vaddr, u32 rkey, int acc)
82862306a36Sopenharmony_ci{
82962306a36Sopenharmony_ci	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
83062306a36Sopenharmony_ci	struct rvt_lkey_table *rkt = &dev->lkey_table;
83162306a36Sopenharmony_ci	struct rvt_mregion *mr;
83262306a36Sopenharmony_ci	unsigned n, m;
83362306a36Sopenharmony_ci	size_t off;
83462306a36Sopenharmony_ci
83562306a36Sopenharmony_ci	/*
83662306a36Sopenharmony_ci	 * We use RKEY == zero for kernel virtual addresses
83762306a36Sopenharmony_ci	 * (see rvt_get_dma_mr()).
83862306a36Sopenharmony_ci	 */
83962306a36Sopenharmony_ci	rcu_read_lock();
84062306a36Sopenharmony_ci	if (rkey == 0) {
84162306a36Sopenharmony_ci		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
84262306a36Sopenharmony_ci		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci		if (pd->user)
84562306a36Sopenharmony_ci			goto bail;
84662306a36Sopenharmony_ci		mr = rcu_dereference(rdi->dma_mr);
84762306a36Sopenharmony_ci		if (!mr)
84862306a36Sopenharmony_ci			goto bail;
84962306a36Sopenharmony_ci		rvt_get_mr(mr);
85062306a36Sopenharmony_ci		rcu_read_unlock();
85162306a36Sopenharmony_ci
85262306a36Sopenharmony_ci		sge->mr = mr;
85362306a36Sopenharmony_ci		sge->vaddr = (void *)vaddr;
85462306a36Sopenharmony_ci		sge->length = len;
85562306a36Sopenharmony_ci		sge->sge_length = len;
85662306a36Sopenharmony_ci		sge->m = 0;
85762306a36Sopenharmony_ci		sge->n = 0;
85862306a36Sopenharmony_ci		goto ok;
85962306a36Sopenharmony_ci	}
86062306a36Sopenharmony_ci
86162306a36Sopenharmony_ci	mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
86262306a36Sopenharmony_ci	if (!mr)
86362306a36Sopenharmony_ci		goto bail;
86462306a36Sopenharmony_ci	rvt_get_mr(mr);
86562306a36Sopenharmony_ci	/* insure mr read is before test */
86662306a36Sopenharmony_ci	if (!READ_ONCE(mr->lkey_published))
86762306a36Sopenharmony_ci		goto bail_unref;
86862306a36Sopenharmony_ci	if (unlikely(atomic_read(&mr->lkey_invalid) ||
86962306a36Sopenharmony_ci		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
87062306a36Sopenharmony_ci		goto bail_unref;
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	off = vaddr - mr->iova;
87362306a36Sopenharmony_ci	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
87462306a36Sopenharmony_ci		     (mr->access_flags & acc) == 0))
87562306a36Sopenharmony_ci		goto bail_unref;
87662306a36Sopenharmony_ci	rcu_read_unlock();
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	off += mr->offset;
87962306a36Sopenharmony_ci	if (mr->page_shift) {
88062306a36Sopenharmony_ci		/*
88162306a36Sopenharmony_ci		 * page sizes are uniform power of 2 so no loop is necessary
88262306a36Sopenharmony_ci		 * entries_spanned_by_off is the number of times the loop below
88362306a36Sopenharmony_ci		 * would have executed.
88462306a36Sopenharmony_ci		*/
88562306a36Sopenharmony_ci		size_t entries_spanned_by_off;
88662306a36Sopenharmony_ci
88762306a36Sopenharmony_ci		entries_spanned_by_off = off >> mr->page_shift;
88862306a36Sopenharmony_ci		off -= (entries_spanned_by_off << mr->page_shift);
88962306a36Sopenharmony_ci		m = entries_spanned_by_off / RVT_SEGSZ;
89062306a36Sopenharmony_ci		n = entries_spanned_by_off % RVT_SEGSZ;
89162306a36Sopenharmony_ci	} else {
89262306a36Sopenharmony_ci		m = 0;
89362306a36Sopenharmony_ci		n = 0;
89462306a36Sopenharmony_ci		while (off >= mr->map[m]->segs[n].length) {
89562306a36Sopenharmony_ci			off -= mr->map[m]->segs[n].length;
89662306a36Sopenharmony_ci			n++;
89762306a36Sopenharmony_ci			if (n >= RVT_SEGSZ) {
89862306a36Sopenharmony_ci				m++;
89962306a36Sopenharmony_ci				n = 0;
90062306a36Sopenharmony_ci			}
90162306a36Sopenharmony_ci		}
90262306a36Sopenharmony_ci	}
90362306a36Sopenharmony_ci	sge->mr = mr;
90462306a36Sopenharmony_ci	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
90562306a36Sopenharmony_ci	sge->length = mr->map[m]->segs[n].length - off;
90662306a36Sopenharmony_ci	sge->sge_length = len;
90762306a36Sopenharmony_ci	sge->m = m;
90862306a36Sopenharmony_ci	sge->n = n;
90962306a36Sopenharmony_ciok:
91062306a36Sopenharmony_ci	return 1;
91162306a36Sopenharmony_cibail_unref:
91262306a36Sopenharmony_ci	rvt_put_mr(mr);
91362306a36Sopenharmony_cibail:
91462306a36Sopenharmony_ci	rcu_read_unlock();
91562306a36Sopenharmony_ci	return 0;
91662306a36Sopenharmony_ci}
91762306a36Sopenharmony_ciEXPORT_SYMBOL(rvt_rkey_ok);
918