// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
58c2ecf20Sopenharmony_ci
68c2ecf20Sopenharmony_ci#include <linux/gfp.h>
78c2ecf20Sopenharmony_ci#include <rdma/ib_verbs.h>
88c2ecf20Sopenharmony_ci#include <linux/dma-mapping.h>
98c2ecf20Sopenharmony_ci#include <linux/slab.h>
108c2ecf20Sopenharmony_ci#include <linux/sched/mm.h>
118c2ecf20Sopenharmony_ci#include <linux/resource.h>
128c2ecf20Sopenharmony_ci
138c2ecf20Sopenharmony_ci#include "siw.h"
148c2ecf20Sopenharmony_ci#include "siw_mem.h"
158c2ecf20Sopenharmony_ci
168c2ecf20Sopenharmony_ci/*
178c2ecf20Sopenharmony_ci * Stag lookup is based on its index part only (24 bits).
188c2ecf20Sopenharmony_ci * The code avoids special Stag of zero and tries to randomize
198c2ecf20Sopenharmony_ci * STag values between 1 and SIW_STAG_MAX_INDEX.
208c2ecf20Sopenharmony_ci */
218c2ecf20Sopenharmony_ciint siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
228c2ecf20Sopenharmony_ci{
238c2ecf20Sopenharmony_ci	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
248c2ecf20Sopenharmony_ci	u32 id, next;
258c2ecf20Sopenharmony_ci
268c2ecf20Sopenharmony_ci	get_random_bytes(&next, 4);
278c2ecf20Sopenharmony_ci	next &= 0x00ffffff;
288c2ecf20Sopenharmony_ci
298c2ecf20Sopenharmony_ci	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
308c2ecf20Sopenharmony_ci	    GFP_KERNEL) < 0)
318c2ecf20Sopenharmony_ci		return -ENOMEM;
328c2ecf20Sopenharmony_ci
338c2ecf20Sopenharmony_ci	/* Set the STag index part */
348c2ecf20Sopenharmony_ci	m->stag = id << 8;
358c2ecf20Sopenharmony_ci
368c2ecf20Sopenharmony_ci	siw_dbg_mem(m, "new MEM object\n");
378c2ecf20Sopenharmony_ci
388c2ecf20Sopenharmony_ci	return 0;
398c2ecf20Sopenharmony_ci}
408c2ecf20Sopenharmony_ci
418c2ecf20Sopenharmony_ci/*
428c2ecf20Sopenharmony_ci * siw_mem_id2obj()
438c2ecf20Sopenharmony_ci *
448c2ecf20Sopenharmony_ci * resolves memory from stag given by id. might be called from:
458c2ecf20Sopenharmony_ci * o process context before sending out of sgl, or
468c2ecf20Sopenharmony_ci * o in softirq when resolving target memory
478c2ecf20Sopenharmony_ci */
488c2ecf20Sopenharmony_cistruct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
498c2ecf20Sopenharmony_ci{
508c2ecf20Sopenharmony_ci	struct siw_mem *mem;
518c2ecf20Sopenharmony_ci
528c2ecf20Sopenharmony_ci	rcu_read_lock();
538c2ecf20Sopenharmony_ci	mem = xa_load(&sdev->mem_xa, stag_index);
548c2ecf20Sopenharmony_ci	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
558c2ecf20Sopenharmony_ci		rcu_read_unlock();
568c2ecf20Sopenharmony_ci		return mem;
578c2ecf20Sopenharmony_ci	}
588c2ecf20Sopenharmony_ci	rcu_read_unlock();
598c2ecf20Sopenharmony_ci
608c2ecf20Sopenharmony_ci	return NULL;
618c2ecf20Sopenharmony_ci}
628c2ecf20Sopenharmony_ci
638c2ecf20Sopenharmony_cistatic void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
648c2ecf20Sopenharmony_ci			   bool dirty)
658c2ecf20Sopenharmony_ci{
668c2ecf20Sopenharmony_ci	unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
678c2ecf20Sopenharmony_ci}
688c2ecf20Sopenharmony_ci
698c2ecf20Sopenharmony_civoid siw_umem_release(struct siw_umem *umem, bool dirty)
708c2ecf20Sopenharmony_ci{
718c2ecf20Sopenharmony_ci	struct mm_struct *mm_s = umem->owning_mm;
728c2ecf20Sopenharmony_ci	int i, num_pages = umem->num_pages;
738c2ecf20Sopenharmony_ci
748c2ecf20Sopenharmony_ci	for (i = 0; num_pages; i++) {
758c2ecf20Sopenharmony_ci		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci		siw_free_plist(&umem->page_chunk[i], to_free,
788c2ecf20Sopenharmony_ci			       umem->writable && dirty);
798c2ecf20Sopenharmony_ci		kfree(umem->page_chunk[i].plist);
808c2ecf20Sopenharmony_ci		num_pages -= to_free;
818c2ecf20Sopenharmony_ci	}
828c2ecf20Sopenharmony_ci	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
838c2ecf20Sopenharmony_ci
848c2ecf20Sopenharmony_ci	mmdrop(mm_s);
858c2ecf20Sopenharmony_ci	kfree(umem->page_chunk);
868c2ecf20Sopenharmony_ci	kfree(umem);
878c2ecf20Sopenharmony_ci}
888c2ecf20Sopenharmony_ci
898c2ecf20Sopenharmony_ciint siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
908c2ecf20Sopenharmony_ci		   u64 start, u64 len, int rights)
918c2ecf20Sopenharmony_ci{
928c2ecf20Sopenharmony_ci	struct siw_device *sdev = to_siw_dev(pd->device);
938c2ecf20Sopenharmony_ci	struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
948c2ecf20Sopenharmony_ci	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
958c2ecf20Sopenharmony_ci	u32 id, next;
968c2ecf20Sopenharmony_ci
978c2ecf20Sopenharmony_ci	if (!mem)
988c2ecf20Sopenharmony_ci		return -ENOMEM;
998c2ecf20Sopenharmony_ci
1008c2ecf20Sopenharmony_ci	mem->mem_obj = mem_obj;
1018c2ecf20Sopenharmony_ci	mem->stag_valid = 0;
1028c2ecf20Sopenharmony_ci	mem->sdev = sdev;
1038c2ecf20Sopenharmony_ci	mem->va = start;
1048c2ecf20Sopenharmony_ci	mem->len = len;
1058c2ecf20Sopenharmony_ci	mem->pd = pd;
1068c2ecf20Sopenharmony_ci	mem->perms = rights & IWARP_ACCESS_MASK;
1078c2ecf20Sopenharmony_ci	kref_init(&mem->ref);
1088c2ecf20Sopenharmony_ci
1098c2ecf20Sopenharmony_ci	get_random_bytes(&next, 4);
1108c2ecf20Sopenharmony_ci	next &= 0x00ffffff;
1118c2ecf20Sopenharmony_ci
1128c2ecf20Sopenharmony_ci	if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
1138c2ecf20Sopenharmony_ci	    GFP_KERNEL) < 0) {
1148c2ecf20Sopenharmony_ci		kfree(mem);
1158c2ecf20Sopenharmony_ci		return -ENOMEM;
1168c2ecf20Sopenharmony_ci	}
1178c2ecf20Sopenharmony_ci
1188c2ecf20Sopenharmony_ci	mr->mem = mem;
1198c2ecf20Sopenharmony_ci	/* Set the STag index part */
1208c2ecf20Sopenharmony_ci	mem->stag = id << 8;
1218c2ecf20Sopenharmony_ci	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
1228c2ecf20Sopenharmony_ci
1238c2ecf20Sopenharmony_ci	return 0;
1248c2ecf20Sopenharmony_ci}
1258c2ecf20Sopenharmony_ci
1268c2ecf20Sopenharmony_civoid siw_mr_drop_mem(struct siw_mr *mr)
1278c2ecf20Sopenharmony_ci{
1288c2ecf20Sopenharmony_ci	struct siw_mem *mem = mr->mem, *found;
1298c2ecf20Sopenharmony_ci
1308c2ecf20Sopenharmony_ci	mem->stag_valid = 0;
1318c2ecf20Sopenharmony_ci
1328c2ecf20Sopenharmony_ci	/* make STag invalid visible asap */
1338c2ecf20Sopenharmony_ci	smp_mb();
1348c2ecf20Sopenharmony_ci
1358c2ecf20Sopenharmony_ci	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
1368c2ecf20Sopenharmony_ci	WARN_ON(found != mem);
1378c2ecf20Sopenharmony_ci	siw_mem_put(mem);
1388c2ecf20Sopenharmony_ci}
1398c2ecf20Sopenharmony_ci
1408c2ecf20Sopenharmony_civoid siw_free_mem(struct kref *ref)
1418c2ecf20Sopenharmony_ci{
1428c2ecf20Sopenharmony_ci	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
1438c2ecf20Sopenharmony_ci
1448c2ecf20Sopenharmony_ci	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
1458c2ecf20Sopenharmony_ci
1468c2ecf20Sopenharmony_ci	if (!mem->is_mw && mem->mem_obj) {
1478c2ecf20Sopenharmony_ci		if (mem->is_pbl == 0)
1488c2ecf20Sopenharmony_ci			siw_umem_release(mem->umem, true);
1498c2ecf20Sopenharmony_ci		else
1508c2ecf20Sopenharmony_ci			kfree(mem->pbl);
1518c2ecf20Sopenharmony_ci	}
1528c2ecf20Sopenharmony_ci	kfree(mem);
1538c2ecf20Sopenharmony_ci}
1548c2ecf20Sopenharmony_ci
1558c2ecf20Sopenharmony_ci/*
1568c2ecf20Sopenharmony_ci * siw_check_mem()
1578c2ecf20Sopenharmony_ci *
1588c2ecf20Sopenharmony_ci * Check protection domain, STAG state, access permissions and
1598c2ecf20Sopenharmony_ci * address range for memory object.
1608c2ecf20Sopenharmony_ci *
1618c2ecf20Sopenharmony_ci * @pd:		Protection Domain memory should belong to
1628c2ecf20Sopenharmony_ci * @mem:	memory to be checked
1638c2ecf20Sopenharmony_ci * @addr:	starting addr of mem
1648c2ecf20Sopenharmony_ci * @perms:	requested access permissions
1658c2ecf20Sopenharmony_ci * @len:	len of memory interval to be checked
1668c2ecf20Sopenharmony_ci *
1678c2ecf20Sopenharmony_ci */
1688c2ecf20Sopenharmony_ciint siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
1698c2ecf20Sopenharmony_ci		  enum ib_access_flags perms, int len)
1708c2ecf20Sopenharmony_ci{
1718c2ecf20Sopenharmony_ci	if (!mem->stag_valid) {
1728c2ecf20Sopenharmony_ci		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
1738c2ecf20Sopenharmony_ci		return -E_STAG_INVALID;
1748c2ecf20Sopenharmony_ci	}
1758c2ecf20Sopenharmony_ci	if (mem->pd != pd) {
1768c2ecf20Sopenharmony_ci		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
1778c2ecf20Sopenharmony_ci		return -E_PD_MISMATCH;
1788c2ecf20Sopenharmony_ci	}
1798c2ecf20Sopenharmony_ci	/*
1808c2ecf20Sopenharmony_ci	 * check access permissions
1818c2ecf20Sopenharmony_ci	 */
1828c2ecf20Sopenharmony_ci	if ((mem->perms & perms) < perms) {
1838c2ecf20Sopenharmony_ci		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
1848c2ecf20Sopenharmony_ci			   mem->perms, perms);
1858c2ecf20Sopenharmony_ci		return -E_ACCESS_PERM;
1868c2ecf20Sopenharmony_ci	}
1878c2ecf20Sopenharmony_ci	/*
1888c2ecf20Sopenharmony_ci	 * Check if access falls into valid memory interval.
1898c2ecf20Sopenharmony_ci	 */
1908c2ecf20Sopenharmony_ci	if (addr < mem->va || addr + len > mem->va + mem->len) {
1918c2ecf20Sopenharmony_ci		siw_dbg_pd(pd, "MEM interval len %d\n", len);
1928c2ecf20Sopenharmony_ci		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
1938c2ecf20Sopenharmony_ci			   (void *)(uintptr_t)addr,
1948c2ecf20Sopenharmony_ci			   (void *)(uintptr_t)(addr + len));
1958c2ecf20Sopenharmony_ci		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
1968c2ecf20Sopenharmony_ci			   (void *)(uintptr_t)mem->va,
1978c2ecf20Sopenharmony_ci			   (void *)(uintptr_t)(mem->va + mem->len),
1988c2ecf20Sopenharmony_ci			   mem->stag);
1998c2ecf20Sopenharmony_ci
2008c2ecf20Sopenharmony_ci		return -E_BASE_BOUNDS;
2018c2ecf20Sopenharmony_ci	}
2028c2ecf20Sopenharmony_ci	return E_ACCESS_OK;
2038c2ecf20Sopenharmony_ci}
2048c2ecf20Sopenharmony_ci
2058c2ecf20Sopenharmony_ci/*
2068c2ecf20Sopenharmony_ci * siw_check_sge()
2078c2ecf20Sopenharmony_ci *
2088c2ecf20Sopenharmony_ci * Check SGE for access rights in given interval
2098c2ecf20Sopenharmony_ci *
2108c2ecf20Sopenharmony_ci * @pd:		Protection Domain memory should belong to
2118c2ecf20Sopenharmony_ci * @sge:	SGE to be checked
2128c2ecf20Sopenharmony_ci * @mem:	location of memory reference within array
2138c2ecf20Sopenharmony_ci * @perms:	requested access permissions
2148c2ecf20Sopenharmony_ci * @off:	starting offset in SGE
2158c2ecf20Sopenharmony_ci * @len:	len of memory interval to be checked
2168c2ecf20Sopenharmony_ci *
2178c2ecf20Sopenharmony_ci * NOTE: Function references SGE's memory object (mem->obj)
2188c2ecf20Sopenharmony_ci * if not yet done. New reference is kept if check went ok and
2198c2ecf20Sopenharmony_ci * released if check failed. If mem->obj is already valid, no new
2208c2ecf20Sopenharmony_ci * lookup is being done and mem is not released it check fails.
2218c2ecf20Sopenharmony_ci */
2228c2ecf20Sopenharmony_ciint siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
2238c2ecf20Sopenharmony_ci		  enum ib_access_flags perms, u32 off, int len)
2248c2ecf20Sopenharmony_ci{
2258c2ecf20Sopenharmony_ci	struct siw_device *sdev = to_siw_dev(pd->device);
2268c2ecf20Sopenharmony_ci	struct siw_mem *new = NULL;
2278c2ecf20Sopenharmony_ci	int rv = E_ACCESS_OK;
2288c2ecf20Sopenharmony_ci
2298c2ecf20Sopenharmony_ci	if (len + off > sge->length) {
2308c2ecf20Sopenharmony_ci		rv = -E_BASE_BOUNDS;
2318c2ecf20Sopenharmony_ci		goto fail;
2328c2ecf20Sopenharmony_ci	}
2338c2ecf20Sopenharmony_ci	if (*mem == NULL) {
2348c2ecf20Sopenharmony_ci		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
2358c2ecf20Sopenharmony_ci		if (unlikely(!new)) {
2368c2ecf20Sopenharmony_ci			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
2378c2ecf20Sopenharmony_ci			rv = -E_STAG_INVALID;
2388c2ecf20Sopenharmony_ci			goto fail;
2398c2ecf20Sopenharmony_ci		}
2408c2ecf20Sopenharmony_ci		*mem = new;
2418c2ecf20Sopenharmony_ci	}
2428c2ecf20Sopenharmony_ci	/* Check if user re-registered with different STag key */
2438c2ecf20Sopenharmony_ci	if (unlikely((*mem)->stag != sge->lkey)) {
2448c2ecf20Sopenharmony_ci		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
2458c2ecf20Sopenharmony_ci		rv = -E_STAG_INVALID;
2468c2ecf20Sopenharmony_ci		goto fail;
2478c2ecf20Sopenharmony_ci	}
2488c2ecf20Sopenharmony_ci	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
2498c2ecf20Sopenharmony_ci	if (unlikely(rv))
2508c2ecf20Sopenharmony_ci		goto fail;
2518c2ecf20Sopenharmony_ci
2528c2ecf20Sopenharmony_ci	return 0;
2538c2ecf20Sopenharmony_ci
2548c2ecf20Sopenharmony_cifail:
2558c2ecf20Sopenharmony_ci	if (new) {
2568c2ecf20Sopenharmony_ci		*mem = NULL;
2578c2ecf20Sopenharmony_ci		siw_mem_put(new);
2588c2ecf20Sopenharmony_ci	}
2598c2ecf20Sopenharmony_ci	return rv;
2608c2ecf20Sopenharmony_ci}
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_civoid siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
2638c2ecf20Sopenharmony_ci{
2648c2ecf20Sopenharmony_ci	switch (op) {
2658c2ecf20Sopenharmony_ci	case SIW_OP_SEND:
2668c2ecf20Sopenharmony_ci	case SIW_OP_WRITE:
2678c2ecf20Sopenharmony_ci	case SIW_OP_SEND_WITH_IMM:
2688c2ecf20Sopenharmony_ci	case SIW_OP_SEND_REMOTE_INV:
2698c2ecf20Sopenharmony_ci	case SIW_OP_READ:
2708c2ecf20Sopenharmony_ci	case SIW_OP_READ_LOCAL_INV:
2718c2ecf20Sopenharmony_ci		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
2728c2ecf20Sopenharmony_ci			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
2738c2ecf20Sopenharmony_ci		break;
2748c2ecf20Sopenharmony_ci
2758c2ecf20Sopenharmony_ci	case SIW_OP_RECEIVE:
2768c2ecf20Sopenharmony_ci		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
2778c2ecf20Sopenharmony_ci		break;
2788c2ecf20Sopenharmony_ci
2798c2ecf20Sopenharmony_ci	case SIW_OP_READ_RESPONSE:
2808c2ecf20Sopenharmony_ci		siw_unref_mem_sgl(wqe->mem, 1);
2818c2ecf20Sopenharmony_ci		break;
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci	default:
2848c2ecf20Sopenharmony_ci		/*
2858c2ecf20Sopenharmony_ci		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
2868c2ecf20Sopenharmony_ci		 * do not hold memory references
2878c2ecf20Sopenharmony_ci		 */
2888c2ecf20Sopenharmony_ci		break;
2898c2ecf20Sopenharmony_ci	}
2908c2ecf20Sopenharmony_ci}
2918c2ecf20Sopenharmony_ci
2928c2ecf20Sopenharmony_ciint siw_invalidate_stag(struct ib_pd *pd, u32 stag)
2938c2ecf20Sopenharmony_ci{
2948c2ecf20Sopenharmony_ci	struct siw_device *sdev = to_siw_dev(pd->device);
2958c2ecf20Sopenharmony_ci	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
2968c2ecf20Sopenharmony_ci	int rv = 0;
2978c2ecf20Sopenharmony_ci
2988c2ecf20Sopenharmony_ci	if (unlikely(!mem)) {
2998c2ecf20Sopenharmony_ci		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
3008c2ecf20Sopenharmony_ci		return -EINVAL;
3018c2ecf20Sopenharmony_ci	}
3028c2ecf20Sopenharmony_ci	if (unlikely(mem->pd != pd)) {
3038c2ecf20Sopenharmony_ci		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
3048c2ecf20Sopenharmony_ci		rv = -EACCES;
3058c2ecf20Sopenharmony_ci		goto out;
3068c2ecf20Sopenharmony_ci	}
3078c2ecf20Sopenharmony_ci	/*
3088c2ecf20Sopenharmony_ci	 * Per RDMA verbs definition, an STag may already be in invalid
3098c2ecf20Sopenharmony_ci	 * state if invalidation is requested. So no state check here.
3108c2ecf20Sopenharmony_ci	 */
3118c2ecf20Sopenharmony_ci	mem->stag_valid = 0;
3128c2ecf20Sopenharmony_ci
3138c2ecf20Sopenharmony_ci	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
3148c2ecf20Sopenharmony_ciout:
3158c2ecf20Sopenharmony_ci	siw_mem_put(mem);
3168c2ecf20Sopenharmony_ci	return rv;
3178c2ecf20Sopenharmony_ci}
3188c2ecf20Sopenharmony_ci
3198c2ecf20Sopenharmony_ci/*
3208c2ecf20Sopenharmony_ci * Gets physical address backed by PBL element. Address is referenced
3218c2ecf20Sopenharmony_ci * by linear byte offset into list of variably sized PB elements.
3228c2ecf20Sopenharmony_ci * Optionally, provides remaining len within current element, and
3238c2ecf20Sopenharmony_ci * current PBL index for later resume at same element.
3248c2ecf20Sopenharmony_ci */
3258c2ecf20Sopenharmony_cidma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
3268c2ecf20Sopenharmony_ci{
3278c2ecf20Sopenharmony_ci	int i = idx ? *idx : 0;
3288c2ecf20Sopenharmony_ci
3298c2ecf20Sopenharmony_ci	while (i < pbl->num_buf) {
3308c2ecf20Sopenharmony_ci		struct siw_pble *pble = &pbl->pbe[i];
3318c2ecf20Sopenharmony_ci
3328c2ecf20Sopenharmony_ci		if (pble->pbl_off + pble->size > off) {
3338c2ecf20Sopenharmony_ci			u64 pble_off = off - pble->pbl_off;
3348c2ecf20Sopenharmony_ci
3358c2ecf20Sopenharmony_ci			if (len)
3368c2ecf20Sopenharmony_ci				*len = pble->size - pble_off;
3378c2ecf20Sopenharmony_ci			if (idx)
3388c2ecf20Sopenharmony_ci				*idx = i;
3398c2ecf20Sopenharmony_ci
3408c2ecf20Sopenharmony_ci			return pble->addr + pble_off;
3418c2ecf20Sopenharmony_ci		}
3428c2ecf20Sopenharmony_ci		i++;
3438c2ecf20Sopenharmony_ci	}
3448c2ecf20Sopenharmony_ci	if (len)
3458c2ecf20Sopenharmony_ci		*len = 0;
3468c2ecf20Sopenharmony_ci	return 0;
3478c2ecf20Sopenharmony_ci}
3488c2ecf20Sopenharmony_ci
3498c2ecf20Sopenharmony_cistruct siw_pbl *siw_pbl_alloc(u32 num_buf)
3508c2ecf20Sopenharmony_ci{
3518c2ecf20Sopenharmony_ci	struct siw_pbl *pbl;
3528c2ecf20Sopenharmony_ci
3538c2ecf20Sopenharmony_ci	if (num_buf == 0)
3548c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
3558c2ecf20Sopenharmony_ci
3568c2ecf20Sopenharmony_ci	pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
3578c2ecf20Sopenharmony_ci	if (!pbl)
3588c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
3598c2ecf20Sopenharmony_ci
3608c2ecf20Sopenharmony_ci	pbl->max_buf = num_buf;
3618c2ecf20Sopenharmony_ci
3628c2ecf20Sopenharmony_ci	return pbl;
3638c2ecf20Sopenharmony_ci}
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_cistruct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
3668c2ecf20Sopenharmony_ci{
3678c2ecf20Sopenharmony_ci	struct siw_umem *umem;
3688c2ecf20Sopenharmony_ci	struct mm_struct *mm_s;
3698c2ecf20Sopenharmony_ci	u64 first_page_va;
3708c2ecf20Sopenharmony_ci	unsigned long mlock_limit;
3718c2ecf20Sopenharmony_ci	unsigned int foll_flags = FOLL_WRITE;
3728c2ecf20Sopenharmony_ci	int num_pages, num_chunks, i, rv = 0;
3738c2ecf20Sopenharmony_ci
3748c2ecf20Sopenharmony_ci	if (!can_do_mlock())
3758c2ecf20Sopenharmony_ci		return ERR_PTR(-EPERM);
3768c2ecf20Sopenharmony_ci
3778c2ecf20Sopenharmony_ci	if (!len)
3788c2ecf20Sopenharmony_ci		return ERR_PTR(-EINVAL);
3798c2ecf20Sopenharmony_ci
3808c2ecf20Sopenharmony_ci	first_page_va = start & PAGE_MASK;
3818c2ecf20Sopenharmony_ci	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
3828c2ecf20Sopenharmony_ci	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
3838c2ecf20Sopenharmony_ci
3848c2ecf20Sopenharmony_ci	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
3858c2ecf20Sopenharmony_ci	if (!umem)
3868c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
3878c2ecf20Sopenharmony_ci
3888c2ecf20Sopenharmony_ci	mm_s = current->mm;
3898c2ecf20Sopenharmony_ci	umem->owning_mm = mm_s;
3908c2ecf20Sopenharmony_ci	umem->writable = writable;
3918c2ecf20Sopenharmony_ci
3928c2ecf20Sopenharmony_ci	mmgrab(mm_s);
3938c2ecf20Sopenharmony_ci
3948c2ecf20Sopenharmony_ci	if (!writable)
3958c2ecf20Sopenharmony_ci		foll_flags |= FOLL_FORCE;
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	mmap_read_lock(mm_s);
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci	if (num_pages + atomic64_read(&mm_s->pinned_vm) > mlock_limit) {
4028c2ecf20Sopenharmony_ci		rv = -ENOMEM;
4038c2ecf20Sopenharmony_ci		goto out_sem_up;
4048c2ecf20Sopenharmony_ci	}
4058c2ecf20Sopenharmony_ci	umem->fp_addr = first_page_va;
4068c2ecf20Sopenharmony_ci
4078c2ecf20Sopenharmony_ci	umem->page_chunk =
4088c2ecf20Sopenharmony_ci		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
4098c2ecf20Sopenharmony_ci	if (!umem->page_chunk) {
4108c2ecf20Sopenharmony_ci		rv = -ENOMEM;
4118c2ecf20Sopenharmony_ci		goto out_sem_up;
4128c2ecf20Sopenharmony_ci	}
4138c2ecf20Sopenharmony_ci	for (i = 0; num_pages; i++) {
4148c2ecf20Sopenharmony_ci		int got, nents = min_t(int, num_pages, PAGES_PER_CHUNK);
4158c2ecf20Sopenharmony_ci
4168c2ecf20Sopenharmony_ci		umem->page_chunk[i].plist =
4178c2ecf20Sopenharmony_ci			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
4188c2ecf20Sopenharmony_ci		if (!umem->page_chunk[i].plist) {
4198c2ecf20Sopenharmony_ci			rv = -ENOMEM;
4208c2ecf20Sopenharmony_ci			goto out_sem_up;
4218c2ecf20Sopenharmony_ci		}
4228c2ecf20Sopenharmony_ci		got = 0;
4238c2ecf20Sopenharmony_ci		while (nents) {
4248c2ecf20Sopenharmony_ci			struct page **plist = &umem->page_chunk[i].plist[got];
4258c2ecf20Sopenharmony_ci
4268c2ecf20Sopenharmony_ci			rv = pin_user_pages(first_page_va, nents,
4278c2ecf20Sopenharmony_ci					    foll_flags | FOLL_LONGTERM,
4288c2ecf20Sopenharmony_ci					    plist, NULL);
4298c2ecf20Sopenharmony_ci			if (rv < 0)
4308c2ecf20Sopenharmony_ci				goto out_sem_up;
4318c2ecf20Sopenharmony_ci
4328c2ecf20Sopenharmony_ci			umem->num_pages += rv;
4338c2ecf20Sopenharmony_ci			atomic64_add(rv, &mm_s->pinned_vm);
4348c2ecf20Sopenharmony_ci			first_page_va += rv * PAGE_SIZE;
4358c2ecf20Sopenharmony_ci			nents -= rv;
4368c2ecf20Sopenharmony_ci			got += rv;
4378c2ecf20Sopenharmony_ci		}
4388c2ecf20Sopenharmony_ci		num_pages -= got;
4398c2ecf20Sopenharmony_ci	}
4408c2ecf20Sopenharmony_ciout_sem_up:
4418c2ecf20Sopenharmony_ci	mmap_read_unlock(mm_s);
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci	if (rv > 0)
4448c2ecf20Sopenharmony_ci		return umem;
4458c2ecf20Sopenharmony_ci
4468c2ecf20Sopenharmony_ci	siw_umem_release(umem, false);
4478c2ecf20Sopenharmony_ci
4488c2ecf20Sopenharmony_ci	return ERR_PTR(rv);
4498c2ecf20Sopenharmony_ci}
450