// SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause

/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/gfp.h>
762306a36Sopenharmony_ci#include <rdma/ib_verbs.h>
862306a36Sopenharmony_ci#include <linux/dma-mapping.h>
962306a36Sopenharmony_ci#include <linux/slab.h>
1062306a36Sopenharmony_ci#include <linux/sched/mm.h>
1162306a36Sopenharmony_ci#include <linux/resource.h>
1262306a36Sopenharmony_ci
1362306a36Sopenharmony_ci#include "siw.h"
1462306a36Sopenharmony_ci#include "siw_mem.h"
1562306a36Sopenharmony_ci
1662306a36Sopenharmony_ci/*
1762306a36Sopenharmony_ci * Stag lookup is based on its index part only (24 bits).
1862306a36Sopenharmony_ci * The code avoids special Stag of zero and tries to randomize
1962306a36Sopenharmony_ci * STag values between 1 and SIW_STAG_MAX_INDEX.
2062306a36Sopenharmony_ci */
2162306a36Sopenharmony_ciint siw_mem_add(struct siw_device *sdev, struct siw_mem *m)
2262306a36Sopenharmony_ci{
2362306a36Sopenharmony_ci	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
2462306a36Sopenharmony_ci	u32 id, next;
2562306a36Sopenharmony_ci
2662306a36Sopenharmony_ci	get_random_bytes(&next, 4);
2762306a36Sopenharmony_ci	next &= 0x00ffffff;
2862306a36Sopenharmony_ci
2962306a36Sopenharmony_ci	if (xa_alloc_cyclic(&sdev->mem_xa, &id, m, limit, &next,
3062306a36Sopenharmony_ci	    GFP_KERNEL) < 0)
3162306a36Sopenharmony_ci		return -ENOMEM;
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_ci	/* Set the STag index part */
3462306a36Sopenharmony_ci	m->stag = id << 8;
3562306a36Sopenharmony_ci
3662306a36Sopenharmony_ci	siw_dbg_mem(m, "new MEM object\n");
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci	return 0;
3962306a36Sopenharmony_ci}
4062306a36Sopenharmony_ci
4162306a36Sopenharmony_ci/*
4262306a36Sopenharmony_ci * siw_mem_id2obj()
4362306a36Sopenharmony_ci *
4462306a36Sopenharmony_ci * resolves memory from stag given by id. might be called from:
4562306a36Sopenharmony_ci * o process context before sending out of sgl, or
4662306a36Sopenharmony_ci * o in softirq when resolving target memory
4762306a36Sopenharmony_ci */
4862306a36Sopenharmony_cistruct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
4962306a36Sopenharmony_ci{
5062306a36Sopenharmony_ci	struct siw_mem *mem;
5162306a36Sopenharmony_ci
5262306a36Sopenharmony_ci	rcu_read_lock();
5362306a36Sopenharmony_ci	mem = xa_load(&sdev->mem_xa, stag_index);
5462306a36Sopenharmony_ci	if (likely(mem && kref_get_unless_zero(&mem->ref))) {
5562306a36Sopenharmony_ci		rcu_read_unlock();
5662306a36Sopenharmony_ci		return mem;
5762306a36Sopenharmony_ci	}
5862306a36Sopenharmony_ci	rcu_read_unlock();
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	return NULL;
6162306a36Sopenharmony_ci}
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_cistatic void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
6462306a36Sopenharmony_ci			   bool dirty)
6562306a36Sopenharmony_ci{
6662306a36Sopenharmony_ci	unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
6762306a36Sopenharmony_ci}
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_civoid siw_umem_release(struct siw_umem *umem, bool dirty)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	struct mm_struct *mm_s = umem->owning_mm;
7262306a36Sopenharmony_ci	int i, num_pages = umem->num_pages;
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_ci	for (i = 0; num_pages; i++) {
7562306a36Sopenharmony_ci		int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_ci		siw_free_plist(&umem->page_chunk[i], to_free,
7862306a36Sopenharmony_ci			       umem->writable && dirty);
7962306a36Sopenharmony_ci		kfree(umem->page_chunk[i].plist);
8062306a36Sopenharmony_ci		num_pages -= to_free;
8162306a36Sopenharmony_ci	}
8262306a36Sopenharmony_ci	atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	mmdrop(mm_s);
8562306a36Sopenharmony_ci	kfree(umem->page_chunk);
8662306a36Sopenharmony_ci	kfree(umem);
8762306a36Sopenharmony_ci}
8862306a36Sopenharmony_ci
8962306a36Sopenharmony_ciint siw_mr_add_mem(struct siw_mr *mr, struct ib_pd *pd, void *mem_obj,
9062306a36Sopenharmony_ci		   u64 start, u64 len, int rights)
9162306a36Sopenharmony_ci{
9262306a36Sopenharmony_ci	struct siw_device *sdev = to_siw_dev(pd->device);
9362306a36Sopenharmony_ci	struct siw_mem *mem = kzalloc(sizeof(*mem), GFP_KERNEL);
9462306a36Sopenharmony_ci	struct xa_limit limit = XA_LIMIT(1, 0x00ffffff);
9562306a36Sopenharmony_ci	u32 id, next;
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci	if (!mem)
9862306a36Sopenharmony_ci		return -ENOMEM;
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	mem->mem_obj = mem_obj;
10162306a36Sopenharmony_ci	mem->stag_valid = 0;
10262306a36Sopenharmony_ci	mem->sdev = sdev;
10362306a36Sopenharmony_ci	mem->va = start;
10462306a36Sopenharmony_ci	mem->len = len;
10562306a36Sopenharmony_ci	mem->pd = pd;
10662306a36Sopenharmony_ci	mem->perms = rights & IWARP_ACCESS_MASK;
10762306a36Sopenharmony_ci	kref_init(&mem->ref);
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	get_random_bytes(&next, 4);
11062306a36Sopenharmony_ci	next &= 0x00ffffff;
11162306a36Sopenharmony_ci
11262306a36Sopenharmony_ci	if (xa_alloc_cyclic(&sdev->mem_xa, &id, mem, limit, &next,
11362306a36Sopenharmony_ci	    GFP_KERNEL) < 0) {
11462306a36Sopenharmony_ci		kfree(mem);
11562306a36Sopenharmony_ci		return -ENOMEM;
11662306a36Sopenharmony_ci	}
11762306a36Sopenharmony_ci
11862306a36Sopenharmony_ci	mr->mem = mem;
11962306a36Sopenharmony_ci	/* Set the STag index part */
12062306a36Sopenharmony_ci	mem->stag = id << 8;
12162306a36Sopenharmony_ci	mr->base_mr.lkey = mr->base_mr.rkey = mem->stag;
12262306a36Sopenharmony_ci
12362306a36Sopenharmony_ci	return 0;
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_civoid siw_mr_drop_mem(struct siw_mr *mr)
12762306a36Sopenharmony_ci{
12862306a36Sopenharmony_ci	struct siw_mem *mem = mr->mem, *found;
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ci	mem->stag_valid = 0;
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	/* make STag invalid visible asap */
13362306a36Sopenharmony_ci	smp_mb();
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	found = xa_erase(&mem->sdev->mem_xa, mem->stag >> 8);
13662306a36Sopenharmony_ci	WARN_ON(found != mem);
13762306a36Sopenharmony_ci	siw_mem_put(mem);
13862306a36Sopenharmony_ci}
13962306a36Sopenharmony_ci
14062306a36Sopenharmony_civoid siw_free_mem(struct kref *ref)
14162306a36Sopenharmony_ci{
14262306a36Sopenharmony_ci	struct siw_mem *mem = container_of(ref, struct siw_mem, ref);
14362306a36Sopenharmony_ci
14462306a36Sopenharmony_ci	siw_dbg_mem(mem, "free mem, pbl: %s\n", mem->is_pbl ? "y" : "n");
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_ci	if (!mem->is_mw && mem->mem_obj) {
14762306a36Sopenharmony_ci		if (mem->is_pbl == 0)
14862306a36Sopenharmony_ci			siw_umem_release(mem->umem, true);
14962306a36Sopenharmony_ci		else
15062306a36Sopenharmony_ci			kfree(mem->pbl);
15162306a36Sopenharmony_ci	}
15262306a36Sopenharmony_ci	kfree(mem);
15362306a36Sopenharmony_ci}
15462306a36Sopenharmony_ci
15562306a36Sopenharmony_ci/*
15662306a36Sopenharmony_ci * siw_check_mem()
15762306a36Sopenharmony_ci *
15862306a36Sopenharmony_ci * Check protection domain, STAG state, access permissions and
15962306a36Sopenharmony_ci * address range for memory object.
16062306a36Sopenharmony_ci *
16162306a36Sopenharmony_ci * @pd:		Protection Domain memory should belong to
16262306a36Sopenharmony_ci * @mem:	memory to be checked
16362306a36Sopenharmony_ci * @addr:	starting addr of mem
16462306a36Sopenharmony_ci * @perms:	requested access permissions
16562306a36Sopenharmony_ci * @len:	len of memory interval to be checked
16662306a36Sopenharmony_ci *
16762306a36Sopenharmony_ci */
16862306a36Sopenharmony_ciint siw_check_mem(struct ib_pd *pd, struct siw_mem *mem, u64 addr,
16962306a36Sopenharmony_ci		  enum ib_access_flags perms, int len)
17062306a36Sopenharmony_ci{
17162306a36Sopenharmony_ci	if (!mem->stag_valid) {
17262306a36Sopenharmony_ci		siw_dbg_pd(pd, "STag 0x%08x invalid\n", mem->stag);
17362306a36Sopenharmony_ci		return -E_STAG_INVALID;
17462306a36Sopenharmony_ci	}
17562306a36Sopenharmony_ci	if (mem->pd != pd) {
17662306a36Sopenharmony_ci		siw_dbg_pd(pd, "STag 0x%08x: PD mismatch\n", mem->stag);
17762306a36Sopenharmony_ci		return -E_PD_MISMATCH;
17862306a36Sopenharmony_ci	}
17962306a36Sopenharmony_ci	/*
18062306a36Sopenharmony_ci	 * check access permissions
18162306a36Sopenharmony_ci	 */
18262306a36Sopenharmony_ci	if ((mem->perms & perms) < perms) {
18362306a36Sopenharmony_ci		siw_dbg_pd(pd, "permissions 0x%08x < 0x%08x\n",
18462306a36Sopenharmony_ci			   mem->perms, perms);
18562306a36Sopenharmony_ci		return -E_ACCESS_PERM;
18662306a36Sopenharmony_ci	}
18762306a36Sopenharmony_ci	/*
18862306a36Sopenharmony_ci	 * Check if access falls into valid memory interval.
18962306a36Sopenharmony_ci	 */
19062306a36Sopenharmony_ci	if (addr < mem->va || addr + len > mem->va + mem->len) {
19162306a36Sopenharmony_ci		siw_dbg_pd(pd, "MEM interval len %d\n", len);
19262306a36Sopenharmony_ci		siw_dbg_pd(pd, "[0x%pK, 0x%pK] out of bounds\n",
19362306a36Sopenharmony_ci			   (void *)(uintptr_t)addr,
19462306a36Sopenharmony_ci			   (void *)(uintptr_t)(addr + len));
19562306a36Sopenharmony_ci		siw_dbg_pd(pd, "[0x%pK, 0x%pK] STag=0x%08x\n",
19662306a36Sopenharmony_ci			   (void *)(uintptr_t)mem->va,
19762306a36Sopenharmony_ci			   (void *)(uintptr_t)(mem->va + mem->len),
19862306a36Sopenharmony_ci			   mem->stag);
19962306a36Sopenharmony_ci
20062306a36Sopenharmony_ci		return -E_BASE_BOUNDS;
20162306a36Sopenharmony_ci	}
20262306a36Sopenharmony_ci	return E_ACCESS_OK;
20362306a36Sopenharmony_ci}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci/*
20662306a36Sopenharmony_ci * siw_check_sge()
20762306a36Sopenharmony_ci *
20862306a36Sopenharmony_ci * Check SGE for access rights in given interval
20962306a36Sopenharmony_ci *
21062306a36Sopenharmony_ci * @pd:		Protection Domain memory should belong to
21162306a36Sopenharmony_ci * @sge:	SGE to be checked
21262306a36Sopenharmony_ci * @mem:	location of memory reference within array
21362306a36Sopenharmony_ci * @perms:	requested access permissions
21462306a36Sopenharmony_ci * @off:	starting offset in SGE
21562306a36Sopenharmony_ci * @len:	len of memory interval to be checked
21662306a36Sopenharmony_ci *
21762306a36Sopenharmony_ci * NOTE: Function references SGE's memory object (mem->obj)
21862306a36Sopenharmony_ci * if not yet done. New reference is kept if check went ok and
21962306a36Sopenharmony_ci * released if check failed. If mem->obj is already valid, no new
22062306a36Sopenharmony_ci * lookup is being done and mem is not released it check fails.
22162306a36Sopenharmony_ci */
22262306a36Sopenharmony_ciint siw_check_sge(struct ib_pd *pd, struct siw_sge *sge, struct siw_mem *mem[],
22362306a36Sopenharmony_ci		  enum ib_access_flags perms, u32 off, int len)
22462306a36Sopenharmony_ci{
22562306a36Sopenharmony_ci	struct siw_device *sdev = to_siw_dev(pd->device);
22662306a36Sopenharmony_ci	struct siw_mem *new = NULL;
22762306a36Sopenharmony_ci	int rv = E_ACCESS_OK;
22862306a36Sopenharmony_ci
22962306a36Sopenharmony_ci	if (len + off > sge->length) {
23062306a36Sopenharmony_ci		rv = -E_BASE_BOUNDS;
23162306a36Sopenharmony_ci		goto fail;
23262306a36Sopenharmony_ci	}
23362306a36Sopenharmony_ci	if (*mem == NULL) {
23462306a36Sopenharmony_ci		new = siw_mem_id2obj(sdev, sge->lkey >> 8);
23562306a36Sopenharmony_ci		if (unlikely(!new)) {
23662306a36Sopenharmony_ci			siw_dbg_pd(pd, "STag unknown: 0x%08x\n", sge->lkey);
23762306a36Sopenharmony_ci			rv = -E_STAG_INVALID;
23862306a36Sopenharmony_ci			goto fail;
23962306a36Sopenharmony_ci		}
24062306a36Sopenharmony_ci		*mem = new;
24162306a36Sopenharmony_ci	}
24262306a36Sopenharmony_ci	/* Check if user re-registered with different STag key */
24362306a36Sopenharmony_ci	if (unlikely((*mem)->stag != sge->lkey)) {
24462306a36Sopenharmony_ci		siw_dbg_mem((*mem), "STag mismatch: 0x%08x\n", sge->lkey);
24562306a36Sopenharmony_ci		rv = -E_STAG_INVALID;
24662306a36Sopenharmony_ci		goto fail;
24762306a36Sopenharmony_ci	}
24862306a36Sopenharmony_ci	rv = siw_check_mem(pd, *mem, sge->laddr + off, perms, len);
24962306a36Sopenharmony_ci	if (unlikely(rv))
25062306a36Sopenharmony_ci		goto fail;
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	return 0;
25362306a36Sopenharmony_ci
25462306a36Sopenharmony_cifail:
25562306a36Sopenharmony_ci	if (new) {
25662306a36Sopenharmony_ci		*mem = NULL;
25762306a36Sopenharmony_ci		siw_mem_put(new);
25862306a36Sopenharmony_ci	}
25962306a36Sopenharmony_ci	return rv;
26062306a36Sopenharmony_ci}
26162306a36Sopenharmony_ci
26262306a36Sopenharmony_civoid siw_wqe_put_mem(struct siw_wqe *wqe, enum siw_opcode op)
26362306a36Sopenharmony_ci{
26462306a36Sopenharmony_ci	switch (op) {
26562306a36Sopenharmony_ci	case SIW_OP_SEND:
26662306a36Sopenharmony_ci	case SIW_OP_WRITE:
26762306a36Sopenharmony_ci	case SIW_OP_SEND_WITH_IMM:
26862306a36Sopenharmony_ci	case SIW_OP_SEND_REMOTE_INV:
26962306a36Sopenharmony_ci	case SIW_OP_READ:
27062306a36Sopenharmony_ci	case SIW_OP_READ_LOCAL_INV:
27162306a36Sopenharmony_ci		if (!(wqe->sqe.flags & SIW_WQE_INLINE))
27262306a36Sopenharmony_ci			siw_unref_mem_sgl(wqe->mem, wqe->sqe.num_sge);
27362306a36Sopenharmony_ci		break;
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	case SIW_OP_RECEIVE:
27662306a36Sopenharmony_ci		siw_unref_mem_sgl(wqe->mem, wqe->rqe.num_sge);
27762306a36Sopenharmony_ci		break;
27862306a36Sopenharmony_ci
27962306a36Sopenharmony_ci	case SIW_OP_READ_RESPONSE:
28062306a36Sopenharmony_ci		siw_unref_mem_sgl(wqe->mem, 1);
28162306a36Sopenharmony_ci		break;
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	default:
28462306a36Sopenharmony_ci		/*
28562306a36Sopenharmony_ci		 * SIW_OP_INVAL_STAG and SIW_OP_REG_MR
28662306a36Sopenharmony_ci		 * do not hold memory references
28762306a36Sopenharmony_ci		 */
28862306a36Sopenharmony_ci		break;
28962306a36Sopenharmony_ci	}
29062306a36Sopenharmony_ci}
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ciint siw_invalidate_stag(struct ib_pd *pd, u32 stag)
29362306a36Sopenharmony_ci{
29462306a36Sopenharmony_ci	struct siw_device *sdev = to_siw_dev(pd->device);
29562306a36Sopenharmony_ci	struct siw_mem *mem = siw_mem_id2obj(sdev, stag >> 8);
29662306a36Sopenharmony_ci	int rv = 0;
29762306a36Sopenharmony_ci
29862306a36Sopenharmony_ci	if (unlikely(!mem)) {
29962306a36Sopenharmony_ci		siw_dbg_pd(pd, "STag 0x%08x unknown\n", stag);
30062306a36Sopenharmony_ci		return -EINVAL;
30162306a36Sopenharmony_ci	}
30262306a36Sopenharmony_ci	if (unlikely(mem->pd != pd)) {
30362306a36Sopenharmony_ci		siw_dbg_pd(pd, "PD mismatch for STag 0x%08x\n", stag);
30462306a36Sopenharmony_ci		rv = -EACCES;
30562306a36Sopenharmony_ci		goto out;
30662306a36Sopenharmony_ci	}
30762306a36Sopenharmony_ci	/*
30862306a36Sopenharmony_ci	 * Per RDMA verbs definition, an STag may already be in invalid
30962306a36Sopenharmony_ci	 * state if invalidation is requested. So no state check here.
31062306a36Sopenharmony_ci	 */
31162306a36Sopenharmony_ci	mem->stag_valid = 0;
31262306a36Sopenharmony_ci
31362306a36Sopenharmony_ci	siw_dbg_pd(pd, "STag 0x%08x now invalid\n", stag);
31462306a36Sopenharmony_ciout:
31562306a36Sopenharmony_ci	siw_mem_put(mem);
31662306a36Sopenharmony_ci	return rv;
31762306a36Sopenharmony_ci}
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci/*
32062306a36Sopenharmony_ci * Gets physical address backed by PBL element. Address is referenced
32162306a36Sopenharmony_ci * by linear byte offset into list of variably sized PB elements.
32262306a36Sopenharmony_ci * Optionally, provides remaining len within current element, and
32362306a36Sopenharmony_ci * current PBL index for later resume at same element.
32462306a36Sopenharmony_ci */
32562306a36Sopenharmony_cidma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx)
32662306a36Sopenharmony_ci{
32762306a36Sopenharmony_ci	int i = idx ? *idx : 0;
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	while (i < pbl->num_buf) {
33062306a36Sopenharmony_ci		struct siw_pble *pble = &pbl->pbe[i];
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci		if (pble->pbl_off + pble->size > off) {
33362306a36Sopenharmony_ci			u64 pble_off = off - pble->pbl_off;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci			if (len)
33662306a36Sopenharmony_ci				*len = pble->size - pble_off;
33762306a36Sopenharmony_ci			if (idx)
33862306a36Sopenharmony_ci				*idx = i;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci			return pble->addr + pble_off;
34162306a36Sopenharmony_ci		}
34262306a36Sopenharmony_ci		i++;
34362306a36Sopenharmony_ci	}
34462306a36Sopenharmony_ci	if (len)
34562306a36Sopenharmony_ci		*len = 0;
34662306a36Sopenharmony_ci	return 0;
34762306a36Sopenharmony_ci}
34862306a36Sopenharmony_ci
34962306a36Sopenharmony_cistruct siw_pbl *siw_pbl_alloc(u32 num_buf)
35062306a36Sopenharmony_ci{
35162306a36Sopenharmony_ci	struct siw_pbl *pbl;
35262306a36Sopenharmony_ci
35362306a36Sopenharmony_ci	if (num_buf == 0)
35462306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
35562306a36Sopenharmony_ci
35662306a36Sopenharmony_ci	pbl = kzalloc(struct_size(pbl, pbe, num_buf), GFP_KERNEL);
35762306a36Sopenharmony_ci	if (!pbl)
35862306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	pbl->max_buf = num_buf;
36162306a36Sopenharmony_ci
36262306a36Sopenharmony_ci	return pbl;
36362306a36Sopenharmony_ci}
36462306a36Sopenharmony_ci
36562306a36Sopenharmony_cistruct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
36662306a36Sopenharmony_ci{
36762306a36Sopenharmony_ci	struct siw_umem *umem;
36862306a36Sopenharmony_ci	struct mm_struct *mm_s;
36962306a36Sopenharmony_ci	u64 first_page_va;
37062306a36Sopenharmony_ci	unsigned long mlock_limit;
37162306a36Sopenharmony_ci	unsigned int foll_flags = FOLL_LONGTERM;
37262306a36Sopenharmony_ci	int num_pages, num_chunks, i, rv = 0;
37362306a36Sopenharmony_ci
37462306a36Sopenharmony_ci	if (!can_do_mlock())
37562306a36Sopenharmony_ci		return ERR_PTR(-EPERM);
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	if (!len)
37862306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
37962306a36Sopenharmony_ci
38062306a36Sopenharmony_ci	first_page_va = start & PAGE_MASK;
38162306a36Sopenharmony_ci	num_pages = PAGE_ALIGN(start + len - first_page_va) >> PAGE_SHIFT;
38262306a36Sopenharmony_ci	num_chunks = (num_pages >> CHUNK_SHIFT) + 1;
38362306a36Sopenharmony_ci
38462306a36Sopenharmony_ci	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
38562306a36Sopenharmony_ci	if (!umem)
38662306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
38762306a36Sopenharmony_ci
38862306a36Sopenharmony_ci	mm_s = current->mm;
38962306a36Sopenharmony_ci	umem->owning_mm = mm_s;
39062306a36Sopenharmony_ci	umem->writable = writable;
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	mmgrab(mm_s);
39362306a36Sopenharmony_ci
39462306a36Sopenharmony_ci	if (writable)
39562306a36Sopenharmony_ci		foll_flags |= FOLL_WRITE;
39662306a36Sopenharmony_ci
39762306a36Sopenharmony_ci	mmap_read_lock(mm_s);
39862306a36Sopenharmony_ci
39962306a36Sopenharmony_ci	mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) {
40262306a36Sopenharmony_ci		rv = -ENOMEM;
40362306a36Sopenharmony_ci		goto out_sem_up;
40462306a36Sopenharmony_ci	}
40562306a36Sopenharmony_ci	umem->fp_addr = first_page_va;
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_ci	umem->page_chunk =
40862306a36Sopenharmony_ci		kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
40962306a36Sopenharmony_ci	if (!umem->page_chunk) {
41062306a36Sopenharmony_ci		rv = -ENOMEM;
41162306a36Sopenharmony_ci		goto out_sem_up;
41262306a36Sopenharmony_ci	}
41362306a36Sopenharmony_ci	for (i = 0; num_pages; i++) {
41462306a36Sopenharmony_ci		int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
41562306a36Sopenharmony_ci		struct page **plist =
41662306a36Sopenharmony_ci			kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
41762306a36Sopenharmony_ci
41862306a36Sopenharmony_ci		if (!plist) {
41962306a36Sopenharmony_ci			rv = -ENOMEM;
42062306a36Sopenharmony_ci			goto out_sem_up;
42162306a36Sopenharmony_ci		}
42262306a36Sopenharmony_ci		umem->page_chunk[i].plist = plist;
42362306a36Sopenharmony_ci		while (nents) {
42462306a36Sopenharmony_ci			rv = pin_user_pages(first_page_va, nents, foll_flags,
42562306a36Sopenharmony_ci					    plist);
42662306a36Sopenharmony_ci			if (rv < 0)
42762306a36Sopenharmony_ci				goto out_sem_up;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci			umem->num_pages += rv;
43062306a36Sopenharmony_ci			first_page_va += rv * PAGE_SIZE;
43162306a36Sopenharmony_ci			plist += rv;
43262306a36Sopenharmony_ci			nents -= rv;
43362306a36Sopenharmony_ci			num_pages -= rv;
43462306a36Sopenharmony_ci		}
43562306a36Sopenharmony_ci	}
43662306a36Sopenharmony_ciout_sem_up:
43762306a36Sopenharmony_ci	mmap_read_unlock(mm_s);
43862306a36Sopenharmony_ci
43962306a36Sopenharmony_ci	if (rv > 0)
44062306a36Sopenharmony_ci		return umem;
44162306a36Sopenharmony_ci
44262306a36Sopenharmony_ci	/* Adjust accounting for pages not pinned */
44362306a36Sopenharmony_ci	if (num_pages)
44462306a36Sopenharmony_ci		atomic64_sub(num_pages, &mm_s->pinned_vm);
44562306a36Sopenharmony_ci
44662306a36Sopenharmony_ci	siw_umem_release(umem, false);
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci	return ERR_PTR(rv);
44962306a36Sopenharmony_ci}
450