162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/* XDP user-space packet buffer
362306a36Sopenharmony_ci * Copyright(c) 2018 Intel Corporation.
462306a36Sopenharmony_ci */
562306a36Sopenharmony_ci
662306a36Sopenharmony_ci#include <linux/init.h>
762306a36Sopenharmony_ci#include <linux/sched/mm.h>
862306a36Sopenharmony_ci#include <linux/sched/signal.h>
962306a36Sopenharmony_ci#include <linux/sched/task.h>
1062306a36Sopenharmony_ci#include <linux/uaccess.h>
1162306a36Sopenharmony_ci#include <linux/slab.h>
1262306a36Sopenharmony_ci#include <linux/bpf.h>
1362306a36Sopenharmony_ci#include <linux/mm.h>
1462306a36Sopenharmony_ci#include <linux/netdevice.h>
1562306a36Sopenharmony_ci#include <linux/rtnetlink.h>
1662306a36Sopenharmony_ci#include <linux/idr.h>
1762306a36Sopenharmony_ci#include <linux/vmalloc.h>
1862306a36Sopenharmony_ci
1962306a36Sopenharmony_ci#include "xdp_umem.h"
2062306a36Sopenharmony_ci#include "xsk_queue.h"
2162306a36Sopenharmony_ci
2262306a36Sopenharmony_cistatic DEFINE_IDA(umem_ida);
2362306a36Sopenharmony_ci
2462306a36Sopenharmony_cistatic void xdp_umem_unpin_pages(struct xdp_umem *umem)
2562306a36Sopenharmony_ci{
2662306a36Sopenharmony_ci	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci	kvfree(umem->pgs);
2962306a36Sopenharmony_ci	umem->pgs = NULL;
3062306a36Sopenharmony_ci}
3162306a36Sopenharmony_ci
3262306a36Sopenharmony_cistatic void xdp_umem_unaccount_pages(struct xdp_umem *umem)
3362306a36Sopenharmony_ci{
3462306a36Sopenharmony_ci	if (umem->user) {
3562306a36Sopenharmony_ci		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
3662306a36Sopenharmony_ci		free_uid(umem->user);
3762306a36Sopenharmony_ci	}
3862306a36Sopenharmony_ci}
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_cistatic void xdp_umem_addr_unmap(struct xdp_umem *umem)
4162306a36Sopenharmony_ci{
4262306a36Sopenharmony_ci	vunmap(umem->addrs);
4362306a36Sopenharmony_ci	umem->addrs = NULL;
4462306a36Sopenharmony_ci}
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_cistatic int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
4762306a36Sopenharmony_ci			     u32 nr_pages)
4862306a36Sopenharmony_ci{
4962306a36Sopenharmony_ci	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
5062306a36Sopenharmony_ci	if (!umem->addrs)
5162306a36Sopenharmony_ci		return -ENOMEM;
5262306a36Sopenharmony_ci	return 0;
5362306a36Sopenharmony_ci}
5462306a36Sopenharmony_ci
5562306a36Sopenharmony_cistatic void xdp_umem_release(struct xdp_umem *umem)
5662306a36Sopenharmony_ci{
5762306a36Sopenharmony_ci	umem->zc = false;
5862306a36Sopenharmony_ci	ida_free(&umem_ida, umem->id);
5962306a36Sopenharmony_ci
6062306a36Sopenharmony_ci	xdp_umem_addr_unmap(umem);
6162306a36Sopenharmony_ci	xdp_umem_unpin_pages(umem);
6262306a36Sopenharmony_ci
6362306a36Sopenharmony_ci	xdp_umem_unaccount_pages(umem);
6462306a36Sopenharmony_ci	kfree(umem);
6562306a36Sopenharmony_ci}
6662306a36Sopenharmony_ci
6762306a36Sopenharmony_cistatic void xdp_umem_release_deferred(struct work_struct *work)
6862306a36Sopenharmony_ci{
6962306a36Sopenharmony_ci	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);
7062306a36Sopenharmony_ci
7162306a36Sopenharmony_ci	xdp_umem_release(umem);
7262306a36Sopenharmony_ci}
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_civoid xdp_get_umem(struct xdp_umem *umem)
7562306a36Sopenharmony_ci{
7662306a36Sopenharmony_ci	refcount_inc(&umem->users);
7762306a36Sopenharmony_ci}
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_civoid xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
8062306a36Sopenharmony_ci{
8162306a36Sopenharmony_ci	if (!umem)
8262306a36Sopenharmony_ci		return;
8362306a36Sopenharmony_ci
8462306a36Sopenharmony_ci	if (refcount_dec_and_test(&umem->users)) {
8562306a36Sopenharmony_ci		if (defer_cleanup) {
8662306a36Sopenharmony_ci			INIT_WORK(&umem->work, xdp_umem_release_deferred);
8762306a36Sopenharmony_ci			schedule_work(&umem->work);
8862306a36Sopenharmony_ci		} else {
8962306a36Sopenharmony_ci			xdp_umem_release(umem);
9062306a36Sopenharmony_ci		}
9162306a36Sopenharmony_ci	}
9262306a36Sopenharmony_ci}
9362306a36Sopenharmony_ci
9462306a36Sopenharmony_cistatic int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
9562306a36Sopenharmony_ci{
9662306a36Sopenharmony_ci	unsigned int gup_flags = FOLL_WRITE;
9762306a36Sopenharmony_ci	long npgs;
9862306a36Sopenharmony_ci	int err;
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
10162306a36Sopenharmony_ci	if (!umem->pgs)
10262306a36Sopenharmony_ci		return -ENOMEM;
10362306a36Sopenharmony_ci
10462306a36Sopenharmony_ci	mmap_read_lock(current->mm);
10562306a36Sopenharmony_ci	npgs = pin_user_pages(address, umem->npgs,
10662306a36Sopenharmony_ci			      gup_flags | FOLL_LONGTERM, &umem->pgs[0]);
10762306a36Sopenharmony_ci	mmap_read_unlock(current->mm);
10862306a36Sopenharmony_ci
10962306a36Sopenharmony_ci	if (npgs != umem->npgs) {
11062306a36Sopenharmony_ci		if (npgs >= 0) {
11162306a36Sopenharmony_ci			umem->npgs = npgs;
11262306a36Sopenharmony_ci			err = -ENOMEM;
11362306a36Sopenharmony_ci			goto out_pin;
11462306a36Sopenharmony_ci		}
11562306a36Sopenharmony_ci		err = npgs;
11662306a36Sopenharmony_ci		goto out_pgs;
11762306a36Sopenharmony_ci	}
11862306a36Sopenharmony_ci	return 0;
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ciout_pin:
12162306a36Sopenharmony_ci	xdp_umem_unpin_pages(umem);
12262306a36Sopenharmony_ciout_pgs:
12362306a36Sopenharmony_ci	kvfree(umem->pgs);
12462306a36Sopenharmony_ci	umem->pgs = NULL;
12562306a36Sopenharmony_ci	return err;
12662306a36Sopenharmony_ci}
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_cistatic int xdp_umem_account_pages(struct xdp_umem *umem)
12962306a36Sopenharmony_ci{
13062306a36Sopenharmony_ci	unsigned long lock_limit, new_npgs, old_npgs;
13162306a36Sopenharmony_ci
13262306a36Sopenharmony_ci	if (capable(CAP_IPC_LOCK))
13362306a36Sopenharmony_ci		return 0;
13462306a36Sopenharmony_ci
13562306a36Sopenharmony_ci	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
13662306a36Sopenharmony_ci	umem->user = get_uid(current_user());
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	do {
13962306a36Sopenharmony_ci		old_npgs = atomic_long_read(&umem->user->locked_vm);
14062306a36Sopenharmony_ci		new_npgs = old_npgs + umem->npgs;
14162306a36Sopenharmony_ci		if (new_npgs > lock_limit) {
14262306a36Sopenharmony_ci			free_uid(umem->user);
14362306a36Sopenharmony_ci			umem->user = NULL;
14462306a36Sopenharmony_ci			return -ENOBUFS;
14562306a36Sopenharmony_ci		}
14662306a36Sopenharmony_ci	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
14762306a36Sopenharmony_ci				     new_npgs) != old_npgs);
14862306a36Sopenharmony_ci	return 0;
14962306a36Sopenharmony_ci}
15062306a36Sopenharmony_ci
15162306a36Sopenharmony_cistatic int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
15262306a36Sopenharmony_ci{
15362306a36Sopenharmony_ci	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
15462306a36Sopenharmony_ci	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
15562306a36Sopenharmony_ci	u64 addr = mr->addr, size = mr->len;
15662306a36Sopenharmony_ci	u32 chunks_rem, npgs_rem;
15762306a36Sopenharmony_ci	u64 chunks, npgs;
15862306a36Sopenharmony_ci	int err;
15962306a36Sopenharmony_ci
16062306a36Sopenharmony_ci	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
16162306a36Sopenharmony_ci		/* Strictly speaking we could support this, if:
16262306a36Sopenharmony_ci		 * - huge pages, or*
16362306a36Sopenharmony_ci		 * - using an IOMMU, or
16462306a36Sopenharmony_ci		 * - making sure the memory area is consecutive
16562306a36Sopenharmony_ci		 * but for now, we simply say "computer says no".
16662306a36Sopenharmony_ci		 */
16762306a36Sopenharmony_ci		return -EINVAL;
16862306a36Sopenharmony_ci	}
16962306a36Sopenharmony_ci
17062306a36Sopenharmony_ci	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
17162306a36Sopenharmony_ci		return -EINVAL;
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_ci	if (!unaligned_chunks && !is_power_of_2(chunk_size))
17462306a36Sopenharmony_ci		return -EINVAL;
17562306a36Sopenharmony_ci
17662306a36Sopenharmony_ci	if (!PAGE_ALIGNED(addr)) {
17762306a36Sopenharmony_ci		/* Memory area has to be page size aligned. For
17862306a36Sopenharmony_ci		 * simplicity, this might change.
17962306a36Sopenharmony_ci		 */
18062306a36Sopenharmony_ci		return -EINVAL;
18162306a36Sopenharmony_ci	}
18262306a36Sopenharmony_ci
18362306a36Sopenharmony_ci	if ((addr + size) < addr)
18462306a36Sopenharmony_ci		return -EINVAL;
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
18762306a36Sopenharmony_ci	if (npgs_rem)
18862306a36Sopenharmony_ci		npgs++;
18962306a36Sopenharmony_ci	if (npgs > U32_MAX)
19062306a36Sopenharmony_ci		return -EINVAL;
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
19362306a36Sopenharmony_ci	if (!chunks || chunks > U32_MAX)
19462306a36Sopenharmony_ci		return -EINVAL;
19562306a36Sopenharmony_ci
19662306a36Sopenharmony_ci	if (!unaligned_chunks && chunks_rem)
19762306a36Sopenharmony_ci		return -EINVAL;
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
20062306a36Sopenharmony_ci		return -EINVAL;
20162306a36Sopenharmony_ci
20262306a36Sopenharmony_ci	umem->size = size;
20362306a36Sopenharmony_ci	umem->headroom = headroom;
20462306a36Sopenharmony_ci	umem->chunk_size = chunk_size;
20562306a36Sopenharmony_ci	umem->chunks = chunks;
20662306a36Sopenharmony_ci	umem->npgs = npgs;
20762306a36Sopenharmony_ci	umem->pgs = NULL;
20862306a36Sopenharmony_ci	umem->user = NULL;
20962306a36Sopenharmony_ci	umem->flags = mr->flags;
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci	INIT_LIST_HEAD(&umem->xsk_dma_list);
21262306a36Sopenharmony_ci	refcount_set(&umem->users, 1);
21362306a36Sopenharmony_ci
21462306a36Sopenharmony_ci	err = xdp_umem_account_pages(umem);
21562306a36Sopenharmony_ci	if (err)
21662306a36Sopenharmony_ci		return err;
21762306a36Sopenharmony_ci
21862306a36Sopenharmony_ci	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
21962306a36Sopenharmony_ci	if (err)
22062306a36Sopenharmony_ci		goto out_account;
22162306a36Sopenharmony_ci
22262306a36Sopenharmony_ci	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
22362306a36Sopenharmony_ci	if (err)
22462306a36Sopenharmony_ci		goto out_unpin;
22562306a36Sopenharmony_ci
22662306a36Sopenharmony_ci	return 0;
22762306a36Sopenharmony_ci
22862306a36Sopenharmony_ciout_unpin:
22962306a36Sopenharmony_ci	xdp_umem_unpin_pages(umem);
23062306a36Sopenharmony_ciout_account:
23162306a36Sopenharmony_ci	xdp_umem_unaccount_pages(umem);
23262306a36Sopenharmony_ci	return err;
23362306a36Sopenharmony_ci}
23462306a36Sopenharmony_ci
23562306a36Sopenharmony_cistruct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
23662306a36Sopenharmony_ci{
23762306a36Sopenharmony_ci	struct xdp_umem *umem;
23862306a36Sopenharmony_ci	int err;
23962306a36Sopenharmony_ci
24062306a36Sopenharmony_ci	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
24162306a36Sopenharmony_ci	if (!umem)
24262306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
24362306a36Sopenharmony_ci
24462306a36Sopenharmony_ci	err = ida_alloc(&umem_ida, GFP_KERNEL);
24562306a36Sopenharmony_ci	if (err < 0) {
24662306a36Sopenharmony_ci		kfree(umem);
24762306a36Sopenharmony_ci		return ERR_PTR(err);
24862306a36Sopenharmony_ci	}
24962306a36Sopenharmony_ci	umem->id = err;
25062306a36Sopenharmony_ci
25162306a36Sopenharmony_ci	err = xdp_umem_reg(umem, mr);
25262306a36Sopenharmony_ci	if (err) {
25362306a36Sopenharmony_ci		ida_free(&umem_ida, umem->id);
25462306a36Sopenharmony_ci		kfree(umem);
25562306a36Sopenharmony_ci		return ERR_PTR(err);
25662306a36Sopenharmony_ci	}
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	return umem;
25962306a36Sopenharmony_ci}
260