162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* XDP user-space packet buffer 362306a36Sopenharmony_ci * Copyright(c) 2018 Intel Corporation. 462306a36Sopenharmony_ci */ 562306a36Sopenharmony_ci 662306a36Sopenharmony_ci#include <linux/init.h> 762306a36Sopenharmony_ci#include <linux/sched/mm.h> 862306a36Sopenharmony_ci#include <linux/sched/signal.h> 962306a36Sopenharmony_ci#include <linux/sched/task.h> 1062306a36Sopenharmony_ci#include <linux/uaccess.h> 1162306a36Sopenharmony_ci#include <linux/slab.h> 1262306a36Sopenharmony_ci#include <linux/bpf.h> 1362306a36Sopenharmony_ci#include <linux/mm.h> 1462306a36Sopenharmony_ci#include <linux/netdevice.h> 1562306a36Sopenharmony_ci#include <linux/rtnetlink.h> 1662306a36Sopenharmony_ci#include <linux/idr.h> 1762306a36Sopenharmony_ci#include <linux/vmalloc.h> 1862306a36Sopenharmony_ci 1962306a36Sopenharmony_ci#include "xdp_umem.h" 2062306a36Sopenharmony_ci#include "xsk_queue.h" 2162306a36Sopenharmony_ci 2262306a36Sopenharmony_cistatic DEFINE_IDA(umem_ida); 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistatic void xdp_umem_unpin_pages(struct xdp_umem *umem) 2562306a36Sopenharmony_ci{ 2662306a36Sopenharmony_ci unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true); 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci kvfree(umem->pgs); 2962306a36Sopenharmony_ci umem->pgs = NULL; 3062306a36Sopenharmony_ci} 3162306a36Sopenharmony_ci 3262306a36Sopenharmony_cistatic void xdp_umem_unaccount_pages(struct xdp_umem *umem) 3362306a36Sopenharmony_ci{ 3462306a36Sopenharmony_ci if (umem->user) { 3562306a36Sopenharmony_ci atomic_long_sub(umem->npgs, &umem->user->locked_vm); 3662306a36Sopenharmony_ci free_uid(umem->user); 3762306a36Sopenharmony_ci } 3862306a36Sopenharmony_ci} 3962306a36Sopenharmony_ci 4062306a36Sopenharmony_cistatic void xdp_umem_addr_unmap(struct xdp_umem *umem) 4162306a36Sopenharmony_ci{ 4262306a36Sopenharmony_ci vunmap(umem->addrs); 4362306a36Sopenharmony_ci umem->addrs = NULL; 4462306a36Sopenharmony_ci} 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_cistatic int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages, 4762306a36Sopenharmony_ci u32 nr_pages) 4862306a36Sopenharmony_ci{ 4962306a36Sopenharmony_ci umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL); 5062306a36Sopenharmony_ci if (!umem->addrs) 5162306a36Sopenharmony_ci return -ENOMEM; 5262306a36Sopenharmony_ci return 0; 5362306a36Sopenharmony_ci} 5462306a36Sopenharmony_ci 5562306a36Sopenharmony_cistatic void xdp_umem_release(struct xdp_umem *umem) 5662306a36Sopenharmony_ci{ 5762306a36Sopenharmony_ci umem->zc = false; 5862306a36Sopenharmony_ci ida_free(&umem_ida, umem->id); 5962306a36Sopenharmony_ci 6062306a36Sopenharmony_ci xdp_umem_addr_unmap(umem); 6162306a36Sopenharmony_ci xdp_umem_unpin_pages(umem); 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci xdp_umem_unaccount_pages(umem); 6462306a36Sopenharmony_ci kfree(umem); 6562306a36Sopenharmony_ci} 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_cistatic void xdp_umem_release_deferred(struct work_struct *work) 6862306a36Sopenharmony_ci{ 6962306a36Sopenharmony_ci struct xdp_umem *umem = container_of(work, struct xdp_umem, work); 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci xdp_umem_release(umem); 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_civoid xdp_get_umem(struct xdp_umem *umem) 7562306a36Sopenharmony_ci{ 7662306a36Sopenharmony_ci refcount_inc(&umem->users); 7762306a36Sopenharmony_ci} 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_civoid xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup) 8062306a36Sopenharmony_ci{ 8162306a36Sopenharmony_ci if (!umem) 8262306a36Sopenharmony_ci return; 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci if (refcount_dec_and_test(&umem->users)) { 8562306a36Sopenharmony_ci if (defer_cleanup) { 8662306a36Sopenharmony_ci INIT_WORK(&umem->work, xdp_umem_release_deferred); 8762306a36Sopenharmony_ci schedule_work(&umem->work); 8862306a36Sopenharmony_ci } else { 8962306a36Sopenharmony_ci xdp_umem_release(umem); 9062306a36Sopenharmony_ci } 9162306a36Sopenharmony_ci } 9262306a36Sopenharmony_ci} 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_cistatic int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address) 9562306a36Sopenharmony_ci{ 9662306a36Sopenharmony_ci unsigned int gup_flags = FOLL_WRITE; 9762306a36Sopenharmony_ci long npgs; 9862306a36Sopenharmony_ci int err; 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN); 10162306a36Sopenharmony_ci if (!umem->pgs) 10262306a36Sopenharmony_ci return -ENOMEM; 10362306a36Sopenharmony_ci 10462306a36Sopenharmony_ci mmap_read_lock(current->mm); 10562306a36Sopenharmony_ci npgs = pin_user_pages(address, umem->npgs, 10662306a36Sopenharmony_ci gup_flags | FOLL_LONGTERM, &umem->pgs[0]); 10762306a36Sopenharmony_ci mmap_read_unlock(current->mm); 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci if (npgs != umem->npgs) { 11062306a36Sopenharmony_ci if (npgs >= 0) { 11162306a36Sopenharmony_ci umem->npgs = npgs; 11262306a36Sopenharmony_ci err = -ENOMEM; 11362306a36Sopenharmony_ci goto out_pin; 11462306a36Sopenharmony_ci } 11562306a36Sopenharmony_ci err = npgs; 11662306a36Sopenharmony_ci goto out_pgs; 11762306a36Sopenharmony_ci } 11862306a36Sopenharmony_ci return 0; 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ciout_pin: 12162306a36Sopenharmony_ci xdp_umem_unpin_pages(umem); 12262306a36Sopenharmony_ciout_pgs: 12362306a36Sopenharmony_ci kvfree(umem->pgs); 12462306a36Sopenharmony_ci umem->pgs = NULL; 12562306a36Sopenharmony_ci return err; 12662306a36Sopenharmony_ci} 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_cistatic int xdp_umem_account_pages(struct xdp_umem *umem) 12962306a36Sopenharmony_ci{ 13062306a36Sopenharmony_ci unsigned long lock_limit, new_npgs, old_npgs; 13162306a36Sopenharmony_ci 13262306a36Sopenharmony_ci if (capable(CAP_IPC_LOCK)) 13362306a36Sopenharmony_ci return 0; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 13662306a36Sopenharmony_ci umem->user = get_uid(current_user()); 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci do { 13962306a36Sopenharmony_ci old_npgs = atomic_long_read(&umem->user->locked_vm); 14062306a36Sopenharmony_ci new_npgs = old_npgs + umem->npgs; 14162306a36Sopenharmony_ci if (new_npgs > lock_limit) { 14262306a36Sopenharmony_ci free_uid(umem->user); 14362306a36Sopenharmony_ci umem->user = NULL; 14462306a36Sopenharmony_ci return -ENOBUFS; 14562306a36Sopenharmony_ci } 14662306a36Sopenharmony_ci } while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs, 14762306a36Sopenharmony_ci new_npgs) != old_npgs); 14862306a36Sopenharmony_ci return 0; 14962306a36Sopenharmony_ci} 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_cistatic int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) 15262306a36Sopenharmony_ci{ 15362306a36Sopenharmony_ci bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG; 15462306a36Sopenharmony_ci u32 chunk_size = mr->chunk_size, headroom = mr->headroom; 15562306a36Sopenharmony_ci u64 addr = mr->addr, size = mr->len; 15662306a36Sopenharmony_ci u32 chunks_rem, npgs_rem; 15762306a36Sopenharmony_ci u64 chunks, npgs; 15862306a36Sopenharmony_ci int err; 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) { 16162306a36Sopenharmony_ci /* Strictly speaking we could support this, if: 16262306a36Sopenharmony_ci * - huge pages, or* 16362306a36Sopenharmony_ci * - using an IOMMU, or 16462306a36Sopenharmony_ci * - making sure the memory area is consecutive 16562306a36Sopenharmony_ci * but for now, we simply say "computer says no". 16662306a36Sopenharmony_ci */ 16762306a36Sopenharmony_ci return -EINVAL; 16862306a36Sopenharmony_ci } 16962306a36Sopenharmony_ci 17062306a36Sopenharmony_ci if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG) 17162306a36Sopenharmony_ci return -EINVAL; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_ci if (!unaligned_chunks && !is_power_of_2(chunk_size)) 17462306a36Sopenharmony_ci return -EINVAL; 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci if (!PAGE_ALIGNED(addr)) { 17762306a36Sopenharmony_ci /* Memory area has to be page size aligned. For 17862306a36Sopenharmony_ci * simplicity, this might change. 17962306a36Sopenharmony_ci */ 18062306a36Sopenharmony_ci return -EINVAL; 18162306a36Sopenharmony_ci } 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci if ((addr + size) < addr) 18462306a36Sopenharmony_ci return -EINVAL; 18562306a36Sopenharmony_ci 18662306a36Sopenharmony_ci npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem); 18762306a36Sopenharmony_ci if (npgs_rem) 18862306a36Sopenharmony_ci npgs++; 18962306a36Sopenharmony_ci if (npgs > U32_MAX) 19062306a36Sopenharmony_ci return -EINVAL; 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci chunks = div_u64_rem(size, chunk_size, &chunks_rem); 19362306a36Sopenharmony_ci if (!chunks || chunks > U32_MAX) 19462306a36Sopenharmony_ci return -EINVAL; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci if (!unaligned_chunks && chunks_rem) 19762306a36Sopenharmony_ci return -EINVAL; 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci if (headroom >= chunk_size - XDP_PACKET_HEADROOM) 20062306a36Sopenharmony_ci return -EINVAL; 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci umem->size = size; 20362306a36Sopenharmony_ci umem->headroom = headroom; 20462306a36Sopenharmony_ci umem->chunk_size = chunk_size; 20562306a36Sopenharmony_ci umem->chunks = chunks; 20662306a36Sopenharmony_ci umem->npgs = npgs; 20762306a36Sopenharmony_ci umem->pgs = NULL; 20862306a36Sopenharmony_ci umem->user = NULL; 20962306a36Sopenharmony_ci umem->flags = mr->flags; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_ci INIT_LIST_HEAD(&umem->xsk_dma_list); 21262306a36Sopenharmony_ci refcount_set(&umem->users, 1); 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci err = xdp_umem_account_pages(umem); 21562306a36Sopenharmony_ci if (err) 21662306a36Sopenharmony_ci return err; 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci err = xdp_umem_pin_pages(umem, (unsigned long)addr); 21962306a36Sopenharmony_ci if (err) 22062306a36Sopenharmony_ci goto out_account; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs); 22362306a36Sopenharmony_ci if (err) 22462306a36Sopenharmony_ci goto out_unpin; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci return 0; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ciout_unpin: 22962306a36Sopenharmony_ci xdp_umem_unpin_pages(umem); 23062306a36Sopenharmony_ciout_account: 23162306a36Sopenharmony_ci xdp_umem_unaccount_pages(umem); 23262306a36Sopenharmony_ci return err; 23362306a36Sopenharmony_ci} 23462306a36Sopenharmony_ci 23562306a36Sopenharmony_cistruct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr) 23662306a36Sopenharmony_ci{ 23762306a36Sopenharmony_ci struct xdp_umem *umem; 23862306a36Sopenharmony_ci int err; 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci umem = kzalloc(sizeof(*umem), GFP_KERNEL); 24162306a36Sopenharmony_ci if (!umem) 24262306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 24362306a36Sopenharmony_ci 24462306a36Sopenharmony_ci err = ida_alloc(&umem_ida, GFP_KERNEL); 24562306a36Sopenharmony_ci if (err < 0) { 24662306a36Sopenharmony_ci kfree(umem); 24762306a36Sopenharmony_ci return ERR_PTR(err); 24862306a36Sopenharmony_ci } 24962306a36Sopenharmony_ci umem->id = err; 25062306a36Sopenharmony_ci 25162306a36Sopenharmony_ci err = xdp_umem_reg(umem, mr); 25262306a36Sopenharmony_ci if (err) { 25362306a36Sopenharmony_ci ida_free(&umem_ida, umem->id); 25462306a36Sopenharmony_ci kfree(umem); 25562306a36Sopenharmony_ci return ERR_PTR(err); 25662306a36Sopenharmony_ci } 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci return umem; 25962306a36Sopenharmony_ci} 260