// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

#define XDP_UMEM_MIN_CHUNK_SIZE 2048

static DEFINE_IDA(umem_ida);

static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_simple_remove(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		if (defer_cleanup) {
			INIT_WORK(&umem->work, xdp_umem_release_deferred);
			schedule_work(&umem->work);
		} else {
			xdp_umem_release(umem);
		}
	}
}

static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kcalloc(umem->npgs, sizeof(*umem->pgs),
			    GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	u64 addr = mr->addr, size = mr->len;
	u32 chunks_rem, npgs_rem;
	u64 chunks, npgs;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
	if (!chunks || chunks > U32_MAX)
		return -EINVAL;

	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_simple_get(&umem_ida, 0, 0, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_simple_remove(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}