// SPDX-License-Identifier: GPL-2.0-only
/* net/core/xdp.c
 *
 * Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
 */
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/rhashtable.h>
#include <linux/bug.h>
#include <net/page_pool.h>

#include <net/xdp.h>
#include <net/xdp_priv.h> /* struct xdp_mem_allocator */
#include <trace/events/xdp.h>
#include <net/xdp_sock_drv.h>

#define REG_STATE_NEW		0x0
#define REG_STATE_REGISTERED	0x1
#define REG_STATE_UNREGISTERED	0x2
#define REG_STATE_UNUSED	0x3

static DEFINE_IDA(mem_id_pool);
static DEFINE_MUTEX(mem_id_lock);
#define MEM_ID_MAX 0xFFFE
#define MEM_ID_MIN 1
static int mem_id_next = MEM_ID_MIN;

static bool mem_id_init; /* false */
static struct rhashtable *mem_id_ht;

static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed)
{
	const u32 *k = data;
	const u32 key = *k;

	BUILD_BUG_ON(sizeof_field(struct xdp_mem_allocator, mem.id)
		     != sizeof(u32));

	/* Use cyclic increasing ID as direct hash key */
	return key;
}

static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg,
			  const void *ptr)
{
	const struct xdp_mem_allocator *xa = ptr;
	u32 mem_id = *(u32 *)arg->key;

	return xa->mem.id != mem_id;
}

static const struct rhashtable_params mem_id_rht_params = {
	.nelem_hint = 64,
	.head_offset = offsetof(struct xdp_mem_allocator, node),
	.key_offset = offsetof(struct xdp_mem_allocator, mem.id),
	.key_len = sizeof_field(struct xdp_mem_allocator, mem.id),
	.max_size = MEM_ID_MAX,
	.min_size = 8,
	.automatic_shrinking = true,
	.hashfn = xdp_mem_id_hashfn,
	.obj_cmpfn = xdp_mem_id_cmp,
};

static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu)
{
	struct xdp_mem_allocator *xa;

	xa = container_of(rcu, struct xdp_mem_allocator, rcu);

	/* Allow this ID to be reused */
	ida_simple_remove(&mem_id_pool, xa->mem.id);

	kfree(xa);
}

static void mem_xa_remove(struct xdp_mem_allocator *xa)
{
	trace_mem_disconnect(xa);

	if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
}

static void mem_allocator_disconnect(void *allocator)
{
	struct xdp_mem_allocator *xa;
	struct rhashtable_iter iter;

	mutex_lock(&mem_id_lock);

	rhashtable_walk_enter(mem_id_ht, &iter);
	do {
		rhashtable_walk_start(&iter);

		while ((xa = rhashtable_walk_next(&iter)) && !IS_ERR(xa)) {
			if (xa->allocator == allocator)
				mem_xa_remove(xa);
		}

		rhashtable_walk_stop(&iter);

	} while (xa == ERR_PTR(-EAGAIN));
	rhashtable_walk_exit(&iter);

	mutex_unlock(&mem_id_lock);
}

void xdp_unreg_mem_model(struct xdp_mem_info *mem)
{
	struct xdp_mem_allocator *xa;
	int type = mem->type;
	int id = mem->id;

	/* Reset mem info to defaults */
	mem->id = 0;
	mem->type = 0;

	if (id == 0)
		return;

	if (type == MEM_TYPE_PAGE_POOL) {
		/* Hold the RCU read lock across the lookup and the
		 * page_pool deref; the entry may be freed via call_rcu
		 * after a grace period otherwise.
		 */
		rcu_read_lock();
		xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
		page_pool_destroy(xa->page_pool);
		rcu_read_unlock();
	}
}
EXPORT_SYMBOL_GPL(xdp_unreg_mem_model);
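/* Usage sketch (illustrative only; the "mydrv" names are hypothetical,
 * not from this file): drivers normally do not call
 * xdp_unreg_mem_model() directly, but reach it via xdp_rxq_info_unreg()
 * during ring teardown:
 *
 *	static void mydrv_free_rx_ring(struct mydrv_ring *ring)
 *	{
 *		xdp_rxq_info_unreg(&ring->xdp_rxq);
 *		// For MEM_TYPE_PAGE_POOL this ends up in
 *		// page_pool_destroy(), which invokes
 *		// mem_allocator_disconnect() above once all
 *		// in-flight pages have been returned.
 *	}
 */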
void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
{
	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return;
	}

	xdp_unreg_mem_model(&xdp_rxq->mem);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model);

void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq)
{
	/* Simplify driver cleanup code paths, allow unreg "unused" */
	if (xdp_rxq->reg_state == REG_STATE_UNUSED)
		return;

	WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG");

	xdp_rxq_info_unreg_mem_model(xdp_rxq);

	xdp_rxq->reg_state = REG_STATE_UNREGISTERED;
	xdp_rxq->dev = NULL;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg);

static void xdp_rxq_info_init(struct xdp_rxq_info *xdp_rxq)
{
	memset(xdp_rxq, 0, sizeof(*xdp_rxq));
}

/* Returns 0 on success, negative on failure */
int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq,
		     struct net_device *dev, u32 queue_index)
{
	if (xdp_rxq->reg_state == REG_STATE_UNUSED) {
		WARN(1, "Driver promised not to register this");
		return -EINVAL;
	}

	if (xdp_rxq->reg_state == REG_STATE_REGISTERED) {
		WARN(1, "Missing unregister, handled but fix driver");
		xdp_rxq_info_unreg(xdp_rxq);
	}

	if (!dev) {
		WARN(1, "Missing net_device from driver");
		return -ENODEV;
	}

	/* State either UNREGISTERED or NEW */
	xdp_rxq_info_init(xdp_rxq);
	xdp_rxq->dev = dev;
	xdp_rxq->queue_index = queue_index;

	xdp_rxq->reg_state = REG_STATE_REGISTERED;
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg);

void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq)
{
	xdp_rxq->reg_state = REG_STATE_UNUSED;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_unused);

bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq)
{
	return (xdp_rxq->reg_state == REG_STATE_REGISTERED);
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_is_reg);

static int __mem_id_init_hash_table(void)
{
	struct rhashtable *rht;
	int ret;

	if (unlikely(mem_id_init))
		return 0;

	rht = kzalloc(sizeof(*rht), GFP_KERNEL);
	if (!rht)
		return -ENOMEM;

	ret = rhashtable_init(rht, &mem_id_rht_params);
	if (ret < 0) {
		kfree(rht);
		return ret;
	}
	mem_id_ht = rht;
	smp_mb(); /* mutex lock should provide enough pairing */
	mem_id_init = true;

	return 0;
}
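/* Usage sketch (illustrative only; "mydrv"/"ring" identifiers are
 * hypothetical): the expected driver-side pairing of xdp_rxq_info_reg()
 * above with xdp_rxq_info_reg_mem_model() below, typically run when an
 * RX ring is set up:
 *
 *	err = xdp_rxq_info_reg(&ring->xdp_rxq, netdev, ring->q_index);
 *	if (err)
 *		return err;
 *
 *	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 *					 MEM_TYPE_PAGE_POOL,
 *					 ring->page_pool);
 *	if (err) {
 *		xdp_rxq_info_unreg(&ring->xdp_rxq);
 *		return err;
 *	}
 */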
/* Allocate a cyclic ID that maps to allocator pointer.
 * See: https://www.kernel.org/doc/html/latest/core-api/idr.html
 *
 * Caller must lock mem_id_lock.
 */
static int __mem_id_cyclic_get(gfp_t gfp)
{
	int retries = 1;
	int id;

again:
	id = ida_simple_get(&mem_id_pool, mem_id_next, MEM_ID_MAX, gfp);
	if (id < 0) {
		if (id == -ENOSPC) {
			/* Cyclic allocator, reset next id */
			if (retries--) {
				mem_id_next = MEM_ID_MIN;
				goto again;
			}
		}
		return id; /* errno */
	}
	mem_id_next = id + 1;

	return id;
}

static bool __is_supported_mem_type(enum xdp_mem_type type)
{
	if (type == MEM_TYPE_PAGE_POOL)
		return is_page_pool_compiled_in();

	if (type >= MEM_TYPE_MAX)
		return false;

	return true;
}

static struct xdp_mem_allocator *__xdp_reg_mem_model(struct xdp_mem_info *mem,
						     enum xdp_mem_type type,
						     void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;
	gfp_t gfp = GFP_KERNEL;
	int id, errno, ret;
	void *ptr;

	if (!__is_supported_mem_type(type))
		return ERR_PTR(-EOPNOTSUPP);

	mem->type = type;

	if (!allocator) {
		if (type == MEM_TYPE_PAGE_POOL)
			return ERR_PTR(-EINVAL); /* Setup time check page_pool req */
		return NULL;
	}

	/* Delay init of rhashtable to save memory if feature isn't used */
	if (!mem_id_init) {
		mutex_lock(&mem_id_lock);
		ret = __mem_id_init_hash_table();
		mutex_unlock(&mem_id_lock);
		if (ret < 0)
			return ERR_PTR(ret);
	}

	xdp_alloc = kzalloc(sizeof(*xdp_alloc), gfp);
	if (!xdp_alloc)
		return ERR_PTR(-ENOMEM);

	mutex_lock(&mem_id_lock);
	id = __mem_id_cyclic_get(gfp);
	if (id < 0) {
		errno = id;
		goto err;
	}
	mem->id = id;
	xdp_alloc->mem = *mem;
	xdp_alloc->allocator = allocator;

	/* Insert allocator into ID lookup table */
	ptr = rhashtable_insert_slow(mem_id_ht, &id, &xdp_alloc->node);
	if (IS_ERR(ptr)) {
		ida_simple_remove(&mem_id_pool, mem->id);
		mem->id = 0;
		errno = PTR_ERR(ptr);
		goto err;
	}

	if (type == MEM_TYPE_PAGE_POOL)
		page_pool_use_xdp_mem(allocator, mem_allocator_disconnect);

	mutex_unlock(&mem_id_lock);

	return xdp_alloc;
err:
	mutex_unlock(&mem_id_lock);
	kfree(xdp_alloc);
	return ERR_PTR(errno);
}

int xdp_reg_mem_model(struct xdp_mem_info *mem,
		      enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;

	xdp_alloc = __xdp_reg_mem_model(mem, type, allocator);
	if (IS_ERR(xdp_alloc))
		return PTR_ERR(xdp_alloc);
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_reg_mem_model);

int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
			       enum xdp_mem_type type, void *allocator)
{
	struct xdp_mem_allocator *xdp_alloc;

	if (xdp_rxq->reg_state != REG_STATE_REGISTERED) {
		WARN(1, "Missing register, driver bug");
		return -EFAULT;
	}

	xdp_alloc = __xdp_reg_mem_model(&xdp_rxq->mem, type, allocator);
	if (IS_ERR(xdp_alloc))
		return PTR_ERR(xdp_alloc);

	if (trace_mem_connect_enabled() && xdp_alloc)
		trace_mem_connect(xdp_alloc, xdp_rxq);
	return 0;
}
EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
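/* Usage sketch (illustrative only): an allocator object is only
 * required for MEM_TYPE_PAGE_POOL. Page-fragment based drivers can
 * register MEM_TYPE_PAGE_SHARED with a NULL allocator, in which case
 * __xdp_reg_mem_model() returns early and no rhashtable entry or
 * mem.id is allocated:
 *
 *	err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 *					 MEM_TYPE_PAGE_SHARED, NULL);
 */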
/* XDP RX runs under NAPI protection, and in different delivery error
 * scenarios (e.g. queue full), it is possible to return the xdp_frame
 * while still leveraging this protection. The @napi_direct boolean
 * is used for those call sites, allowing for faster recycling of
 * xdp_frames/pages in those cases.
 */
static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
			 struct xdp_buff *xdp)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	switch (mem->type) {
	case MEM_TYPE_PAGE_POOL:
		rcu_read_lock();
		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
		page = virt_to_head_page(data);
		if (napi_direct && xdp_return_frame_no_direct())
			napi_direct = false;
		page_pool_put_full_page(xa->page_pool, page, napi_direct);
		rcu_read_unlock();
		break;
	case MEM_TYPE_PAGE_SHARED:
		page_frag_free(data);
		break;
	case MEM_TYPE_PAGE_ORDER0:
		page = virt_to_page(data); /* Assumes order-0 page */
		put_page(page);
		break;
	case MEM_TYPE_XSK_BUFF_POOL:
		/* NB! Only valid from an xdp_buff! */
		xsk_buff_free(xdp);
		break;
	default:
		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
		WARN(1, "Incorrect XDP memory type (%d) usage", mem->type);
		break;
	}
}

void xdp_return_frame(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame);

void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
{
	__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
}
EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);

void xdp_return_buff(struct xdp_buff *xdp)
{
	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
}

/* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
{
	struct xdp_mem_allocator *xa;
	struct page *page;

	rcu_read_lock();
	xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
	page = virt_to_head_page(data);
	if (xa)
		page_pool_release_page(xa->page_pool, page);
	rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(__xdp_release_frame);

void xdp_attachment_setup(struct xdp_attachment_info *info,
			  struct netdev_bpf *bpf)
{
	if (info->prog)
		bpf_prog_put(info->prog);
	info->prog = bpf->prog;
	info->flags = bpf->flags;
}
EXPORT_SYMBOL_GPL(xdp_attachment_setup);

struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
{
	unsigned int metasize, totsize;
	void *addr, *data_to_copy;
	struct xdp_frame *xdpf;
	struct page *page;

	/* Clone into a MEM_TYPE_PAGE_ORDER0 xdp_frame. */
	metasize = xdp_data_meta_unsupported(xdp) ? 0 :
		   xdp->data - xdp->data_meta;
	totsize = xdp->data_end - xdp->data + metasize;

	if (sizeof(*xdpf) + totsize > PAGE_SIZE)
		return NULL;

	page = dev_alloc_page();
	if (!page)
		return NULL;

	addr = page_to_virt(page);
	xdpf = addr;
	memset(xdpf, 0, sizeof(*xdpf));

	addr += sizeof(*xdpf);
	data_to_copy = metasize ? xdp->data_meta : xdp->data;
	memcpy(addr, data_to_copy, totsize);

	xdpf->data = addr + metasize;
	xdpf->len = totsize - metasize;
	xdpf->headroom = 0;
	xdpf->metasize = metasize;
	xdpf->frame_sz = PAGE_SIZE;
	xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;

	xsk_buff_free(xdp);
	return xdpf;
}
EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);

/* Used by XDP_WARN macro, to avoid inlining WARN() in fast-path */
void xdp_warn(const char *msg, const char *func, const int line)
{
	WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
}
EXPORT_SYMBOL_GPL(xdp_warn);
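/* Usage sketch (illustrative only; "tx_ring" and "frames_done" are
 * hypothetical): the return helpers above are what XDP_REDIRECT
 * targets call once a transmitted xdp_frame has completed. The
 * _rx_napi variant may only be used from the RX NAPI context that
 * owns the underlying page_pool:
 *
 *	for (i = 0; i < frames_done; i++)
 *		xdp_return_frame(tx_ring->xdpf[i]);
 */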