// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	Generic address resolution entity
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *	Alexey Kuznetsov	<kuznet@ms2.inr.ac.ru>
 *
 *	Fixes:
 *	Vitaly E. Lavrov	releasing NULL neighbor in neigh_add.
 *	Harald Welte		Add neighbour cache statistics like rtstat
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/slab.h>
#include <linux/kmemleak.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/netdevice.h>
#include <linux/proc_fs.h>
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif
#include <linux/times.h>
#include <net/net_namespace.h>
#include <net/neighbour.h>
#include <net/arp.h>
#include <net/dst.h>
#include <net/sock.h>
#include <net/netevent.h>
#include <net/netlink.h>
#include <linux/rtnetlink.h>
#include <linux/random.h>
#include <linux/string.h>
#include <linux/log2.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>

#include <trace/events/neigh.h>

#define DEBUG
#define NEIGH_DEBUG 1
#define neigh_dbg(level, fmt, ...)		\
do {						\
	if (level <= NEIGH_DEBUG)		\
		pr_debug(fmt, ##__VA_ARGS__);	\
} while (0)

#define PNEIGH_HASHMASK		0xF

static void neigh_timer_handler(struct timer_list *t);
static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid);
static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid);
static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev);

#ifdef CONFIG_PROC_FS
static const struct seq_operations neigh_stat_seq_ops;
#endif

/*
   Neighbour hash table buckets are protected with rwlock tbl->lock.

   - All the scans/updates to hash buckets MUST be made under this lock.
   - NOTHING clever should be done under this lock: no callbacks
     to protocol backends, no attempts to send something to the network.
     That would result in deadlocks if the backend/driver wants to use
     the neighbour cache.
   - If the entry requires some non-trivial actions, increase
     its reference count and release the table lock.

   Neighbour entries are protected:
   - with reference count.
   - with rwlock neigh->lock

   The reference count prevents destruction.

   neigh->lock mainly serializes ll address data and its validity state.
   However, the same lock is used to protect other entry fields:
    - timer
    - resolution queue

   Again, nothing clever shall be done under neigh->lock;
   the most complicated procedure we allow is dev->hard_header.
   It is assumed that dev->hard_header is simplistic and does
   not make callbacks to neighbour tables.
 */
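
/*
 * A minimal sketch of the locking rule above (illustrative only;
 * do_slow_work() is not a helper defined in this file): a scan that
 * finds an entry needing non-trivial work takes a reference and drops
 * the table lock first, does the work, then drops the reference:
 *
 *	neigh_hold(n);
 *	write_unlock_bh(&tbl->lock);
 *	do_slow_work(n);	// may take n->lock, call into drivers, etc.
 *	neigh_release(n);
 */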

static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}

static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	trace_neigh_cleanup_and_release(neigh, 0);
	__neigh_notify(neigh, RTM_DELNEIGH, 0, 0);
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	neigh_release(neigh);
}

/*
 * It is a random distribution in the interval (1/2)*base...(3/2)*base.
 * It corresponds to the default IPv6 settings and is not overridable,
 * because it is a really reasonable choice.
 */

unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}
EXPORT_SYMBOL(neigh_rand_reach_time);
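
/*
 * Worked example: with the common ARP default of base = 30 * HZ
 * (a base_reachable_time of 30 s), the result is uniformly distributed
 * over [15 * HZ, 45 * HZ), i.e. a REACHABLE entry is revalidated
 * somewhere between 15 and 45 seconds after its last confirmation.
 */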

static void neigh_mark_dead(struct neighbour *n)
{
	n->dead = 1;
	if (!list_empty(&n->gc_list)) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	}
}

static void neigh_update_gc_list(struct neighbour *n)
{
	bool on_gc_list, exempt_from_gc;

	write_lock_bh(&n->tbl->lock);
	write_lock(&n->lock);

	if (n->dead)
		goto out;

	/* remove from the gc list if new state is permanent or if neighbor
	 * is externally learned; otherwise entry should be on the gc list
	 */
	exempt_from_gc = n->nud_state & NUD_PERMANENT ||
			 n->flags & NTF_EXT_LEARNED;
	on_gc_list = !list_empty(&n->gc_list);

	if (exempt_from_gc && on_gc_list) {
		list_del_init(&n->gc_list);
		atomic_dec(&n->tbl->gc_entries);
	} else if (!exempt_from_gc && !on_gc_list) {
		/* add entries to the tail; cleaning removes from the front */
		list_add_tail(&n->gc_list, &n->tbl->gc_list);
		atomic_inc(&n->tbl->gc_entries);
	}

out:
	write_unlock(&n->lock);
	write_unlock_bh(&n->tbl->lock);
}

static bool neigh_update_ext_learned(struct neighbour *neigh, u32 flags,
				     int *notify)
{
	bool rc = false;
	u8 ndm_flags;

	if (!(flags & NEIGH_UPDATE_F_ADMIN))
		return rc;

	ndm_flags = (flags & NEIGH_UPDATE_F_EXT_LEARNED) ? NTF_EXT_LEARNED : 0;
	if ((neigh->flags ^ ndm_flags) & NTF_EXT_LEARNED) {
		if (ndm_flags & NTF_EXT_LEARNED)
			neigh->flags |= NTF_EXT_LEARNED;
		else
			neigh->flags &= ~NTF_EXT_LEARNED;
		rc = true;
		*notify = 1;
	}

	return rc;
}

static bool neigh_del(struct neighbour *n, struct neighbour __rcu **np,
		      struct neigh_table *tbl)
{
	bool retval = false;

	write_lock(&n->lock);
	if (refcount_read(&n->refcnt) == 1) {
		struct neighbour *neigh;

		neigh = rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock));
		rcu_assign_pointer(*np, neigh);
		neigh_mark_dead(n);
		retval = true;
	}
	write_unlock(&n->lock);
	if (retval)
		neigh_cleanup_and_release(n);
	return retval;
}

bool neigh_remove_one(struct neighbour *ndel, struct neigh_table *tbl)
{
	struct neigh_hash_table *nht;
	void *pkey = ndel->primary_key;
	u32 hash_val;
	struct neighbour *n;
	struct neighbour __rcu **np;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	hash_val = tbl->hash(pkey, ndel->dev, nht->hash_rnd);
	hash_val = hash_val >> (32 - nht->hash_shift);

	np = &nht->hash_buckets[hash_val];
	while ((n = rcu_dereference_protected(*np,
					      lockdep_is_held(&tbl->lock)))) {
		if (n == ndel)
			return neigh_del(n, np, tbl);
		np = &n->next;
	}
	return false;
}

static int neigh_forced_gc(struct neigh_table *tbl)
{
	int max_clean = atomic_read(&tbl->gc_entries) -
			READ_ONCE(tbl->gc_thresh2);
	u64 tmax = ktime_get_ns() + NSEC_PER_MSEC;
	unsigned long tref = jiffies - 5 * HZ;
	struct neighbour *n, *tmp;
	int shrunk = 0;
	int loop = 0;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);

	list_for_each_entry_safe(n, tmp, &tbl->gc_list, gc_list) {
		if (refcount_read(&n->refcnt) == 1) {
			bool remove = false;

			write_lock(&n->lock);
			if ((n->nud_state == NUD_FAILED) ||
			    (n->nud_state == NUD_NOARP) ||
			    (tbl->is_multicast &&
			     tbl->is_multicast(n->primary_key)) ||
			    !time_in_range(n->updated, tref, jiffies))
				remove = true;
			write_unlock(&n->lock);

			if (remove && neigh_remove_one(n, tbl))
				shrunk++;
			if (shrunk >= max_clean)
				break;
			if (++loop == 16) {
				if (ktime_get_ns() > tmax)
					goto unlock;
				loop = 0;
			}
		}
	}

	WRITE_ONCE(tbl->last_flush, jiffies);
unlock:
	write_unlock_bh(&tbl->lock);

	return shrunk;
}
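
/*
 * Note that neigh_forced_gc() is bounded twice: it stops after
 * reclaiming max_clean entries (the excess above gc_thresh2), and it
 * also bails out once it has spent roughly one millisecond under
 * tbl->lock, checking the clock only every 16 entries to keep
 * ktime_get_ns() off the hot path. A huge gc_list therefore cannot
 * stall softirq processing for long.
 */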

static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
	/* Use safe distance from the jiffies - LONG_MAX point while timer
	 * is running in DELAY/PROBE state but still show to user space
	 * large times in the past.
	 */
	unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);

	neigh_hold(n);
	if (!time_in_range(n->confirmed, mint, jiffies))
		n->confirmed = mint;
	if (time_before(n->used, n->confirmed))
		n->used = n->confirmed;
	if (unlikely(mod_timer(&n->timer, when))) {
		printk("NEIGH: BUG, double timer add, state is %x\n",
		       n->nud_state);
		dump_stack();
	}
}

static int neigh_del_timer(struct neighbour *n)
{
	if ((n->nud_state & NUD_IN_TIMER) &&
	    del_timer(&n->timer)) {
		neigh_release(n);
		return 1;
	}
	return 0;
}

static void pneigh_queue_purge(struct sk_buff_head *list, struct net *net)
{
	struct sk_buff_head tmp;
	unsigned long flags;
	struct sk_buff *skb;

	skb_queue_head_init(&tmp);
	spin_lock_irqsave(&list->lock, flags);
	skb = skb_peek(list);
	while (skb != NULL) {
		struct sk_buff *skb_next = skb_peek_next(skb, list);

		if (net == NULL || net_eq(dev_net(skb->dev), net)) {
			__skb_unlink(skb, list);
			__skb_queue_tail(&tmp, skb);
		}
		skb = skb_next;
	}
	spin_unlock_irqrestore(&list->lock, flags);

	while ((skb = __skb_dequeue(&tmp))) {
		dev_put(skb->dev);
		kfree_skb(skb);
	}
}
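
/*
 * The two-phase shape of pneigh_queue_purge() is deliberate: matching
 * skbs are only unlinked onto a private list while the queue lock is
 * held with IRQs off; the potentially expensive dev_put()/kfree_skb()
 * calls then run after the lock has been dropped.
 */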

static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev,
			    bool skip_perm)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			if (skip_perm && n->nud_state & NUD_PERMANENT) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				rcu_dereference_protected(n->next,
					lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			neigh_mark_dead(n);
			if (refcount_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				 * We must destroy neighbour entry,
				 * but someone still uses it.
				 *
				 * The destroy will be delayed until
				 * the last user releases us, but
				 * we must kill timers etc. and move
				 * it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}

void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, false);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);

static int __neigh_ifdown(struct neigh_table *tbl, struct net_device *dev,
			  bool skip_perm)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev, skip_perm);
	pneigh_ifdown_and_unlock(tbl, dev);
	pneigh_queue_purge(&tbl->proxy_queue, dev ? dev_net(dev) : NULL);
	if (skb_queue_empty_lockless(&tbl->proxy_queue))
		del_timer_sync(&tbl->proxy_timer);
	return 0;
}

int neigh_carrier_down(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, true);
	return 0;
}
EXPORT_SYMBOL(neigh_carrier_down);

int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	__neigh_ifdown(tbl, dev, false);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
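
/*
 * The two exported wrappers differ only in how they treat NUD_PERMANENT
 * entries: neigh_carrier_down() passes skip_perm == true, so statically
 * configured neighbours survive a carrier loss, while neigh_ifdown()
 * flushes everything for the device, permanent entries included.
 */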

static struct neighbour *neigh_alloc(struct neigh_table *tbl,
				     struct net_device *dev,
				     u8 flags, bool exempt_from_gc)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries, gc_thresh3;

	if (exempt_from_gc)
		goto do_alloc;

	entries = atomic_inc_return(&tbl->gc_entries) - 1;
	gc_thresh3 = READ_ONCE(tbl->gc_thresh3);
	if (entries >= gc_thresh3 ||
	    (entries >= READ_ONCE(tbl->gc_thresh2) &&
	     time_after(now, READ_ONCE(tbl->last_flush) + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) && entries >= gc_thresh3) {
			net_info_ratelimited("%s: neighbor table overflow!\n",
					     tbl->id);
			NEIGH_CACHE_STAT_INC(tbl, table_fulls);
			goto out_entries;
		}
	}

do_alloc:
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated = n->used = now;
	n->nud_state = NUD_NONE;
	n->output = neigh_blackhole;
	n->flags = flags;
	seqlock_init(&n->hh.hh_lock);
	n->parms = neigh_parms_clone(&tbl->parms);
	timer_setup(&n->timer, neigh_timer_handler, 0);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl = tbl;
	refcount_set(&n->refcnt, 1);
	n->dead = 1;
	INIT_LIST_HEAD(&n->gc_list);

	atomic_inc(&tbl->entries);
out:
	return n;

out_entries:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	goto out;
}

static void neigh_get_hash_rnd(u32 *x)
{
	*x = get_random_u32() | 1;
}

static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE) {
		buckets = kzalloc(size, GFP_ATOMIC);
	} else {
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
		kmemleak_alloc(buckets, size, 1, GFP_ATOMIC);
	}
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
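
/*
 * Sizing example (assuming a typical 64-bit system with 4 KiB pages):
 * shift == 3 gives the initial 8-bucket table, only 64 bytes, so
 * kzalloc() is used; by shift == 10 the bucket array is 8 KiB, which
 * exceeds PAGE_SIZE, so whole pages come from __get_free_pages() and
 * are registered with kmemleak by hand.
 */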

static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE) {
		kfree(buckets);
	} else {
		kmemleak_free(buckets);
		free_pages((unsigned long)buckets, get_order(size));
	}
	kfree(nht);
}

static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}

struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	n = __neigh_lookup_noref(tbl, pkey, dev);
	if (n) {
		if (!refcount_inc_not_zero(&n->refcnt))
			n = NULL;
		NEIGH_CACHE_STAT_INC(tbl, hits);
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
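
/*
 * Lookup follows the classic RCU pattern: the bucket chain is walked
 * under rcu_read_lock_bh() without taking tbl->lock, and a hit is
 * promoted to a real reference with refcount_inc_not_zero(). If that
 * fails, the entry is already being torn down and the lookup reports a
 * miss even though the pointer was still visible in the chain.
 */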

static struct neighbour *
___neigh_create(struct neigh_table *tbl, const void *pkey,
		struct net_device *dev, u8 flags,
		bool exempt_from_gc, bool want_ref)
{
	u32 hash_val, key_len = tbl->key_len;
	struct neighbour *n1, *rc, *n;
	struct neigh_hash_table *nht;
	int error;

	n = neigh_alloc(tbl, dev, flags, exempt_from_gc);
	trace_neigh_create(tbl, dev, pkey, n, exempt_from_gc);
	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(dev, n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(n->primary_key, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
					    lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, n->primary_key, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (!exempt_from_gc)
		list_add_tail(&n->gc_list, &n->tbl->gc_list);

	if (want_ref)
		neigh_hold(n);
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	if (!exempt_from_gc)
		atomic_dec(&tbl->gc_entries);
	neigh_release(n);
	goto out;
}

struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	return ___neigh_create(tbl, pkey, dev, 0, false, want_ref);
}
EXPORT_SYMBOL(__neigh_create);
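
/*
 * Creation is optimistic: the entry is allocated and constructed
 * without tbl->lock held, and only the final bucket scan plus insert
 * run under the lock. If a concurrent creator won the race, the scan
 * finds the existing entry and the freshly built one is released via
 * out_tbl_unlock/out_neigh_release, so callers always get exactly one
 * entry per (dev, primary_key) pair.
 */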

static u32 pneigh_hash(const void *pkey, unsigned int key_len)
{
	u32 hash_val = *(u32 *)(pkey + key_len - 4);

	hash_val ^= (hash_val >> 16);
	hash_val ^= hash_val >> 8;
	hash_val ^= hash_val >> 4;
	hash_val &= PNEIGH_HASHMASK;
	return hash_val;
}

static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
					      struct net *net,
					      const void *pkey,
					      unsigned int key_len,
					      struct net_device *dev)
{
	while (n) {
		if (!memcmp(n->key, pkey, key_len) &&
		    net_eq(pneigh_net(n), net) &&
		    (n->dev == dev || !n->dev))
			return n;
		n = n->next;
	}
	return NULL;
}

struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
				     struct net *net, const void *pkey,
				     struct net_device *dev)
{
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);

struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl,
				   struct net *net, const void *pkey,
				   struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kzalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, net);
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		if (dev)
			dev_put(dev);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);

int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	unsigned int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}

static int pneigh_ifdown_and_unlock(struct neigh_table *tbl,
				    struct net_device *dev)
{
	struct pneigh_entry *n, **np, *freelist = NULL;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				n->next = freelist;
				freelist = n;
				continue;
			}
			np = &n->next;
		}
	}
	write_unlock_bh(&tbl->lock);
	while ((n = freelist)) {
		freelist = n->next;
		n->next = NULL;
		if (tbl->pdestructor)
			tbl->pdestructor(n);
		if (n->dev)
			dev_put(n->dev);
		kfree(n);
	}
	return -ENOENT;
}
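
/*
 * Worked example for pneigh_hash(): only the last four bytes of the
 * key are read, then xor-folded down (by 16, 8 and 4 bits) and masked
 * with PNEIGH_HASHMASK, so proxy entries always land in one of 16
 * buckets. For an IPv4 key those four bytes are the whole address; for
 * IPv6 they are the low 32 bits of the address.
 */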

static void neigh_parms_destroy(struct neigh_parms *parms);

static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (refcount_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}

/*
 *	neighbour must already be out of the table;
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(dev, neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);

/* Neighbour state is suspicious;
 * disable fast path.
 *
 * Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	neigh->output = neigh->ops->output;
}

/* Neighbour state is OK;
 * enable fast path.
 *
 * Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	neigh->output = neigh->ops->connected_output;
}
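
/*
 * neigh_suspect()/neigh_connect() implement the fast-path switch:
 * while an entry is in a NUD_CONNECTED state, n->output points at
 * ops->connected_output (e.g. neigh_connected_output() below), which
 * builds the header from the cached lladdr without further checks;
 * any doubt about the entry downgrades n->output to the slow
 * ops->output path (e.g. neigh_resolve_output()), which revalidates
 * reachability first.
 */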

static void neigh_periodic_work(struct work_struct *work)
{
	struct neigh_table *tbl = container_of(work, struct neigh_table,
					       gc_work.work);
	struct neighbour *n;
	struct neighbour __rcu **np;
	unsigned int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	/*
	 *	periodically recompute ReachableTime from random function
	 */
	if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
		struct neigh_parms *p;

		WRITE_ONCE(tbl->last_rand, jiffies);
		list_for_each_entry(p, &tbl->parms_list, list)
			p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}

	if (atomic_read(&tbl->entries) < READ_ONCE(tbl->gc_thresh1))
		goto out;

	for (i = 0 ; i < (1 << nht->hash_shift); i++) {
		np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
				lockdep_is_held(&tbl->lock))) != NULL) {
			unsigned int state;

			write_lock(&n->lock);

			state = n->nud_state;
			if ((state & (NUD_PERMANENT | NUD_IN_TIMER)) ||
			    (n->flags & NTF_EXT_LEARNED)) {
				write_unlock(&n->lock);
				goto next_elt;
			}

			if (time_before(n->used, n->confirmed) &&
			    time_is_before_eq_jiffies(n->confirmed))
				n->used = n->confirmed;

			if (refcount_read(&n->refcnt) == 1 &&
			    (state == NUD_FAILED ||
			     !time_in_range_open(jiffies, n->used,
						 n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);

next_elt:
			np = &n->next;
		}
		/*
		 * It's fine to release lock here, even if hash table
		 * grows while we are preempted.
		 */
		write_unlock_bh(&tbl->lock);
		cond_resched();
		write_lock_bh(&tbl->lock);
		nht = rcu_dereference_protected(tbl->nht,
						lockdep_is_held(&tbl->lock));
	}
out:
	/* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
	 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
	 * BASE_REACHABLE_TIME.
	 */
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
	write_unlock_bh(&tbl->lock);
}

static __inline__ int neigh_max_probes(struct neighbour *n)
{
	struct neigh_parms *p = n->parms;

	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
		NEIGH_VAR(p, MCAST_PROBES));
}

static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* This is a delicate spot: error_report() is a complicated
	 * routine and, in particular, it can end up hitting this same
	 * neighbour entry again. So we are careful here to avoid an
	 * endless loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}

static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_clone(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	if (neigh->ops->solicit)
		neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	consume_skb(skb);
}

/* Called when a timer expires for a neighbour entry. */
static void neigh_timer_handler(struct timer_list *t)
{
	unsigned long now, next;
	struct neighbour *neigh = from_timer(neigh, t, timer);
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;

	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			notify = 1;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME), HZ/100);
	}

	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
		goto out;
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		if (time_before(next, jiffies + HZ/100))
			next = jiffies + HZ/100;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh, 0);

	trace_neigh_timer_handler(neigh, 0);

	neigh_release(neigh);
}
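
/*
 * Summary of the timer-driven NUD state machine above:
 *
 *   REACHABLE --(reachable_time expired)--> DELAY or STALE
 *   DELAY     --(confirmed in time)-------> REACHABLE
 *   DELAY     --(no confirmation)---------> PROBE
 *   PROBE/INCOMPLETE --(probes exhausted)-> FAILED (queue invalidated)
 *
 * Each pass re-arms the timer at least HZ/100 in the future, so the
 * handler never spins, and a probe is sent whenever the entry remains
 * in INCOMPLETE or PROBE.
 */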

int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;
	if (neigh->dead)
		goto out_dead;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh_del_timer(neigh);
			neigh->nud_state = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/100);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh_del_timer(neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	trace_neigh_event_send_done(neigh, rc);
	return rc;

out_dead:
	if (neigh->nud_state & NUD_STALE)
		goto out_unlock_bh;
	write_unlock_bh(&neigh->lock);
	kfree_skb(skb);
	trace_neigh_event_send_dead(neigh, 1);
	return 1;
}
EXPORT_SYMBOL(__neigh_event_send);
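
/*
 * The arp_queue is budgeted in bytes of skb->truesize rather than in
 * packets: while resolution is pending, the oldest queued skbs are
 * dropped from the head (counted as unres_discards) until the new one
 * fits under QUEUE_LEN_BYTES, and the new skb is then appended at the
 * tail.
 */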

static void neigh_update_hhs(struct neighbour *neigh)
{
	struct hh_cache *hh;
	void (*update)(struct hh_cache*, const struct net_device*,
		       const unsigned char *) = NULL;

	if (neigh->dev->header_ops)
		update = neigh->dev->header_ops->cache_update;

	if (update) {
		hh = &neigh->hh;
		if (READ_ONCE(hh->hh_len)) {
			write_seqlock_bh(&hh->hh_lock);
			update(hh, neigh->dev, neigh->ha);
			write_sequnlock_bh(&hh->hh_lock);
		}
	}
}

/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.
	NEIGH_UPDATE_F_USE	means that the entry is user triggered.
	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.
 */
static int __neigh_update(struct neighbour *neigh, const u8 *lladdr,
			  u8 new, u32 flags, u32 nlmsg_pid,
			  struct netlink_ext_ack *extack)
{
	bool ext_learn_change = false;
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	trace_neigh_update(neigh, lladdr, new, flags, nlmsg_pid);

	write_lock_bh(&neigh->lock);

	dev = neigh->dev;
	old = neigh->nud_state;
	err = -EPERM;

	if (neigh->dead) {
		NL_SET_ERR_MSG(extack, "Neighbor entry is now dead");
		new = old;
		goto out;
	}
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	ext_learn_change = neigh_update_ext_learned(neigh, flags, &notify);
	if (flags & NEIGH_UPDATE_F_USE) {
		new = old & ~NUD_PERMANENT;
		neigh->nud_state = new;
		err = 0;
		goto out;
	}

	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID)) {
			NL_SET_ERR_MSG(extack, "No link layer address given");
			goto out;
		}
		lladdr = neigh->ha;
	}

	/* Update confirmed timestamp for neighbour entry after we
	 * received ARP packet even if it doesn't change IP to MAC binding.
	 */
	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    !(flags & NEIGH_UPDATE_F_ADMIN))
				new = old;
		}
	}

	/* Update timestamp only once we know we will make a change to the
	 * neighbour entry. Otherwise we risk to move the locktime window with
	 * noop updates and ignore relevant ARP updates.
	 */
	if (new != old || lladdr != neigh->ha)
		neigh->updated = jiffies;

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_PROBE)
			atomic_set(&neigh->probes, 0);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
		notify = 1;
	}

	if (lladdr != neigh->ha) {
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				(NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;

			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is? The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path. So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst && dst->obsolete != DST_OBSOLETE_DEAD) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter)
		neigh_update_is_router(neigh, flags, &notify);
	write_unlock_bh(&neigh->lock);

	if (((new ^ old) & NUD_PERMANENT) || ext_learn_change)
		neigh_update_gc_list(neigh);

	if (notify)
		neigh_update_notify(neigh, nlmsg_pid);

	trace_neigh_update_done(neigh, err);

	return err;
}

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags, u32 nlmsg_pid)
{
	return __neigh_update(neigh, lladdr, new, flags, nlmsg_pid, NULL);
}
EXPORT_SYMBOL(neigh_update);
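
/*
 * For example, the RTM_NEWNEIGH handler (neigh_add(), further below)
 * ends up here roughly as
 *
 *	__neigh_update(neigh, lladdr, ndm->ndm_state,
 *		       NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE | ...,
 *		       portid, extack);
 *
 * which is what lets an administrator replace the lladdr of an entry
 * regardless of its current state.
 */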

/* Update the neigh to listen temporarily for probe responses, even if it is
 * in a NUD_FAILED state. The caller has to hold neigh->lock for writing.
 */
void __neigh_set_probe_once(struct neighbour *neigh)
{
	if (neigh->dead)
		return;
	neigh->updated = jiffies;
	if (!(neigh->nud_state & NUD_FAILED))
		return;
	neigh->nud_state = NUD_INCOMPLETE;
	atomic_set(&neigh->probes, neigh_max_probes(neigh));
	neigh_add_timer(neigh,
			jiffies + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
				      HZ/100));
}
EXPORT_SYMBOL(__neigh_set_probe_once);

struct neighbour *neigh_event_ns(struct neigh_table *tbl,
				 u8 *lladdr, void *saddr,
				 struct net_device *dev)
{
	struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
						 lladdr || !dev->addr_len);

	if (neigh)
		neigh_update(neigh, lladdr, NUD_STALE,
			     NEIGH_UPDATE_F_OVERRIDE, 0);
	return neigh;
}
EXPORT_SYMBOL(neigh_event_ns);

/* called with read_lock_bh(&n->lock); */
static void neigh_hh_init(struct neighbour *n)
{
	struct net_device *dev = n->dev;
	__be16 prot = n->tbl->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}

/* Slow and careful. */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc = 0;

	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !READ_ONCE(neigh->hh.hh_len))
			neigh_hh_init(neigh);

		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);

/* As fast as possible without hh cache */
int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct net_device *dev = neigh->dev;
	unsigned int seq;
	int err;

	do {
		__skb_pull(skb, skb_network_offset(skb));
		seq = read_seqbegin(&neigh->ha_lock);
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      neigh->ha, NULL, skb->len);
	} while (read_seqretry(&neigh->ha_lock, seq));

	if (err >= 0)
		err = dev_queue_xmit(skb);
	else {
		err = -EINVAL;
		kfree_skb(skb);
	}
	return err;
}
EXPORT_SYMBOL(neigh_connected_output);

int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
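
/*
 * Both output paths read neigh->ha under the ha_lock seqlock: if
 * read_seqretry() reports that __neigh_update() rewrote the address
 * while the header was being built, the dev_hard_header() call is
 * simply redone with the fresh lladdr, so a torn (half old, half new)
 * link-layer address can never be emitted.
 */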

static void neigh_proxy_process(struct timer_list *t)
{
	struct neigh_table *tbl = from_timer(tbl, t, proxy_timer);
	long sched_next = 0;
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}

void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
		    struct sk_buff *skb)
{
	unsigned long now = jiffies;
	unsigned long sched_next = now + (prandom_u32() %
					  NEIGH_VAR(p, PROXY_DELAY));

	if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
		kfree_skb(skb);
		return;
	}

	NEIGH_CB(skb)->sched_next = sched_next;
	NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;

	spin_lock(&tbl->proxy_queue.lock);
	if (del_timer(&tbl->proxy_timer)) {
		if (time_before(tbl->proxy_timer.expires, sched_next))
			sched_next = tbl->proxy_timer.expires;
	}
	skb_dst_drop(skb);
	dev_hold(skb->dev);
	__skb_queue_tail(&tbl->proxy_queue, skb);
	mod_timer(&tbl->proxy_timer, sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
EXPORT_SYMBOL(pneigh_enqueue);

static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
						     struct net *net, int ifindex)
{
	struct neigh_parms *p;

	list_for_each_entry(p, &tbl->parms_list, list) {
		if ((p->dev && p->dev->ifindex == ifindex &&
		     net_eq(neigh_parms_net(p), net)) ||
		    (!p->dev && !ifindex && net_eq(net, &init_net)))
			return p;
	}

	return NULL;
}

struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl = tbl;
		refcount_set(&p->refcnt, 1);
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, net);
		p->sysctl_table = NULL;

		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		list_add(&p->list, &tbl->parms.list);
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
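
/*
 * The random component matters for proxy ARP/ND: each queued request
 * is answered after a uniformly random delay in [0, PROXY_DELAY), so
 * assuming the common ARP default proxy_delay of 0.8 s, somewhere
 * within the next 800 ms. This way a proxy does not systematically
 * beat the real host to the reply.
 */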

static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}

void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	list_del(&parms->list);
	parms->dead = 1;
	write_unlock_bh(&tbl->lock);
	if (parms->dev)
		dev_put(parms->dev);
	call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
}
EXPORT_SYMBOL(neigh_parms_release);

static void neigh_parms_destroy(struct neigh_parms *parms)
{
	kfree(parms);
}

static struct lock_class_key neigh_table_proxy_queue_class;

static struct neigh_table *neigh_tables[NEIGH_NR_TABLES] __read_mostly;

void neigh_table_init(int index, struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	INIT_LIST_HEAD(&tbl->parms_list);
	INIT_LIST_HEAD(&tbl->gc_list);
	list_add(&tbl->parms.list, &tbl->parms_list);
	write_pnet(&tbl->parms.net, &init_net);
	refcount_set(&tbl->parms.refcnt, 1);
	tbl->parms.reachable_time =
		neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_seq_data(tbl->id, 0, init_net.proc_net_stat,
				  &neigh_stat_seq_ops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	queue_delayed_work(system_power_efficient_wq, &tbl->gc_work,
			   tbl->parms.reachable_time);
	timer_setup(&tbl->proxy_timer, neigh_proxy_process, 0);
	skb_queue_head_init_class(&tbl->proxy_queue,
				  &neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand = now + tbl->parms.reachable_time * 20;

	neigh_tables[index] = tbl;
}
EXPORT_SYMBOL(neigh_table_init);

int neigh_table_clear(int index, struct neigh_table *tbl)
{
	neigh_tables[index] = NULL;
	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue, NULL);
	neigh_ifdown(tbl, NULL);
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");

	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);

static struct neigh_table *neigh_find_table(int family)
{
	struct neigh_table *tbl = NULL;

	switch (family) {
	case AF_INET:
		tbl = neigh_tables[NEIGH_ARP_TABLE];
		break;
	case AF_INET6:
		tbl = neigh_tables[NEIGH_ND_TABLE];
		break;
#ifdef CONFIG_NEWIP
	case AF_NINET:	/* NIP */
		tbl = neigh_tables[NEIGH_NND_TABLE];
		break;
#endif
	}

	return tbl;
}

const struct nla_policy nda_policy[NDA_MAX+1] = {
	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
	[NDA_PROBES]		= { .type = NLA_U32 },
	[NDA_VLAN]		= { .type = NLA_U16 },
	[NDA_PORT]		= { .type = NLA_U16 },
	[NDA_VNI]		= { .type = NLA_U32 },
	[NDA_IFINDEX]		= { .type = NLA_U32 },
	[NDA_MASTER]		= { .type = NLA_U32 },
	[NDA_PROTOCOL]		= { .type = NLA_U8 },
	[NDA_NH_ID]		= { .type = NLA_U32 },
	[NDA_FDB_EXT_ATTRS]	= { .type = NLA_NESTED },
};

static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct neighbour *neigh;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (!dst_attr) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(dst_attr) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	if (ndm->ndm_flags & NTF_PROXY) {
		err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
		goto out;
	}

	if (dev == NULL)
		goto out;

	neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
	if (neigh == NULL) {
		err = -ENOENT;
		goto out;
	}

	err = __neigh_update(neigh, NULL, NUD_FAILED,
			     NEIGH_UPDATE_F_OVERRIDE | NEIGH_UPDATE_F_ADMIN,
			     NETLINK_CB(skb).portid, extack);
	write_lock_bh(&tbl->lock);
	neigh_release(neigh);
	neigh_remove_one(neigh, tbl);
	write_unlock_bh(&tbl->lock);

out:
	return err;
}
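
/*
 * Note the deletion strategy above: RTM_DELNEIGH does not free the
 * entry directly. It administratively forces the state to NUD_FAILED
 * (which also flushes any queued packets via neigh_invalidate()) and
 * then unlinks the entry with neigh_remove_one(); the memory itself is
 * only reclaimed once the last reference is dropped.
 */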

static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE |
		    NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	struct neighbour *neigh;
	void *dst, *lladdr;
	u8 protocol = 0;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse_deprecated(nlh, sizeof(*ndm), tb, NDA_MAX,
				     nda_policy, extack);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (!tb[NDA_DST]) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		goto out;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) {
			NL_SET_ERR_MSG(extack, "Invalid link address");
			goto out;
		}
	}

	tbl = neigh_find_table(ndm->ndm_family);
	if (tbl == NULL)
		return -EAFNOSUPPORT;

	if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) {
		NL_SET_ERR_MSG(extack, "Invalid network address");
		goto out;
	}

	dst = nla_data(tb[NDA_DST]);
	lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

	if (tb[NDA_PROTOCOL])
		protocol = nla_get_u8(tb[NDA_PROTOCOL]);

	if (ndm->ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		err = -ENOBUFS;
		pn = pneigh_lookup(tbl, net, dst, dev, 1);
		if (pn) {
			pn->flags = ndm->ndm_flags;
			if (protocol)
				pn->protocol = protocol;
			err = 0;
		}
		goto out;
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "Device not specified");
		goto out;
	}

	if (tbl->allow_add && !tbl->allow_add(dev, extack)) {
		err = -EINVAL;
		goto out;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (neigh == NULL) {
		bool exempt_from_gc;

		if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
			err = -ENOENT;
			goto out;
		}

		exempt_from_gc = ndm->ndm_state & NUD_PERMANENT ||
				 ndm->ndm_flags & NTF_EXT_LEARNED;
		neigh = ___neigh_create(tbl, dst, dev,
					ndm->ndm_flags & NTF_EXT_LEARNED,
					exempt_from_gc, true);
		if (IS_ERR(neigh)) {
			err = PTR_ERR(neigh);
			goto out;
		}
	} else {
		if (nlh->nlmsg_flags & NLM_F_EXCL) {
			err = -EEXIST;
			neigh_release(neigh);
			goto out;
		}

		if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
			flags &= ~(NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
	}

	if (protocol)
		neigh->protocol = protocol;
	if (ndm->ndm_flags & NTF_EXT_LEARNED)
		flags |= NEIGH_UPDATE_F_EXT_LEARNED;
	if (ndm->ndm_flags & NTF_ROUTER)
		flags |= NEIGH_UPDATE_F_ISROUTER;
	if (ndm->ndm_flags & NTF_USE)
		flags |= NEIGH_UPDATE_F_USE;

	err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags,
			     NETLINK_CB(skb).portid, extack);
	if (!err && ndm->ndm_flags & NTF_USE) {
		neigh_event_send(neigh, NULL);
		err = 0;
	}
	neigh_release(neigh);
out:
	return err;
}
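
/*
 * From user space this handler is typically reached through iproute2,
 * e.g. (illustrative):
 *
 *	ip neigh replace 192.0.2.1 lladdr 00:11:22:33:44:55 \
 *		dev eth0 nud permanent
 *
 * which sends RTM_NEWNEIGH with NLM_F_CREATE | NLM_F_REPLACE, NDA_DST,
 * NDA_LLADDR and ndm_state = NUD_PERMANENT.
 */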

static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
{
	struct nlattr *nest;

	nest = nla_nest_start_noflag(skb, NDTA_PARMS);
	if (nest == NULL)
		return -ENOBUFS;

	if ((parms->dev &&
	     nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
	    nla_put_u32(skb, NDTPA_REFCNT, refcount_read(&parms->refcnt)) ||
	    nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
	    /* approximative value for deprecated QUEUE_LEN (in packets) */
	    nla_put_u32(skb, NDTPA_QUEUE_LEN,
			NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
	    nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
	    nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
	    nla_put_u32(skb, NDTPA_UCAST_PROBES,
			NEIGH_VAR(parms, UCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
			NEIGH_VAR(parms, MCAST_PROBES)) ||
	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
			NEIGH_VAR(parms, MCAST_REPROBES)) ||
	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
			  NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
			  NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_GC_STALETIME,
			  NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
			  NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_RETRANS_TIME,
			  NEIGH_VAR(parms, RETRANS_TIME), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
			  NEIGH_VAR(parms, ANYCAST_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_PROXY_DELAY,
			  NEIGH_VAR(parms, PROXY_DELAY), NDTPA_PAD) ||
	    nla_put_msecs(skb, NDTPA_LOCKTIME,
			  NEIGH_VAR(parms, LOCKTIME), NDTPA_PAD))
		goto nla_put_failure;
	return nla_nest_end(skb, nest);

nla_put_failure:
	nla_nest_cancel(skb, nest);
	return -EMSGSIZE;
}
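
/*
 * The deprecated NDTPA_QUEUE_LEN attribute is kept consistent with the
 * byte-based value in both directions: it is reported here as
 * QUEUE_LEN_BYTES / SKB_TRUESIZE(ETH_FRAME_LEN), and neightbl_set()
 * below converts a written packet count back with the inverse
 * multiplication, so the two attributes are only approximately
 * round-trip stable.
 */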
			sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1 = 0;
	ndtmsg->ndtm_pad2 = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
	    neightbl_fill_parms(skb, parms) < 0)
		goto errout;

	read_unlock_bh(&tbl->lock);
	nlmsg_end(skb, nlh);
	return 0;
errout:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};

static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};

static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh,
			struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	bool found = false;
	int err, tidx;

	err = nlmsg_parse_deprecated(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
				     nl_neightbl_policy, extack);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;
		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) {
			found = true;
			break;
		}
	}

	if (!found)
		return -ENOENT;

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested_deprecated(tbp, NDTPA_MAX,
						  tb[NDTA_PARMS],
						  nl_ntbl_parm_policy, extack);
		if (err < 0)
			goto errout_tbl_lock;

		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_REPROBES:
				NEIGH_VAR_SET(p, MCAST_REPROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				/* update reachable_time as well, otherwise, the change will
				 * only be effective after the next time neigh_periodic_work
				 * decides to recompute it (can be multiple minutes)
				 */
				p->reachable_time =
					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME,
					      nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		WRITE_ONCE(tbl->gc_thresh1, nla_get_u32(tb[NDTA_THRESH1]));

	if (tb[NDTA_THRESH2])
		WRITE_ONCE(tbl->gc_thresh2, nla_get_u32(tb[NDTA_THRESH2]));

	if (tb[NDTA_THRESH3])
		WRITE_ONCE(tbl->gc_thresh3, nla_get_u32(tb[NDTA_THRESH3]));

	if (tb[NDTA_GC_INTERVAL])
		WRITE_ONCE(tbl->gc_interval, nla_get_msecs(tb[NDTA_GC_INTERVAL]));

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout:
	return err;
}

static int neightbl_valid_dump_info(const struct nlmsghdr *nlh,
				    struct netlink_ext_ack *extack)
{
	struct ndtmsg *ndtm;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor table dump request");
		return -EINVAL;
	}

	ndtm = nlmsg_data(nlh);
	if (ndtm->ndtm_pad1 || ndtm->ndtm_pad2) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor table dump request");
		return -EINVAL;
	}

	if (nlmsg_attrlen(nlh, sizeof(*ndtm))) {
		NL_SET_ERR_MSG(extack, "Invalid data after header in neighbor table dump request");
		return -EINVAL;
	}

	return 0;
}

static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	if (cb->strict_check) {
		int err = neightbl_valid_dump_info(nlh, cb->extack);

		if (err < 0)
			return err;
	}

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	for (tidx = 0; tidx < NEIGH_NR_TABLES; tidx++) {
		struct neigh_parms *p;

		tbl = neigh_tables[tidx];
		if (!tbl)
			continue;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) < 0)
			break;

		nidx = 0;
		p = list_next_entry(&tbl->parms, list);
		list_for_each_entry_from(p, &tbl->parms_list, list) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) < 0)
				goto out;
		next:
			nidx++;
		}

		neigh_skip = 0;
	}
out:
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}

static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = neigh->ops->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = neigh->flags;
	ndm->ndm_type = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	read_lock_bh(&neigh->lock);
	ndm->ndm_state = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt = refcount_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	if (neigh->protocol && nla_put_u8(skb, NDA_PROTOCOL, neigh->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb,
		     nlh);
	return -EMSGSIZE;
}

static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
			    u32 pid, u32 seq, int type, unsigned int flags,
			    struct neigh_table *tbl)
{
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family = tbl->family;
	ndm->ndm_pad1 = 0;
	ndm->ndm_pad2 = 0;
	ndm->ndm_flags = pn->flags | NTF_PROXY;
	ndm->ndm_type = RTN_UNICAST;
	ndm->ndm_ifindex = pn->dev ? pn->dev->ifindex : 0;
	ndm->ndm_state = NUD_NONE;

	if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
		goto nla_put_failure;

	if (pn->protocol && nla_put_u8(skb, NDA_PROTOCOL, pn->protocol))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static void neigh_update_notify(struct neighbour *neigh, u32 nlmsg_pid)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0, nlmsg_pid);
}

static bool neigh_master_filtered(struct net_device *dev, int master_idx)
{
	struct net_device *master;

	if (!master_idx)
		return false;

	master = dev ? netdev_master_upper_dev_get(dev) : NULL;
	if (!master || master->ifindex != master_idx)
		return true;

	return false;
}

static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
	if (filter_idx && (!dev || dev->ifindex != filter_idx))
		return true;

	return false;
}

struct neigh_dump_filter {
	int master_idx;
	int dev_idx;
};

static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb,
			    struct neigh_dump_filter *filter)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (idx < s_idx || !net_eq(dev_net(n->dev), net))
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    flags) < 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}

static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			     struct netlink_callback *cb,
			     struct neigh_dump_filter *filter)
{
	struct pneigh_entry *n;
	struct net *net = sock_net(skb->sk);
	int rc, h, s_h = cb->args[3];
	int idx, s_idx = idx = cb->args[4];
	unsigned int flags = NLM_F_MULTI;

	if (filter->dev_idx || filter->master_idx)
		flags |= NLM_F_DUMP_FILTERED;

	read_lock_bh(&tbl->lock);

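	/* Dump-resume state: cb->args[3] carries the hash bucket and
	 * cb->args[4] the index within that bucket from the previous dump
	 * pass, so the walk below restarts where the last skb filled up.
	 */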
	for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
		if (h > s_h)
			s_idx = 0;
		for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
			if (idx < s_idx || pneigh_net(n) != net)
				goto next;
			if (neigh_ifindex_filtered(n->dev, filter->dev_idx) ||
			    neigh_master_filtered(n->dev, filter->master_idx))
				goto next;
			if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					     cb->nlh->nlmsg_seq,
					     RTM_NEWNEIGH, flags, tbl) < 0) {
				read_unlock_bh(&tbl->lock);
				rc = -1;
				goto out;
			}
		next:
			idx++;
		}
	}

	read_unlock_bh(&tbl->lock);
	rc = skb->len;
out:
	cb->args[3] = h;
	cb->args[4] = idx;
	return rc;
}

static int neigh_valid_dump_req(const struct nlmsghdr *nlh,
				bool strict_check,
				struct neigh_dump_filter *filter,
				struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	int err, i;

	if (strict_check) {
		struct ndmsg *ndm;

		if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
			NL_SET_ERR_MSG(extack, "Invalid header for neighbor dump request");
			return -EINVAL;
		}

		ndm = nlmsg_data(nlh);
		if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_ifindex ||
		    ndm->ndm_state || ndm->ndm_type) {
			NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor dump request");
			return -EINVAL;
		}

		if (ndm->ndm_flags & ~NTF_PROXY) {
			NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor dump request");
			return -EINVAL;
		}

		err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg),
						    tb, NDA_MAX, nda_policy,
						    extack);
	} else {
		err = nlmsg_parse_deprecated(nlh, sizeof(struct ndmsg), tb,
					     NDA_MAX, nda_policy, extack);
	}
	if (err < 0)
		return err;

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		/* all new attributes should require strict_check */
		switch (i) {
		case NDA_IFINDEX:
			filter->dev_idx = nla_get_u32(tb[i]);
			break;
		case NDA_MASTER:
			filter->master_idx = nla_get_u32(tb[i]);
			break;
		default:
			if (strict_check) {
				NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor dump request");
				return -EINVAL;
			}
		}
	}

	return 0;
}

static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	const struct nlmsghdr *nlh = cb->nlh;
	struct neigh_dump_filter filter = {};
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	family = ((struct rtgenmsg *)nlmsg_data(nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *)nlmsg_data(nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	err = neigh_valid_dump_req(nlh, cb->strict_check, &filter, cb->extack);
	if (err < 0 && cb->strict_check)
		return err;

	s_t = cb->args[0];

	for (t = 0; t < NEIGH_NR_TABLES; t++) {
		tbl = neigh_tables[t];

		if (!tbl)
			continue;
		if (t < s_t || (family && tbl->family != family))
			continue;
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb, &filter);
		else
			err = neigh_dump_table(tbl, skb, cb, &filter);
		if (err < 0)
			break;
	}

	cb->args[0] = t;
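	/* Remember the table index for the next dump pass; returning a
	 * positive skb->len lets the netlink core keep the dump going.
	 */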
	return skb->len;
}

static int neigh_valid_get_req(const struct nlmsghdr *nlh,
			       struct neigh_table **tbl,
			       void **dst, int *dev_idx, u8 *ndm_flags,
			       struct netlink_ext_ack *extack)
{
	struct nlattr *tb[NDA_MAX + 1];
	struct ndmsg *ndm;
	int err, i;

	if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*ndm))) {
		NL_SET_ERR_MSG(extack, "Invalid header for neighbor get request");
		return -EINVAL;
	}

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_pad1 || ndm->ndm_pad2 || ndm->ndm_state ||
	    ndm->ndm_type) {
		NL_SET_ERR_MSG(extack, "Invalid values in header for neighbor get request");
		return -EINVAL;
	}

	if (ndm->ndm_flags & ~NTF_PROXY) {
		NL_SET_ERR_MSG(extack, "Invalid flags in header for neighbor get request");
		return -EINVAL;
	}

	err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct ndmsg), tb,
					    NDA_MAX, nda_policy, extack);
	if (err < 0)
		return err;

	*ndm_flags = ndm->ndm_flags;
	*dev_idx = ndm->ndm_ifindex;
	*tbl = neigh_find_table(ndm->ndm_family);
	if (*tbl == NULL) {
		NL_SET_ERR_MSG(extack, "Unsupported family in header for neighbor get request");
		return -EAFNOSUPPORT;
	}

	for (i = 0; i <= NDA_MAX; ++i) {
		if (!tb[i])
			continue;

		switch (i) {
		case NDA_DST:
			if (nla_len(tb[i]) != (int)(*tbl)->key_len) {
				NL_SET_ERR_MSG(extack, "Invalid network address in neighbor get request");
				return -EINVAL;
			}
			*dst = nla_data(tb[i]);
			break;
		default:
			NL_SET_ERR_MSG(extack, "Unsupported attribute in neighbor get request");
			return -EINVAL;
		}
	}

	return 0;
}

static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4)  /* NDA_PROBES */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int neigh_get_reply(struct net *net, struct neighbour *neigh,
			   u32 pid, u32 seq)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = neigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static inline size_t pneigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(1); /* NDA_PROTOCOL */
}

static int pneigh_get_reply(struct net *net, struct pneigh_entry *neigh,
			    u32 pid, u32 seq, struct neigh_table *tbl)
{
	struct sk_buff *skb;
	int err = 0;

	skb = nlmsg_new(pneigh_nlmsg_size(), GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	err = pneigh_fill_info(skb, neigh, pid, seq, RTM_NEWNEIGH, 0, tbl);
	if (err) {
		kfree_skb(skb);
		goto errout;
	}

	err = rtnl_unicast(skb, net, pid);
errout:
	return err;
}

static int neigh_get(struct sk_buff *in_skb, struct nlmsghdr *nlh,
		     struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(in_skb->sk);
	struct net_device *dev = NULL;
	struct neigh_table *tbl = NULL;
	struct neighbour *neigh;
	void *dst = NULL;
	u8 ndm_flags = 0;
	int dev_idx = 0;
	int err;

	err = neigh_valid_get_req(nlh, &tbl, &dst, &dev_idx, &ndm_flags,
				  extack);
	if (err < 0)
		return err;

	if (dev_idx) {
		dev = __dev_get_by_index(net, dev_idx);
		if (!dev) {
			NL_SET_ERR_MSG(extack, "Unknown device ifindex");
			return -ENODEV;
		}
	}

	if (!dst) {
		NL_SET_ERR_MSG(extack, "Network address not specified");
		return -EINVAL;
	}

	if (ndm_flags & NTF_PROXY) {
		struct pneigh_entry *pn;

		pn = pneigh_lookup(tbl, net, dst, dev, 0);
		if (!pn) {
			NL_SET_ERR_MSG(extack, "Proxy neighbour entry not found");
			return -ENOENT;
		}
		return pneigh_get_reply(net, pn, NETLINK_CB(in_skb).portid,
					nlh->nlmsg_seq, tbl);
	}

	if (!dev) {
		NL_SET_ERR_MSG(extack, "No device specified");
		return -EINVAL;
	}

	neigh = neigh_lookup(tbl, dst, dev);
	if (!neigh) {
		NL_SET_ERR_MSG(extack, "Neighbour entry not found");
		return -ENOENT;
	}

	err = neigh_get_reply(net, neigh, NETLINK_CB(in_skb).portid,
			      nlh->nlmsg_seq);

	neigh_release(neigh);

	return err;
}

void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
{
	int chain;
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	read_lock(&tbl->lock); /* avoid resizes */
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;

		for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
		     n != NULL;
		     n = rcu_dereference_bh(n->next))
			cb(n, cookie);
	}
	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_for_each);

/* The tbl->lock must be held as a writer and BH disabled.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				neigh_mark_dead(n);
			} else
				np = &n->next;
			write_unlock(&n->lock);
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);

int neigh_xmit(int index, struct net_device *dev,
	       const void *addr, struct sk_buff *skb)
{
	int err = -EAFNOSUPPORT;

	if (likely(index < NEIGH_NR_TABLES)) {
		struct neigh_table *tbl;
		struct neighbour *neigh;

		tbl = neigh_tables[index];
		if (!tbl)
			goto out;
		rcu_read_lock_bh();
		if (index == NEIGH_ARP_TABLE) {
			u32 key = *((u32 *)addr);

			neigh = __ipv4_neigh_lookup_noref(dev, key);
		} else {
			neigh = __neigh_lookup_noref(tbl, addr, dev);
		}
		if (!neigh)
			neigh = __neigh_create(tbl, addr, dev, false);
		err = PTR_ERR(neigh);
		if (IS_ERR(neigh)) {
			rcu_read_unlock_bh();
			goto out_kfree_skb;
		}
		err = neigh->output(neigh, skb);
		rcu_read_unlock_bh();
	} else if (index == NEIGH_LINK_TABLE) {
		err = dev_hard_header(skb, dev, ntohs(skb->protocol),
				      addr, NULL, skb->len);
		if (err < 0)
			goto out_kfree_skb;
		err = dev_queue_xmit(skb);
	}
out:
	return err;
out_kfree_skb:
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_xmit);

#ifdef CONFIG_PROC_FS

static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket;

	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}

static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v =
					state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}

static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct neighbour *n = neigh_get_first(seq);

	if (n) {
		--(*pos);
		while (*pos) {
			n = neigh_get_next(seq, n, pos);
			if (!n)
				break;
		}
	}
	return *pos ? NULL : n;
}

static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;
	struct pneigh_entry *pn = NULL;
	int bucket = state->bucket;

	state->flags |= NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
		pn = tbl->phash_buckets[bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}
	state->bucket = bucket;

	return pn;
}

static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
					    struct pneigh_entry *pn,
					    loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_table *tbl = state->tbl;

	do {
		pn = pn->next;
	} while (pn && !net_eq(pneigh_net(pn), net));

	while (!pn) {
		if (++state->bucket > PNEIGH_HASHMASK)
			break;
		pn = tbl->phash_buckets[state->bucket];
		while (pn && !net_eq(pneigh_net(pn), net))
			pn = pn->next;
		if (pn)
			break;
	}

	if (pn && pos)
		--(*pos);

	return pn;
}

static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
{
	struct pneigh_entry *pn = pneigh_get_first(seq);

	if (pn) {
		--(*pos);
		while (*pos) {
			pn = pneigh_get_next(seq, pn, pos);
			if (!pn)
				break;
		}
	}
	return *pos ? NULL : pn;
}

static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	void *rc;
	loff_t idxpos = *pos;

	rc = neigh_get_idx(seq, &idxpos);
	if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
		rc = pneigh_get_idx(seq, &idxpos);

	return rc;
}

void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(tbl->lock)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);
	read_lock(&tbl->lock);

	return *pos ?
	       neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);

void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);

void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(tbl->lock)
	__releases(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;
	struct neigh_table *tbl = state->tbl;

	read_unlock(&tbl->lock);
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);

/* statistics via seq_file */

static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}

static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	int cpu;

	for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	(*pos)++;
	return NULL;
}

static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}

static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = PDE_DATA(file_inode(seq->file));
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards table_fulls\n");
		return 0;
	}

	seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
			"%08lx %08lx %08lx %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards,
		   st->table_fulls
		   );

	return 0;
}

static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
#endif /* CONFIG_PROC_FS */

static void __neigh_notify(struct neighbour *n, int type, int flags,
			   u32 pid)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, pid, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err ==
			-EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}

void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST, 0);
}
EXPORT_SYMBOL(neigh_app_ns);

#ifdef CONFIG_SYSCTL
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);

static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	struct ctl_table tmp = *ctl;

	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}

static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
						   int family)
{
	switch (family) {
	case AF_INET:
		return __in_dev_arp_parms_get_rcu(dev);
	case AF_INET6:
		return __in6_dev_nd_parms_get_rcu(dev);
	}
	return NULL;
}

static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
			neigh_get_dev_parms_rcu(dev, family);

		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}

static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = neigh_parms_net(p);
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (index == NEIGH_VAR_DELAY_PROBE_TIME)
		call_netevent_notifiers(NETEVENT_DELAY_PROBE_TIME_UPDATE, p);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}

static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
					   void *buffer, size_t *lenp,
					   loff_t *ppos)
{
	struct ctl_table tmp = *ctl;
	int ret;

	tmp.extra1 = SYSCTL_ZERO;
	tmp.extra2 = SYSCTL_INT_MAX;

	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
	neigh_proc_update(ctl, write);
	return ret;
}

int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
			size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec);

int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
				size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);

static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
					      void *buffer, size_t *lenp,
					      loff_t *ppos)
{
	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
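/* The wrapper below follows the same pattern as the ones above: run the
 * stock proc_dointvec_* conversion (here milliseconds <-> jiffies), then
 * let neigh_proc_update() mark the value as explicitly set and, for the
 * default (dev == NULL) parms, propagate it to devices that have not
 * overridden it.
 */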

int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
				   void *buffer, size_t *lenp, loff_t *ppos)
{
	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}
EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);

static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);

	neigh_proc_update(ctl, write);
	return ret;
}

static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
					  void *buffer, size_t *lenp,
					  loff_t *ppos)
{
	struct neigh_parms *p = ctl->extra2;
	int ret;

	if (strcmp(ctl->procname, "base_reachable_time") == 0)
		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
	else
		ret = -1;

	if (write && ret == 0) {
		/* update reachable_time as well, otherwise, the change will
		 * only be effective after the next time neigh_periodic_work
		 * decides to recompute it
		 */
		p->reachable_time =
			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
	}
	return ret;
}

#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)

static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= SYSCTL_ZERO,
			.extra2		= SYSCTL_INT_MAX,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};

int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		struct neigh_table *tbl = p->tbl;

		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = &tbl->gc_interval;
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = &tbl->gc_thresh1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = &tbl->gc_thresh2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = &tbl->gc_thresh3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	} else {
		/* These handlers will update p->reachable_time after
		 * base_reachable_time(_ms) is set to ensure the new timer starts being
		 * applied after the next neighbour update instead of waiting for
		 * neigh_periodic_work to update its value (can be multiple minutes).
		 * So any handler that replaces them should do this as well.
		 */
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
			neigh_proc_base_reachable_time;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
			neigh_proc_base_reachable_time;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
		p_name = "ipv4";
		break;
	case AF_INET6:
		p_name = "ipv6";
		break;
	default:
		BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		 p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);

void neigh_sysctl_unregister(struct neigh_parms *p)
{
	if (p->sysctl_table) {
		struct neigh_sysctl_table *t = p->sysctl_table;

		p->sysctl_table = NULL;
		unregister_net_sysctl_table(t->sysctl_header);
		kfree(t);
	}
}
EXPORT_SYMBOL(neigh_sysctl_unregister);

#endif	/* CONFIG_SYSCTL */

static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, neigh_get, neigh_dump_info, 0);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      0);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0);

	return 0;
}

subsys_initcall(neigh_init);
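
/* Illustrative note, not part of the original file: the handlers
 * registered in neigh_init() are what service a userspace query such as
 *
 *	$ ip neigh show
 *
 * iproute2 sends RTM_GETNEIGH with NLM_F_DUMP on an AF_NETLINK
 * (NETLINK_ROUTE) socket; the netlink core then calls neigh_dump_info()
 * repeatedly, and each cache entry is returned as an RTM_NEWNEIGH
 * message built by neigh_fill_info().
 */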