162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * inet fragments management 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Authors: Pavel Emelyanov <xemul@openvz.org> 662306a36Sopenharmony_ci * Started as consolidation of ipv4/ip_fragment.c, 762306a36Sopenharmony_ci * ipv6/reassembly. and ipv6 nf conntrack reassembly 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#include <linux/list.h> 1162306a36Sopenharmony_ci#include <linux/spinlock.h> 1262306a36Sopenharmony_ci#include <linux/module.h> 1362306a36Sopenharmony_ci#include <linux/timer.h> 1462306a36Sopenharmony_ci#include <linux/mm.h> 1562306a36Sopenharmony_ci#include <linux/random.h> 1662306a36Sopenharmony_ci#include <linux/skbuff.h> 1762306a36Sopenharmony_ci#include <linux/rtnetlink.h> 1862306a36Sopenharmony_ci#include <linux/slab.h> 1962306a36Sopenharmony_ci#include <linux/rhashtable.h> 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ci#include <net/sock.h> 2262306a36Sopenharmony_ci#include <net/inet_frag.h> 2362306a36Sopenharmony_ci#include <net/inet_ecn.h> 2462306a36Sopenharmony_ci#include <net/ip.h> 2562306a36Sopenharmony_ci#include <net/ipv6.h> 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_ci/* Use skb->cb to track consecutive/adjacent fragments coming at 2862306a36Sopenharmony_ci * the end of the queue. Nodes in the rb-tree queue will 2962306a36Sopenharmony_ci * contain "runs" of one or more adjacent fragments. 3062306a36Sopenharmony_ci * 3162306a36Sopenharmony_ci * Invariants: 3262306a36Sopenharmony_ci * - next_frag is NULL at the tail of a "run"; 3362306a36Sopenharmony_ci * - the head of a "run" has the sum of all fragment lengths in frag_run_len. 3462306a36Sopenharmony_ci */ 3562306a36Sopenharmony_cistruct ipfrag_skb_cb { 3662306a36Sopenharmony_ci union { 3762306a36Sopenharmony_ci struct inet_skb_parm h4; 3862306a36Sopenharmony_ci struct inet6_skb_parm h6; 3962306a36Sopenharmony_ci }; 4062306a36Sopenharmony_ci struct sk_buff *next_frag; 4162306a36Sopenharmony_ci int frag_run_len; 4262306a36Sopenharmony_ci}; 4362306a36Sopenharmony_ci 4462306a36Sopenharmony_ci#define FRAG_CB(skb) ((struct ipfrag_skb_cb *)((skb)->cb)) 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_cistatic void fragcb_clear(struct sk_buff *skb) 4762306a36Sopenharmony_ci{ 4862306a36Sopenharmony_ci RB_CLEAR_NODE(&skb->rbnode); 4962306a36Sopenharmony_ci FRAG_CB(skb)->next_frag = NULL; 5062306a36Sopenharmony_ci FRAG_CB(skb)->frag_run_len = skb->len; 5162306a36Sopenharmony_ci} 5262306a36Sopenharmony_ci 5362306a36Sopenharmony_ci/* Append skb to the last "run". */ 5462306a36Sopenharmony_cistatic void fragrun_append_to_last(struct inet_frag_queue *q, 5562306a36Sopenharmony_ci struct sk_buff *skb) 5662306a36Sopenharmony_ci{ 5762306a36Sopenharmony_ci fragcb_clear(skb); 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_ci FRAG_CB(q->last_run_head)->frag_run_len += skb->len; 6062306a36Sopenharmony_ci FRAG_CB(q->fragments_tail)->next_frag = skb; 6162306a36Sopenharmony_ci q->fragments_tail = skb; 6262306a36Sopenharmony_ci} 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_ci/* Create a new "run" with the skb. */ 6562306a36Sopenharmony_cistatic void fragrun_create(struct inet_frag_queue *q, struct sk_buff *skb) 6662306a36Sopenharmony_ci{ 6762306a36Sopenharmony_ci BUILD_BUG_ON(sizeof(struct ipfrag_skb_cb) > sizeof(skb->cb)); 6862306a36Sopenharmony_ci fragcb_clear(skb); 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci if (q->last_run_head) 7162306a36Sopenharmony_ci rb_link_node(&skb->rbnode, &q->last_run_head->rbnode, 7262306a36Sopenharmony_ci &q->last_run_head->rbnode.rb_right); 7362306a36Sopenharmony_ci else 7462306a36Sopenharmony_ci rb_link_node(&skb->rbnode, NULL, &q->rb_fragments.rb_node); 7562306a36Sopenharmony_ci rb_insert_color(&skb->rbnode, &q->rb_fragments); 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_ci q->fragments_tail = skb; 7862306a36Sopenharmony_ci q->last_run_head = skb; 7962306a36Sopenharmony_ci} 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_ci/* Given the OR values of all fragments, apply RFC 3168 5.3 requirements 8262306a36Sopenharmony_ci * Value : 0xff if frame should be dropped. 8362306a36Sopenharmony_ci * 0 or INET_ECN_CE value, to be ORed in to final iph->tos field 8462306a36Sopenharmony_ci */ 8562306a36Sopenharmony_ciconst u8 ip_frag_ecn_table[16] = { 8662306a36Sopenharmony_ci /* at least one fragment had CE, and others ECT_0 or ECT_1 */ 8762306a36Sopenharmony_ci [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = INET_ECN_CE, 8862306a36Sopenharmony_ci [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = INET_ECN_CE, 8962306a36Sopenharmony_ci [IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = INET_ECN_CE, 9062306a36Sopenharmony_ci 9162306a36Sopenharmony_ci /* invalid combinations : drop frame */ 9262306a36Sopenharmony_ci [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE] = 0xff, 9362306a36Sopenharmony_ci [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0] = 0xff, 9462306a36Sopenharmony_ci [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_1] = 0xff, 9562306a36Sopenharmony_ci [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, 9662306a36Sopenharmony_ci [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0] = 0xff, 9762306a36Sopenharmony_ci [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_1] = 0xff, 9862306a36Sopenharmony_ci [IPFRAG_ECN_NOT_ECT | IPFRAG_ECN_CE | IPFRAG_ECN_ECT_0 | IPFRAG_ECN_ECT_1] = 0xff, 9962306a36Sopenharmony_ci}; 10062306a36Sopenharmony_ciEXPORT_SYMBOL(ip_frag_ecn_table); 10162306a36Sopenharmony_ci 10262306a36Sopenharmony_ciint inet_frags_init(struct inet_frags *f) 10362306a36Sopenharmony_ci{ 10462306a36Sopenharmony_ci f->frags_cachep = kmem_cache_create(f->frags_cache_name, f->qsize, 0, 0, 10562306a36Sopenharmony_ci NULL); 10662306a36Sopenharmony_ci if (!f->frags_cachep) 10762306a36Sopenharmony_ci return -ENOMEM; 10862306a36Sopenharmony_ci 10962306a36Sopenharmony_ci refcount_set(&f->refcnt, 1); 11062306a36Sopenharmony_ci init_completion(&f->completion); 11162306a36Sopenharmony_ci return 0; 11262306a36Sopenharmony_ci} 11362306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frags_init); 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_civoid inet_frags_fini(struct inet_frags *f) 11662306a36Sopenharmony_ci{ 11762306a36Sopenharmony_ci if (refcount_dec_and_test(&f->refcnt)) 11862306a36Sopenharmony_ci complete(&f->completion); 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_ci wait_for_completion(&f->completion); 12162306a36Sopenharmony_ci 12262306a36Sopenharmony_ci kmem_cache_destroy(f->frags_cachep); 12362306a36Sopenharmony_ci f->frags_cachep = NULL; 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frags_fini); 12662306a36Sopenharmony_ci 12762306a36Sopenharmony_ci/* called from rhashtable_free_and_destroy() at netns_frags dismantle */ 12862306a36Sopenharmony_cistatic void inet_frags_free_cb(void *ptr, void *arg) 12962306a36Sopenharmony_ci{ 13062306a36Sopenharmony_ci struct inet_frag_queue *fq = ptr; 13162306a36Sopenharmony_ci int count; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci count = del_timer_sync(&fq->timer) ? 1 : 0; 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_ci spin_lock_bh(&fq->lock); 13662306a36Sopenharmony_ci fq->flags |= INET_FRAG_DROP; 13762306a36Sopenharmony_ci if (!(fq->flags & INET_FRAG_COMPLETE)) { 13862306a36Sopenharmony_ci fq->flags |= INET_FRAG_COMPLETE; 13962306a36Sopenharmony_ci count++; 14062306a36Sopenharmony_ci } else if (fq->flags & INET_FRAG_HASH_DEAD) { 14162306a36Sopenharmony_ci count++; 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci spin_unlock_bh(&fq->lock); 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci if (refcount_sub_and_test(count, &fq->refcnt)) 14662306a36Sopenharmony_ci inet_frag_destroy(fq); 14762306a36Sopenharmony_ci} 14862306a36Sopenharmony_ci 14962306a36Sopenharmony_cistatic LLIST_HEAD(fqdir_free_list); 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_cistatic void fqdir_free_fn(struct work_struct *work) 15262306a36Sopenharmony_ci{ 15362306a36Sopenharmony_ci struct llist_node *kill_list; 15462306a36Sopenharmony_ci struct fqdir *fqdir, *tmp; 15562306a36Sopenharmony_ci struct inet_frags *f; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci /* Atomically snapshot the list of fqdirs to free */ 15862306a36Sopenharmony_ci kill_list = llist_del_all(&fqdir_free_list); 15962306a36Sopenharmony_ci 16062306a36Sopenharmony_ci /* We need to make sure all ongoing call_rcu(..., inet_frag_destroy_rcu) 16162306a36Sopenharmony_ci * have completed, since they need to dereference fqdir. 16262306a36Sopenharmony_ci * Would it not be nice to have kfree_rcu_barrier() ? :) 16362306a36Sopenharmony_ci */ 16462306a36Sopenharmony_ci rcu_barrier(); 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ci llist_for_each_entry_safe(fqdir, tmp, kill_list, free_list) { 16762306a36Sopenharmony_ci f = fqdir->f; 16862306a36Sopenharmony_ci if (refcount_dec_and_test(&f->refcnt)) 16962306a36Sopenharmony_ci complete(&f->completion); 17062306a36Sopenharmony_ci 17162306a36Sopenharmony_ci kfree(fqdir); 17262306a36Sopenharmony_ci } 17362306a36Sopenharmony_ci} 17462306a36Sopenharmony_ci 17562306a36Sopenharmony_cistatic DECLARE_WORK(fqdir_free_work, fqdir_free_fn); 17662306a36Sopenharmony_ci 17762306a36Sopenharmony_cistatic void fqdir_work_fn(struct work_struct *work) 17862306a36Sopenharmony_ci{ 17962306a36Sopenharmony_ci struct fqdir *fqdir = container_of(work, struct fqdir, destroy_work); 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL); 18262306a36Sopenharmony_ci 18362306a36Sopenharmony_ci if (llist_add(&fqdir->free_list, &fqdir_free_list)) 18462306a36Sopenharmony_ci queue_work(system_wq, &fqdir_free_work); 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ciint fqdir_init(struct fqdir **fqdirp, struct inet_frags *f, struct net *net) 18862306a36Sopenharmony_ci{ 18962306a36Sopenharmony_ci struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL); 19062306a36Sopenharmony_ci int res; 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci if (!fqdir) 19362306a36Sopenharmony_ci return -ENOMEM; 19462306a36Sopenharmony_ci fqdir->f = f; 19562306a36Sopenharmony_ci fqdir->net = net; 19662306a36Sopenharmony_ci res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params); 19762306a36Sopenharmony_ci if (res < 0) { 19862306a36Sopenharmony_ci kfree(fqdir); 19962306a36Sopenharmony_ci return res; 20062306a36Sopenharmony_ci } 20162306a36Sopenharmony_ci refcount_inc(&f->refcnt); 20262306a36Sopenharmony_ci *fqdirp = fqdir; 20362306a36Sopenharmony_ci return 0; 20462306a36Sopenharmony_ci} 20562306a36Sopenharmony_ciEXPORT_SYMBOL(fqdir_init); 20662306a36Sopenharmony_ci 20762306a36Sopenharmony_cistatic struct workqueue_struct *inet_frag_wq; 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_cistatic int __init inet_frag_wq_init(void) 21062306a36Sopenharmony_ci{ 21162306a36Sopenharmony_ci inet_frag_wq = create_workqueue("inet_frag_wq"); 21262306a36Sopenharmony_ci if (!inet_frag_wq) 21362306a36Sopenharmony_ci panic("Could not create inet frag workq"); 21462306a36Sopenharmony_ci return 0; 21562306a36Sopenharmony_ci} 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_cipure_initcall(inet_frag_wq_init); 21862306a36Sopenharmony_ci 21962306a36Sopenharmony_civoid fqdir_exit(struct fqdir *fqdir) 22062306a36Sopenharmony_ci{ 22162306a36Sopenharmony_ci INIT_WORK(&fqdir->destroy_work, fqdir_work_fn); 22262306a36Sopenharmony_ci queue_work(inet_frag_wq, &fqdir->destroy_work); 22362306a36Sopenharmony_ci} 22462306a36Sopenharmony_ciEXPORT_SYMBOL(fqdir_exit); 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_civoid inet_frag_kill(struct inet_frag_queue *fq) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci if (del_timer(&fq->timer)) 22962306a36Sopenharmony_ci refcount_dec(&fq->refcnt); 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci if (!(fq->flags & INET_FRAG_COMPLETE)) { 23262306a36Sopenharmony_ci struct fqdir *fqdir = fq->fqdir; 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci fq->flags |= INET_FRAG_COMPLETE; 23562306a36Sopenharmony_ci rcu_read_lock(); 23662306a36Sopenharmony_ci /* The RCU read lock provides a memory barrier 23762306a36Sopenharmony_ci * guaranteeing that if fqdir->dead is false then 23862306a36Sopenharmony_ci * the hash table destruction will not start until 23962306a36Sopenharmony_ci * after we unlock. Paired with fqdir_pre_exit(). 24062306a36Sopenharmony_ci */ 24162306a36Sopenharmony_ci if (!READ_ONCE(fqdir->dead)) { 24262306a36Sopenharmony_ci rhashtable_remove_fast(&fqdir->rhashtable, &fq->node, 24362306a36Sopenharmony_ci fqdir->f->rhash_params); 24462306a36Sopenharmony_ci refcount_dec(&fq->refcnt); 24562306a36Sopenharmony_ci } else { 24662306a36Sopenharmony_ci fq->flags |= INET_FRAG_HASH_DEAD; 24762306a36Sopenharmony_ci } 24862306a36Sopenharmony_ci rcu_read_unlock(); 24962306a36Sopenharmony_ci } 25062306a36Sopenharmony_ci} 25162306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frag_kill); 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_cistatic void inet_frag_destroy_rcu(struct rcu_head *head) 25462306a36Sopenharmony_ci{ 25562306a36Sopenharmony_ci struct inet_frag_queue *q = container_of(head, struct inet_frag_queue, 25662306a36Sopenharmony_ci rcu); 25762306a36Sopenharmony_ci struct inet_frags *f = q->fqdir->f; 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_ci if (f->destructor) 26062306a36Sopenharmony_ci f->destructor(q); 26162306a36Sopenharmony_ci kmem_cache_free(f->frags_cachep, q); 26262306a36Sopenharmony_ci} 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ciunsigned int inet_frag_rbtree_purge(struct rb_root *root, 26562306a36Sopenharmony_ci enum skb_drop_reason reason) 26662306a36Sopenharmony_ci{ 26762306a36Sopenharmony_ci struct rb_node *p = rb_first(root); 26862306a36Sopenharmony_ci unsigned int sum = 0; 26962306a36Sopenharmony_ci 27062306a36Sopenharmony_ci while (p) { 27162306a36Sopenharmony_ci struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_ci p = rb_next(p); 27462306a36Sopenharmony_ci rb_erase(&skb->rbnode, root); 27562306a36Sopenharmony_ci while (skb) { 27662306a36Sopenharmony_ci struct sk_buff *next = FRAG_CB(skb)->next_frag; 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci sum += skb->truesize; 27962306a36Sopenharmony_ci kfree_skb_reason(skb, reason); 28062306a36Sopenharmony_ci skb = next; 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci } 28362306a36Sopenharmony_ci return sum; 28462306a36Sopenharmony_ci} 28562306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frag_rbtree_purge); 28662306a36Sopenharmony_ci 28762306a36Sopenharmony_civoid inet_frag_destroy(struct inet_frag_queue *q) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci unsigned int sum, sum_truesize = 0; 29062306a36Sopenharmony_ci enum skb_drop_reason reason; 29162306a36Sopenharmony_ci struct inet_frags *f; 29262306a36Sopenharmony_ci struct fqdir *fqdir; 29362306a36Sopenharmony_ci 29462306a36Sopenharmony_ci WARN_ON(!(q->flags & INET_FRAG_COMPLETE)); 29562306a36Sopenharmony_ci reason = (q->flags & INET_FRAG_DROP) ? 29662306a36Sopenharmony_ci SKB_DROP_REASON_FRAG_REASM_TIMEOUT : 29762306a36Sopenharmony_ci SKB_CONSUMED; 29862306a36Sopenharmony_ci WARN_ON(del_timer(&q->timer) != 0); 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_ci /* Release all fragment data. */ 30162306a36Sopenharmony_ci fqdir = q->fqdir; 30262306a36Sopenharmony_ci f = fqdir->f; 30362306a36Sopenharmony_ci sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments, reason); 30462306a36Sopenharmony_ci sum = sum_truesize + f->qsize; 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci call_rcu(&q->rcu, inet_frag_destroy_rcu); 30762306a36Sopenharmony_ci 30862306a36Sopenharmony_ci sub_frag_mem_limit(fqdir, sum); 30962306a36Sopenharmony_ci} 31062306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frag_destroy); 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_cistatic struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir, 31362306a36Sopenharmony_ci struct inet_frags *f, 31462306a36Sopenharmony_ci void *arg) 31562306a36Sopenharmony_ci{ 31662306a36Sopenharmony_ci struct inet_frag_queue *q; 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci q = kmem_cache_zalloc(f->frags_cachep, GFP_ATOMIC); 31962306a36Sopenharmony_ci if (!q) 32062306a36Sopenharmony_ci return NULL; 32162306a36Sopenharmony_ci 32262306a36Sopenharmony_ci q->fqdir = fqdir; 32362306a36Sopenharmony_ci f->constructor(q, arg); 32462306a36Sopenharmony_ci add_frag_mem_limit(fqdir, f->qsize); 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci timer_setup(&q->timer, f->frag_expire, 0); 32762306a36Sopenharmony_ci spin_lock_init(&q->lock); 32862306a36Sopenharmony_ci refcount_set(&q->refcnt, 3); 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci return q; 33162306a36Sopenharmony_ci} 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_cistatic struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir, 33462306a36Sopenharmony_ci void *arg, 33562306a36Sopenharmony_ci struct inet_frag_queue **prev) 33662306a36Sopenharmony_ci{ 33762306a36Sopenharmony_ci struct inet_frags *f = fqdir->f; 33862306a36Sopenharmony_ci struct inet_frag_queue *q; 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci q = inet_frag_alloc(fqdir, f, arg); 34162306a36Sopenharmony_ci if (!q) { 34262306a36Sopenharmony_ci *prev = ERR_PTR(-ENOMEM); 34362306a36Sopenharmony_ci return NULL; 34462306a36Sopenharmony_ci } 34562306a36Sopenharmony_ci mod_timer(&q->timer, jiffies + fqdir->timeout); 34662306a36Sopenharmony_ci 34762306a36Sopenharmony_ci *prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key, 34862306a36Sopenharmony_ci &q->node, f->rhash_params); 34962306a36Sopenharmony_ci if (*prev) { 35062306a36Sopenharmony_ci q->flags |= INET_FRAG_COMPLETE; 35162306a36Sopenharmony_ci inet_frag_kill(q); 35262306a36Sopenharmony_ci inet_frag_destroy(q); 35362306a36Sopenharmony_ci return NULL; 35462306a36Sopenharmony_ci } 35562306a36Sopenharmony_ci return q; 35662306a36Sopenharmony_ci} 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */ 35962306a36Sopenharmony_cistruct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key) 36062306a36Sopenharmony_ci{ 36162306a36Sopenharmony_ci /* This pairs with WRITE_ONCE() in fqdir_pre_exit(). */ 36262306a36Sopenharmony_ci long high_thresh = READ_ONCE(fqdir->high_thresh); 36362306a36Sopenharmony_ci struct inet_frag_queue *fq = NULL, *prev; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci if (!high_thresh || frag_mem_limit(fqdir) > high_thresh) 36662306a36Sopenharmony_ci return NULL; 36762306a36Sopenharmony_ci 36862306a36Sopenharmony_ci rcu_read_lock(); 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params); 37162306a36Sopenharmony_ci if (!prev) 37262306a36Sopenharmony_ci fq = inet_frag_create(fqdir, key, &prev); 37362306a36Sopenharmony_ci if (!IS_ERR_OR_NULL(prev)) { 37462306a36Sopenharmony_ci fq = prev; 37562306a36Sopenharmony_ci if (!refcount_inc_not_zero(&fq->refcnt)) 37662306a36Sopenharmony_ci fq = NULL; 37762306a36Sopenharmony_ci } 37862306a36Sopenharmony_ci rcu_read_unlock(); 37962306a36Sopenharmony_ci return fq; 38062306a36Sopenharmony_ci} 38162306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frag_find); 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ciint inet_frag_queue_insert(struct inet_frag_queue *q, struct sk_buff *skb, 38462306a36Sopenharmony_ci int offset, int end) 38562306a36Sopenharmony_ci{ 38662306a36Sopenharmony_ci struct sk_buff *last = q->fragments_tail; 38762306a36Sopenharmony_ci 38862306a36Sopenharmony_ci /* RFC5722, Section 4, amended by Errata ID : 3089 38962306a36Sopenharmony_ci * When reassembling an IPv6 datagram, if 39062306a36Sopenharmony_ci * one or more its constituent fragments is determined to be an 39162306a36Sopenharmony_ci * overlapping fragment, the entire datagram (and any constituent 39262306a36Sopenharmony_ci * fragments) MUST be silently discarded. 39362306a36Sopenharmony_ci * 39462306a36Sopenharmony_ci * Duplicates, however, should be ignored (i.e. skb dropped, but the 39562306a36Sopenharmony_ci * queue/fragments kept for later reassembly). 39662306a36Sopenharmony_ci */ 39762306a36Sopenharmony_ci if (!last) 39862306a36Sopenharmony_ci fragrun_create(q, skb); /* First fragment. */ 39962306a36Sopenharmony_ci else if (last->ip_defrag_offset + last->len < end) { 40062306a36Sopenharmony_ci /* This is the common case: skb goes to the end. */ 40162306a36Sopenharmony_ci /* Detect and discard overlaps. */ 40262306a36Sopenharmony_ci if (offset < last->ip_defrag_offset + last->len) 40362306a36Sopenharmony_ci return IPFRAG_OVERLAP; 40462306a36Sopenharmony_ci if (offset == last->ip_defrag_offset + last->len) 40562306a36Sopenharmony_ci fragrun_append_to_last(q, skb); 40662306a36Sopenharmony_ci else 40762306a36Sopenharmony_ci fragrun_create(q, skb); 40862306a36Sopenharmony_ci } else { 40962306a36Sopenharmony_ci /* Binary search. Note that skb can become the first fragment, 41062306a36Sopenharmony_ci * but not the last (covered above). 41162306a36Sopenharmony_ci */ 41262306a36Sopenharmony_ci struct rb_node **rbn, *parent; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci rbn = &q->rb_fragments.rb_node; 41562306a36Sopenharmony_ci do { 41662306a36Sopenharmony_ci struct sk_buff *curr; 41762306a36Sopenharmony_ci int curr_run_end; 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_ci parent = *rbn; 42062306a36Sopenharmony_ci curr = rb_to_skb(parent); 42162306a36Sopenharmony_ci curr_run_end = curr->ip_defrag_offset + 42262306a36Sopenharmony_ci FRAG_CB(curr)->frag_run_len; 42362306a36Sopenharmony_ci if (end <= curr->ip_defrag_offset) 42462306a36Sopenharmony_ci rbn = &parent->rb_left; 42562306a36Sopenharmony_ci else if (offset >= curr_run_end) 42662306a36Sopenharmony_ci rbn = &parent->rb_right; 42762306a36Sopenharmony_ci else if (offset >= curr->ip_defrag_offset && 42862306a36Sopenharmony_ci end <= curr_run_end) 42962306a36Sopenharmony_ci return IPFRAG_DUP; 43062306a36Sopenharmony_ci else 43162306a36Sopenharmony_ci return IPFRAG_OVERLAP; 43262306a36Sopenharmony_ci } while (*rbn); 43362306a36Sopenharmony_ci /* Here we have parent properly set, and rbn pointing to 43462306a36Sopenharmony_ci * one of its NULL left/right children. Insert skb. 43562306a36Sopenharmony_ci */ 43662306a36Sopenharmony_ci fragcb_clear(skb); 43762306a36Sopenharmony_ci rb_link_node(&skb->rbnode, parent, rbn); 43862306a36Sopenharmony_ci rb_insert_color(&skb->rbnode, &q->rb_fragments); 43962306a36Sopenharmony_ci } 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci skb->ip_defrag_offset = offset; 44262306a36Sopenharmony_ci 44362306a36Sopenharmony_ci return IPFRAG_OK; 44462306a36Sopenharmony_ci} 44562306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frag_queue_insert); 44662306a36Sopenharmony_ci 44762306a36Sopenharmony_civoid *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb, 44862306a36Sopenharmony_ci struct sk_buff *parent) 44962306a36Sopenharmony_ci{ 45062306a36Sopenharmony_ci struct sk_buff *fp, *head = skb_rb_first(&q->rb_fragments); 45162306a36Sopenharmony_ci struct sk_buff **nextp; 45262306a36Sopenharmony_ci int delta; 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_ci if (head != skb) { 45562306a36Sopenharmony_ci fp = skb_clone(skb, GFP_ATOMIC); 45662306a36Sopenharmony_ci if (!fp) 45762306a36Sopenharmony_ci return NULL; 45862306a36Sopenharmony_ci FRAG_CB(fp)->next_frag = FRAG_CB(skb)->next_frag; 45962306a36Sopenharmony_ci if (RB_EMPTY_NODE(&skb->rbnode)) 46062306a36Sopenharmony_ci FRAG_CB(parent)->next_frag = fp; 46162306a36Sopenharmony_ci else 46262306a36Sopenharmony_ci rb_replace_node(&skb->rbnode, &fp->rbnode, 46362306a36Sopenharmony_ci &q->rb_fragments); 46462306a36Sopenharmony_ci if (q->fragments_tail == skb) 46562306a36Sopenharmony_ci q->fragments_tail = fp; 46662306a36Sopenharmony_ci skb_morph(skb, head); 46762306a36Sopenharmony_ci FRAG_CB(skb)->next_frag = FRAG_CB(head)->next_frag; 46862306a36Sopenharmony_ci rb_replace_node(&head->rbnode, &skb->rbnode, 46962306a36Sopenharmony_ci &q->rb_fragments); 47062306a36Sopenharmony_ci consume_skb(head); 47162306a36Sopenharmony_ci head = skb; 47262306a36Sopenharmony_ci } 47362306a36Sopenharmony_ci WARN_ON(head->ip_defrag_offset != 0); 47462306a36Sopenharmony_ci 47562306a36Sopenharmony_ci delta = -head->truesize; 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci /* Head of list must not be cloned. */ 47862306a36Sopenharmony_ci if (skb_unclone(head, GFP_ATOMIC)) 47962306a36Sopenharmony_ci return NULL; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci delta += head->truesize; 48262306a36Sopenharmony_ci if (delta) 48362306a36Sopenharmony_ci add_frag_mem_limit(q->fqdir, delta); 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci /* If the first fragment is fragmented itself, we split 48662306a36Sopenharmony_ci * it to two chunks: the first with data and paged part 48762306a36Sopenharmony_ci * and the second, holding only fragments. 48862306a36Sopenharmony_ci */ 48962306a36Sopenharmony_ci if (skb_has_frag_list(head)) { 49062306a36Sopenharmony_ci struct sk_buff *clone; 49162306a36Sopenharmony_ci int i, plen = 0; 49262306a36Sopenharmony_ci 49362306a36Sopenharmony_ci clone = alloc_skb(0, GFP_ATOMIC); 49462306a36Sopenharmony_ci if (!clone) 49562306a36Sopenharmony_ci return NULL; 49662306a36Sopenharmony_ci skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; 49762306a36Sopenharmony_ci skb_frag_list_init(head); 49862306a36Sopenharmony_ci for (i = 0; i < skb_shinfo(head)->nr_frags; i++) 49962306a36Sopenharmony_ci plen += skb_frag_size(&skb_shinfo(head)->frags[i]); 50062306a36Sopenharmony_ci clone->data_len = head->data_len - plen; 50162306a36Sopenharmony_ci clone->len = clone->data_len; 50262306a36Sopenharmony_ci head->truesize += clone->truesize; 50362306a36Sopenharmony_ci clone->csum = 0; 50462306a36Sopenharmony_ci clone->ip_summed = head->ip_summed; 50562306a36Sopenharmony_ci add_frag_mem_limit(q->fqdir, clone->truesize); 50662306a36Sopenharmony_ci skb_shinfo(head)->frag_list = clone; 50762306a36Sopenharmony_ci nextp = &clone->next; 50862306a36Sopenharmony_ci } else { 50962306a36Sopenharmony_ci nextp = &skb_shinfo(head)->frag_list; 51062306a36Sopenharmony_ci } 51162306a36Sopenharmony_ci 51262306a36Sopenharmony_ci return nextp; 51362306a36Sopenharmony_ci} 51462306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frag_reasm_prepare); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_civoid inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head, 51762306a36Sopenharmony_ci void *reasm_data, bool try_coalesce) 51862306a36Sopenharmony_ci{ 51962306a36Sopenharmony_ci struct sk_buff **nextp = reasm_data; 52062306a36Sopenharmony_ci struct rb_node *rbn; 52162306a36Sopenharmony_ci struct sk_buff *fp; 52262306a36Sopenharmony_ci int sum_truesize; 52362306a36Sopenharmony_ci 52462306a36Sopenharmony_ci skb_push(head, head->data - skb_network_header(head)); 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci /* Traverse the tree in order, to build frag_list. */ 52762306a36Sopenharmony_ci fp = FRAG_CB(head)->next_frag; 52862306a36Sopenharmony_ci rbn = rb_next(&head->rbnode); 52962306a36Sopenharmony_ci rb_erase(&head->rbnode, &q->rb_fragments); 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci sum_truesize = head->truesize; 53262306a36Sopenharmony_ci while (rbn || fp) { 53362306a36Sopenharmony_ci /* fp points to the next sk_buff in the current run; 53462306a36Sopenharmony_ci * rbn points to the next run. 53562306a36Sopenharmony_ci */ 53662306a36Sopenharmony_ci /* Go through the current run. */ 53762306a36Sopenharmony_ci while (fp) { 53862306a36Sopenharmony_ci struct sk_buff *next_frag = FRAG_CB(fp)->next_frag; 53962306a36Sopenharmony_ci bool stolen; 54062306a36Sopenharmony_ci int delta; 54162306a36Sopenharmony_ci 54262306a36Sopenharmony_ci sum_truesize += fp->truesize; 54362306a36Sopenharmony_ci if (head->ip_summed != fp->ip_summed) 54462306a36Sopenharmony_ci head->ip_summed = CHECKSUM_NONE; 54562306a36Sopenharmony_ci else if (head->ip_summed == CHECKSUM_COMPLETE) 54662306a36Sopenharmony_ci head->csum = csum_add(head->csum, fp->csum); 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_ci if (try_coalesce && skb_try_coalesce(head, fp, &stolen, 54962306a36Sopenharmony_ci &delta)) { 55062306a36Sopenharmony_ci kfree_skb_partial(fp, stolen); 55162306a36Sopenharmony_ci } else { 55262306a36Sopenharmony_ci fp->prev = NULL; 55362306a36Sopenharmony_ci memset(&fp->rbnode, 0, sizeof(fp->rbnode)); 55462306a36Sopenharmony_ci fp->sk = NULL; 55562306a36Sopenharmony_ci 55662306a36Sopenharmony_ci head->data_len += fp->len; 55762306a36Sopenharmony_ci head->len += fp->len; 55862306a36Sopenharmony_ci head->truesize += fp->truesize; 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci *nextp = fp; 56162306a36Sopenharmony_ci nextp = &fp->next; 56262306a36Sopenharmony_ci } 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci fp = next_frag; 56562306a36Sopenharmony_ci } 56662306a36Sopenharmony_ci /* Move to the next run. */ 56762306a36Sopenharmony_ci if (rbn) { 56862306a36Sopenharmony_ci struct rb_node *rbnext = rb_next(rbn); 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci fp = rb_to_skb(rbn); 57162306a36Sopenharmony_ci rb_erase(rbn, &q->rb_fragments); 57262306a36Sopenharmony_ci rbn = rbnext; 57362306a36Sopenharmony_ci } 57462306a36Sopenharmony_ci } 57562306a36Sopenharmony_ci sub_frag_mem_limit(q->fqdir, sum_truesize); 57662306a36Sopenharmony_ci 57762306a36Sopenharmony_ci *nextp = NULL; 57862306a36Sopenharmony_ci skb_mark_not_on_list(head); 57962306a36Sopenharmony_ci head->prev = NULL; 58062306a36Sopenharmony_ci head->tstamp = q->stamp; 58162306a36Sopenharmony_ci head->mono_delivery_time = q->mono_delivery_time; 58262306a36Sopenharmony_ci} 58362306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frag_reasm_finish); 58462306a36Sopenharmony_ci 58562306a36Sopenharmony_cistruct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q) 58662306a36Sopenharmony_ci{ 58762306a36Sopenharmony_ci struct sk_buff *head, *skb; 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci head = skb_rb_first(&q->rb_fragments); 59062306a36Sopenharmony_ci if (!head) 59162306a36Sopenharmony_ci return NULL; 59262306a36Sopenharmony_ci skb = FRAG_CB(head)->next_frag; 59362306a36Sopenharmony_ci if (skb) 59462306a36Sopenharmony_ci rb_replace_node(&head->rbnode, &skb->rbnode, 59562306a36Sopenharmony_ci &q->rb_fragments); 59662306a36Sopenharmony_ci else 59762306a36Sopenharmony_ci rb_erase(&head->rbnode, &q->rb_fragments); 59862306a36Sopenharmony_ci memset(&head->rbnode, 0, sizeof(head->rbnode)); 59962306a36Sopenharmony_ci barrier(); 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci if (head == q->fragments_tail) 60262306a36Sopenharmony_ci q->fragments_tail = NULL; 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci sub_frag_mem_limit(q->fqdir, head->truesize); 60562306a36Sopenharmony_ci 60662306a36Sopenharmony_ci return head; 60762306a36Sopenharmony_ci} 60862306a36Sopenharmony_ciEXPORT_SYMBOL(inet_frag_pull_head); 609