18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 28c2ecf20Sopenharmony_ci/* net/sched/sch_hhf.c Heavy-Hitter Filter (HHF) 38c2ecf20Sopenharmony_ci * 48c2ecf20Sopenharmony_ci * Copyright (C) 2013 Terry Lam <vtlam@google.com> 58c2ecf20Sopenharmony_ci * Copyright (C) 2013 Nandita Dukkipati <nanditad@google.com> 68c2ecf20Sopenharmony_ci */ 78c2ecf20Sopenharmony_ci 88c2ecf20Sopenharmony_ci#include <linux/jiffies.h> 98c2ecf20Sopenharmony_ci#include <linux/module.h> 108c2ecf20Sopenharmony_ci#include <linux/skbuff.h> 118c2ecf20Sopenharmony_ci#include <linux/vmalloc.h> 128c2ecf20Sopenharmony_ci#include <linux/siphash.h> 138c2ecf20Sopenharmony_ci#include <net/pkt_sched.h> 148c2ecf20Sopenharmony_ci#include <net/sock.h> 158c2ecf20Sopenharmony_ci 168c2ecf20Sopenharmony_ci/* Heavy-Hitter Filter (HHF) 178c2ecf20Sopenharmony_ci * 188c2ecf20Sopenharmony_ci * Principles : 198c2ecf20Sopenharmony_ci * Flows are classified into two buckets: non-heavy-hitter and heavy-hitter 208c2ecf20Sopenharmony_ci * buckets. Initially, a new flow starts as non-heavy-hitter. Once classified 218c2ecf20Sopenharmony_ci * as heavy-hitter, it is immediately switched to the heavy-hitter bucket. 228c2ecf20Sopenharmony_ci * The buckets are dequeued by a Weighted Deficit Round Robin (WDRR) scheduler, 238c2ecf20Sopenharmony_ci * in which the heavy-hitter bucket is served with less weight. 248c2ecf20Sopenharmony_ci * In other words, non-heavy-hitters (e.g., short bursts of critical traffic) 258c2ecf20Sopenharmony_ci * are isolated from heavy-hitters (e.g., persistent bulk traffic) and also have 268c2ecf20Sopenharmony_ci * higher share of bandwidth. 278c2ecf20Sopenharmony_ci * 288c2ecf20Sopenharmony_ci * To capture heavy-hitters, we use the "multi-stage filter" algorithm in the 298c2ecf20Sopenharmony_ci * following paper: 308c2ecf20Sopenharmony_ci * [EV02] C. Estan and G. Varghese, "New Directions in Traffic Measurement and 318c2ecf20Sopenharmony_ci * Accounting", in ACM SIGCOMM, 2002. 328c2ecf20Sopenharmony_ci * 338c2ecf20Sopenharmony_ci * Conceptually, a multi-stage filter comprises k independent hash functions 348c2ecf20Sopenharmony_ci * and k counter arrays. Packets are indexed into k counter arrays by k hash 358c2ecf20Sopenharmony_ci * functions, respectively. The counters are then increased by the packet sizes. 368c2ecf20Sopenharmony_ci * Therefore, 378c2ecf20Sopenharmony_ci * - For a heavy-hitter flow: *all* of its k array counters must be large. 388c2ecf20Sopenharmony_ci * - For a non-heavy-hitter flow: some of its k array counters can be large 398c2ecf20Sopenharmony_ci * due to hash collision with other small flows; however, with high 408c2ecf20Sopenharmony_ci * probability, not *all* k counters are large. 418c2ecf20Sopenharmony_ci * 428c2ecf20Sopenharmony_ci * By the design of the multi-stage filter algorithm, the false negative rate 438c2ecf20Sopenharmony_ci * (heavy-hitters getting away uncaptured) is zero. However, the algorithm is 448c2ecf20Sopenharmony_ci * susceptible to false positives (non-heavy-hitters mistakenly classified as 458c2ecf20Sopenharmony_ci * heavy-hitters). 468c2ecf20Sopenharmony_ci * Therefore, we also implement the following optimizations to reduce false 478c2ecf20Sopenharmony_ci * positives by avoiding unnecessary increment of the counter values: 488c2ecf20Sopenharmony_ci * - Optimization O1: once a heavy-hitter is identified, its bytes are not 498c2ecf20Sopenharmony_ci * accounted in the array counters. This technique is called "shielding" 508c2ecf20Sopenharmony_ci * in Section 3.3.1 of [EV02]. 518c2ecf20Sopenharmony_ci * - Optimization O2: conservative update of counters 528c2ecf20Sopenharmony_ci * (Section 3.3.2 of [EV02]), 538c2ecf20Sopenharmony_ci * New counter value = max {old counter value, 548c2ecf20Sopenharmony_ci * smallest counter value + packet bytes} 558c2ecf20Sopenharmony_ci * 568c2ecf20Sopenharmony_ci * Finally, we refresh the counters periodically since otherwise the counter 578c2ecf20Sopenharmony_ci * values will keep accumulating. 588c2ecf20Sopenharmony_ci * 598c2ecf20Sopenharmony_ci * Once a flow is classified as heavy-hitter, we also save its per-flow state 608c2ecf20Sopenharmony_ci * in an exact-matching flow table so that its subsequent packets can be 618c2ecf20Sopenharmony_ci * dispatched to the heavy-hitter bucket accordingly. 628c2ecf20Sopenharmony_ci * 638c2ecf20Sopenharmony_ci * 648c2ecf20Sopenharmony_ci * At a high level, this qdisc works as follows: 658c2ecf20Sopenharmony_ci * Given a packet p: 668c2ecf20Sopenharmony_ci * - If the flow-id of p (e.g., TCP 5-tuple) is already in the exact-matching 678c2ecf20Sopenharmony_ci * heavy-hitter flow table, denoted table T, then send p to the heavy-hitter 688c2ecf20Sopenharmony_ci * bucket. 698c2ecf20Sopenharmony_ci * - Otherwise, forward p to the multi-stage filter, denoted filter F 708c2ecf20Sopenharmony_ci * + If F decides that p belongs to a non-heavy-hitter flow, then send p 718c2ecf20Sopenharmony_ci * to the non-heavy-hitter bucket. 728c2ecf20Sopenharmony_ci * + Otherwise, if F decides that p belongs to a new heavy-hitter flow, 738c2ecf20Sopenharmony_ci * then set up a new flow entry for the flow-id of p in the table T and 748c2ecf20Sopenharmony_ci * send p to the heavy-hitter bucket. 758c2ecf20Sopenharmony_ci * 768c2ecf20Sopenharmony_ci * In this implementation: 778c2ecf20Sopenharmony_ci * - T is a fixed-size hash-table with 1024 entries. Hash collision is 788c2ecf20Sopenharmony_ci * resolved by linked-list chaining. 798c2ecf20Sopenharmony_ci * - F has four counter arrays, each array containing 1024 32-bit counters. 808c2ecf20Sopenharmony_ci * That means 4 * 1024 * 32 bits = 16KB of memory. 818c2ecf20Sopenharmony_ci * - Since each array in F contains 1024 counters, 10 bits are sufficient to 828c2ecf20Sopenharmony_ci * index into each array. 838c2ecf20Sopenharmony_ci * Hence, instead of having four hash functions, we chop the 32-bit 848c2ecf20Sopenharmony_ci * skb-hash into three 10-bit chunks, and the remaining 10-bit chunk is 858c2ecf20Sopenharmony_ci * computed as XOR sum of those three chunks. 868c2ecf20Sopenharmony_ci * - We need to clear the counter arrays periodically; however, directly 878c2ecf20Sopenharmony_ci * memsetting 16KB of memory can lead to cache eviction and unwanted delay. 888c2ecf20Sopenharmony_ci * So by representing each counter by a valid bit, we only need to reset 898c2ecf20Sopenharmony_ci * 4K of 1 bit (i.e. 512 bytes) instead of 16KB of memory. 908c2ecf20Sopenharmony_ci * - The Deficit Round Robin engine is taken from fq_codel implementation 918c2ecf20Sopenharmony_ci * (net/sched/sch_fq_codel.c). Note that wdrr_bucket corresponds to 928c2ecf20Sopenharmony_ci * fq_codel_flow in fq_codel implementation. 938c2ecf20Sopenharmony_ci * 948c2ecf20Sopenharmony_ci */ 958c2ecf20Sopenharmony_ci 968c2ecf20Sopenharmony_ci/* Non-configurable parameters */ 978c2ecf20Sopenharmony_ci#define HH_FLOWS_CNT 1024 /* number of entries in exact-matching table T */ 988c2ecf20Sopenharmony_ci#define HHF_ARRAYS_CNT 4 /* number of arrays in multi-stage filter F */ 998c2ecf20Sopenharmony_ci#define HHF_ARRAYS_LEN 1024 /* number of counters in each array of F */ 1008c2ecf20Sopenharmony_ci#define HHF_BIT_MASK_LEN 10 /* masking 10 bits */ 1018c2ecf20Sopenharmony_ci#define HHF_BIT_MASK 0x3FF /* bitmask of 10 bits */ 1028c2ecf20Sopenharmony_ci 1038c2ecf20Sopenharmony_ci#define WDRR_BUCKET_CNT 2 /* two buckets for Weighted DRR */ 1048c2ecf20Sopenharmony_cienum wdrr_bucket_idx { 1058c2ecf20Sopenharmony_ci WDRR_BUCKET_FOR_HH = 0, /* bucket id for heavy-hitters */ 1068c2ecf20Sopenharmony_ci WDRR_BUCKET_FOR_NON_HH = 1 /* bucket id for non-heavy-hitters */ 1078c2ecf20Sopenharmony_ci}; 1088c2ecf20Sopenharmony_ci 1098c2ecf20Sopenharmony_ci#define hhf_time_before(a, b) \ 1108c2ecf20Sopenharmony_ci (typecheck(u32, a) && typecheck(u32, b) && ((s32)((a) - (b)) < 0)) 1118c2ecf20Sopenharmony_ci 1128c2ecf20Sopenharmony_ci/* Heavy-hitter per-flow state */ 1138c2ecf20Sopenharmony_cistruct hh_flow_state { 1148c2ecf20Sopenharmony_ci u32 hash_id; /* hash of flow-id (e.g. TCP 5-tuple) */ 1158c2ecf20Sopenharmony_ci u32 hit_timestamp; /* last time heavy-hitter was seen */ 1168c2ecf20Sopenharmony_ci struct list_head flowchain; /* chaining under hash collision */ 1178c2ecf20Sopenharmony_ci}; 1188c2ecf20Sopenharmony_ci 1198c2ecf20Sopenharmony_ci/* Weighted Deficit Round Robin (WDRR) scheduler */ 1208c2ecf20Sopenharmony_cistruct wdrr_bucket { 1218c2ecf20Sopenharmony_ci struct sk_buff *head; 1228c2ecf20Sopenharmony_ci struct sk_buff *tail; 1238c2ecf20Sopenharmony_ci struct list_head bucketchain; 1248c2ecf20Sopenharmony_ci int deficit; 1258c2ecf20Sopenharmony_ci}; 1268c2ecf20Sopenharmony_ci 1278c2ecf20Sopenharmony_cistruct hhf_sched_data { 1288c2ecf20Sopenharmony_ci struct wdrr_bucket buckets[WDRR_BUCKET_CNT]; 1298c2ecf20Sopenharmony_ci siphash_key_t perturbation; /* hash perturbation */ 1308c2ecf20Sopenharmony_ci u32 quantum; /* psched_mtu(qdisc_dev(sch)); */ 1318c2ecf20Sopenharmony_ci u32 drop_overlimit; /* number of times max qdisc packet 1328c2ecf20Sopenharmony_ci * limit was hit 1338c2ecf20Sopenharmony_ci */ 1348c2ecf20Sopenharmony_ci struct list_head *hh_flows; /* table T (currently active HHs) */ 1358c2ecf20Sopenharmony_ci u32 hh_flows_limit; /* max active HH allocs */ 1368c2ecf20Sopenharmony_ci u32 hh_flows_overlimit; /* num of disallowed HH allocs */ 1378c2ecf20Sopenharmony_ci u32 hh_flows_total_cnt; /* total admitted HHs */ 1388c2ecf20Sopenharmony_ci u32 hh_flows_current_cnt; /* total current HHs */ 1398c2ecf20Sopenharmony_ci u32 *hhf_arrays[HHF_ARRAYS_CNT]; /* HH filter F */ 1408c2ecf20Sopenharmony_ci u32 hhf_arrays_reset_timestamp; /* last time hhf_arrays 1418c2ecf20Sopenharmony_ci * was reset 1428c2ecf20Sopenharmony_ci */ 1438c2ecf20Sopenharmony_ci unsigned long *hhf_valid_bits[HHF_ARRAYS_CNT]; /* shadow valid bits 1448c2ecf20Sopenharmony_ci * of hhf_arrays 1458c2ecf20Sopenharmony_ci */ 1468c2ecf20Sopenharmony_ci /* Similar to the "new_flows" vs. "old_flows" concept in fq_codel DRR */ 1478c2ecf20Sopenharmony_ci struct list_head new_buckets; /* list of new buckets */ 1488c2ecf20Sopenharmony_ci struct list_head old_buckets; /* list of old buckets */ 1498c2ecf20Sopenharmony_ci 1508c2ecf20Sopenharmony_ci /* Configurable HHF parameters */ 1518c2ecf20Sopenharmony_ci u32 hhf_reset_timeout; /* interval to reset counter 1528c2ecf20Sopenharmony_ci * arrays in filter F 1538c2ecf20Sopenharmony_ci * (default 40ms) 1548c2ecf20Sopenharmony_ci */ 1558c2ecf20Sopenharmony_ci u32 hhf_admit_bytes; /* counter thresh to classify as 1568c2ecf20Sopenharmony_ci * HH (default 128KB). 1578c2ecf20Sopenharmony_ci * With these default values, 1588c2ecf20Sopenharmony_ci * 128KB / 40ms = 25 Mbps 1598c2ecf20Sopenharmony_ci * i.e., we expect to capture HHs 1608c2ecf20Sopenharmony_ci * sending > 25 Mbps. 1618c2ecf20Sopenharmony_ci */ 1628c2ecf20Sopenharmony_ci u32 hhf_evict_timeout; /* aging threshold to evict idle 1638c2ecf20Sopenharmony_ci * HHs out of table T. This should 1648c2ecf20Sopenharmony_ci * be large enough to avoid 1658c2ecf20Sopenharmony_ci * reordering during HH eviction. 1668c2ecf20Sopenharmony_ci * (default 1s) 1678c2ecf20Sopenharmony_ci */ 1688c2ecf20Sopenharmony_ci u32 hhf_non_hh_weight; /* WDRR weight for non-HHs 1698c2ecf20Sopenharmony_ci * (default 2, 1708c2ecf20Sopenharmony_ci * i.e., non-HH : HH = 2 : 1) 1718c2ecf20Sopenharmony_ci */ 1728c2ecf20Sopenharmony_ci}; 1738c2ecf20Sopenharmony_ci 1748c2ecf20Sopenharmony_cistatic u32 hhf_time_stamp(void) 1758c2ecf20Sopenharmony_ci{ 1768c2ecf20Sopenharmony_ci return jiffies; 1778c2ecf20Sopenharmony_ci} 1788c2ecf20Sopenharmony_ci 1798c2ecf20Sopenharmony_ci/* Looks up a heavy-hitter flow in a chaining list of table T. */ 1808c2ecf20Sopenharmony_cistatic struct hh_flow_state *seek_list(const u32 hash, 1818c2ecf20Sopenharmony_ci struct list_head *head, 1828c2ecf20Sopenharmony_ci struct hhf_sched_data *q) 1838c2ecf20Sopenharmony_ci{ 1848c2ecf20Sopenharmony_ci struct hh_flow_state *flow, *next; 1858c2ecf20Sopenharmony_ci u32 now = hhf_time_stamp(); 1868c2ecf20Sopenharmony_ci 1878c2ecf20Sopenharmony_ci if (list_empty(head)) 1888c2ecf20Sopenharmony_ci return NULL; 1898c2ecf20Sopenharmony_ci 1908c2ecf20Sopenharmony_ci list_for_each_entry_safe(flow, next, head, flowchain) { 1918c2ecf20Sopenharmony_ci u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; 1928c2ecf20Sopenharmony_ci 1938c2ecf20Sopenharmony_ci if (hhf_time_before(prev, now)) { 1948c2ecf20Sopenharmony_ci /* Delete expired heavy-hitters, but preserve one entry 1958c2ecf20Sopenharmony_ci * to avoid kzalloc() when next time this slot is hit. 1968c2ecf20Sopenharmony_ci */ 1978c2ecf20Sopenharmony_ci if (list_is_last(&flow->flowchain, head)) 1988c2ecf20Sopenharmony_ci return NULL; 1998c2ecf20Sopenharmony_ci list_del(&flow->flowchain); 2008c2ecf20Sopenharmony_ci kfree(flow); 2018c2ecf20Sopenharmony_ci q->hh_flows_current_cnt--; 2028c2ecf20Sopenharmony_ci } else if (flow->hash_id == hash) { 2038c2ecf20Sopenharmony_ci return flow; 2048c2ecf20Sopenharmony_ci } 2058c2ecf20Sopenharmony_ci } 2068c2ecf20Sopenharmony_ci return NULL; 2078c2ecf20Sopenharmony_ci} 2088c2ecf20Sopenharmony_ci 2098c2ecf20Sopenharmony_ci/* Returns a flow state entry for a new heavy-hitter. Either reuses an expired 2108c2ecf20Sopenharmony_ci * entry or dynamically alloc a new entry. 2118c2ecf20Sopenharmony_ci */ 2128c2ecf20Sopenharmony_cistatic struct hh_flow_state *alloc_new_hh(struct list_head *head, 2138c2ecf20Sopenharmony_ci struct hhf_sched_data *q) 2148c2ecf20Sopenharmony_ci{ 2158c2ecf20Sopenharmony_ci struct hh_flow_state *flow; 2168c2ecf20Sopenharmony_ci u32 now = hhf_time_stamp(); 2178c2ecf20Sopenharmony_ci 2188c2ecf20Sopenharmony_ci if (!list_empty(head)) { 2198c2ecf20Sopenharmony_ci /* Find an expired heavy-hitter flow entry. */ 2208c2ecf20Sopenharmony_ci list_for_each_entry(flow, head, flowchain) { 2218c2ecf20Sopenharmony_ci u32 prev = flow->hit_timestamp + q->hhf_evict_timeout; 2228c2ecf20Sopenharmony_ci 2238c2ecf20Sopenharmony_ci if (hhf_time_before(prev, now)) 2248c2ecf20Sopenharmony_ci return flow; 2258c2ecf20Sopenharmony_ci } 2268c2ecf20Sopenharmony_ci } 2278c2ecf20Sopenharmony_ci 2288c2ecf20Sopenharmony_ci if (q->hh_flows_current_cnt >= q->hh_flows_limit) { 2298c2ecf20Sopenharmony_ci q->hh_flows_overlimit++; 2308c2ecf20Sopenharmony_ci return NULL; 2318c2ecf20Sopenharmony_ci } 2328c2ecf20Sopenharmony_ci /* Create new entry. */ 2338c2ecf20Sopenharmony_ci flow = kzalloc(sizeof(struct hh_flow_state), GFP_ATOMIC); 2348c2ecf20Sopenharmony_ci if (!flow) 2358c2ecf20Sopenharmony_ci return NULL; 2368c2ecf20Sopenharmony_ci 2378c2ecf20Sopenharmony_ci q->hh_flows_current_cnt++; 2388c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&flow->flowchain); 2398c2ecf20Sopenharmony_ci list_add_tail(&flow->flowchain, head); 2408c2ecf20Sopenharmony_ci 2418c2ecf20Sopenharmony_ci return flow; 2428c2ecf20Sopenharmony_ci} 2438c2ecf20Sopenharmony_ci 2448c2ecf20Sopenharmony_ci/* Assigns packets to WDRR buckets. Implements a multi-stage filter to 2458c2ecf20Sopenharmony_ci * classify heavy-hitters. 2468c2ecf20Sopenharmony_ci */ 2478c2ecf20Sopenharmony_cistatic enum wdrr_bucket_idx hhf_classify(struct sk_buff *skb, struct Qdisc *sch) 2488c2ecf20Sopenharmony_ci{ 2498c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 2508c2ecf20Sopenharmony_ci u32 tmp_hash, hash; 2518c2ecf20Sopenharmony_ci u32 xorsum, filter_pos[HHF_ARRAYS_CNT], flow_pos; 2528c2ecf20Sopenharmony_ci struct hh_flow_state *flow; 2538c2ecf20Sopenharmony_ci u32 pkt_len, min_hhf_val; 2548c2ecf20Sopenharmony_ci int i; 2558c2ecf20Sopenharmony_ci u32 prev; 2568c2ecf20Sopenharmony_ci u32 now = hhf_time_stamp(); 2578c2ecf20Sopenharmony_ci 2588c2ecf20Sopenharmony_ci /* Reset the HHF counter arrays if this is the right time. */ 2598c2ecf20Sopenharmony_ci prev = q->hhf_arrays_reset_timestamp + q->hhf_reset_timeout; 2608c2ecf20Sopenharmony_ci if (hhf_time_before(prev, now)) { 2618c2ecf20Sopenharmony_ci for (i = 0; i < HHF_ARRAYS_CNT; i++) 2628c2ecf20Sopenharmony_ci bitmap_zero(q->hhf_valid_bits[i], HHF_ARRAYS_LEN); 2638c2ecf20Sopenharmony_ci q->hhf_arrays_reset_timestamp = now; 2648c2ecf20Sopenharmony_ci } 2658c2ecf20Sopenharmony_ci 2668c2ecf20Sopenharmony_ci /* Get hashed flow-id of the skb. */ 2678c2ecf20Sopenharmony_ci hash = skb_get_hash_perturb(skb, &q->perturbation); 2688c2ecf20Sopenharmony_ci 2698c2ecf20Sopenharmony_ci /* Check if this packet belongs to an already established HH flow. */ 2708c2ecf20Sopenharmony_ci flow_pos = hash & HHF_BIT_MASK; 2718c2ecf20Sopenharmony_ci flow = seek_list(hash, &q->hh_flows[flow_pos], q); 2728c2ecf20Sopenharmony_ci if (flow) { /* found its HH flow */ 2738c2ecf20Sopenharmony_ci flow->hit_timestamp = now; 2748c2ecf20Sopenharmony_ci return WDRR_BUCKET_FOR_HH; 2758c2ecf20Sopenharmony_ci } 2768c2ecf20Sopenharmony_ci 2778c2ecf20Sopenharmony_ci /* Now pass the packet through the multi-stage filter. */ 2788c2ecf20Sopenharmony_ci tmp_hash = hash; 2798c2ecf20Sopenharmony_ci xorsum = 0; 2808c2ecf20Sopenharmony_ci for (i = 0; i < HHF_ARRAYS_CNT - 1; i++) { 2818c2ecf20Sopenharmony_ci /* Split the skb_hash into three 10-bit chunks. */ 2828c2ecf20Sopenharmony_ci filter_pos[i] = tmp_hash & HHF_BIT_MASK; 2838c2ecf20Sopenharmony_ci xorsum ^= filter_pos[i]; 2848c2ecf20Sopenharmony_ci tmp_hash >>= HHF_BIT_MASK_LEN; 2858c2ecf20Sopenharmony_ci } 2868c2ecf20Sopenharmony_ci /* The last chunk is computed as XOR sum of other chunks. */ 2878c2ecf20Sopenharmony_ci filter_pos[HHF_ARRAYS_CNT - 1] = xorsum ^ tmp_hash; 2888c2ecf20Sopenharmony_ci 2898c2ecf20Sopenharmony_ci pkt_len = qdisc_pkt_len(skb); 2908c2ecf20Sopenharmony_ci min_hhf_val = ~0U; 2918c2ecf20Sopenharmony_ci for (i = 0; i < HHF_ARRAYS_CNT; i++) { 2928c2ecf20Sopenharmony_ci u32 val; 2938c2ecf20Sopenharmony_ci 2948c2ecf20Sopenharmony_ci if (!test_bit(filter_pos[i], q->hhf_valid_bits[i])) { 2958c2ecf20Sopenharmony_ci q->hhf_arrays[i][filter_pos[i]] = 0; 2968c2ecf20Sopenharmony_ci __set_bit(filter_pos[i], q->hhf_valid_bits[i]); 2978c2ecf20Sopenharmony_ci } 2988c2ecf20Sopenharmony_ci 2998c2ecf20Sopenharmony_ci val = q->hhf_arrays[i][filter_pos[i]] + pkt_len; 3008c2ecf20Sopenharmony_ci if (min_hhf_val > val) 3018c2ecf20Sopenharmony_ci min_hhf_val = val; 3028c2ecf20Sopenharmony_ci } 3038c2ecf20Sopenharmony_ci 3048c2ecf20Sopenharmony_ci /* Found a new HH iff all counter values > HH admit threshold. */ 3058c2ecf20Sopenharmony_ci if (min_hhf_val > q->hhf_admit_bytes) { 3068c2ecf20Sopenharmony_ci /* Just captured a new heavy-hitter. */ 3078c2ecf20Sopenharmony_ci flow = alloc_new_hh(&q->hh_flows[flow_pos], q); 3088c2ecf20Sopenharmony_ci if (!flow) /* memory alloc problem */ 3098c2ecf20Sopenharmony_ci return WDRR_BUCKET_FOR_NON_HH; 3108c2ecf20Sopenharmony_ci flow->hash_id = hash; 3118c2ecf20Sopenharmony_ci flow->hit_timestamp = now; 3128c2ecf20Sopenharmony_ci q->hh_flows_total_cnt++; 3138c2ecf20Sopenharmony_ci 3148c2ecf20Sopenharmony_ci /* By returning without updating counters in q->hhf_arrays, 3158c2ecf20Sopenharmony_ci * we implicitly implement "shielding" (see Optimization O1). 3168c2ecf20Sopenharmony_ci */ 3178c2ecf20Sopenharmony_ci return WDRR_BUCKET_FOR_HH; 3188c2ecf20Sopenharmony_ci } 3198c2ecf20Sopenharmony_ci 3208c2ecf20Sopenharmony_ci /* Conservative update of HHF arrays (see Optimization O2). */ 3218c2ecf20Sopenharmony_ci for (i = 0; i < HHF_ARRAYS_CNT; i++) { 3228c2ecf20Sopenharmony_ci if (q->hhf_arrays[i][filter_pos[i]] < min_hhf_val) 3238c2ecf20Sopenharmony_ci q->hhf_arrays[i][filter_pos[i]] = min_hhf_val; 3248c2ecf20Sopenharmony_ci } 3258c2ecf20Sopenharmony_ci return WDRR_BUCKET_FOR_NON_HH; 3268c2ecf20Sopenharmony_ci} 3278c2ecf20Sopenharmony_ci 3288c2ecf20Sopenharmony_ci/* Removes one skb from head of bucket. */ 3298c2ecf20Sopenharmony_cistatic struct sk_buff *dequeue_head(struct wdrr_bucket *bucket) 3308c2ecf20Sopenharmony_ci{ 3318c2ecf20Sopenharmony_ci struct sk_buff *skb = bucket->head; 3328c2ecf20Sopenharmony_ci 3338c2ecf20Sopenharmony_ci bucket->head = skb->next; 3348c2ecf20Sopenharmony_ci skb_mark_not_on_list(skb); 3358c2ecf20Sopenharmony_ci return skb; 3368c2ecf20Sopenharmony_ci} 3378c2ecf20Sopenharmony_ci 3388c2ecf20Sopenharmony_ci/* Tail-adds skb to bucket. */ 3398c2ecf20Sopenharmony_cistatic void bucket_add(struct wdrr_bucket *bucket, struct sk_buff *skb) 3408c2ecf20Sopenharmony_ci{ 3418c2ecf20Sopenharmony_ci if (bucket->head == NULL) 3428c2ecf20Sopenharmony_ci bucket->head = skb; 3438c2ecf20Sopenharmony_ci else 3448c2ecf20Sopenharmony_ci bucket->tail->next = skb; 3458c2ecf20Sopenharmony_ci bucket->tail = skb; 3468c2ecf20Sopenharmony_ci skb->next = NULL; 3478c2ecf20Sopenharmony_ci} 3488c2ecf20Sopenharmony_ci 3498c2ecf20Sopenharmony_cistatic unsigned int hhf_drop(struct Qdisc *sch, struct sk_buff **to_free) 3508c2ecf20Sopenharmony_ci{ 3518c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 3528c2ecf20Sopenharmony_ci struct wdrr_bucket *bucket; 3538c2ecf20Sopenharmony_ci 3548c2ecf20Sopenharmony_ci /* Always try to drop from heavy-hitters first. */ 3558c2ecf20Sopenharmony_ci bucket = &q->buckets[WDRR_BUCKET_FOR_HH]; 3568c2ecf20Sopenharmony_ci if (!bucket->head) 3578c2ecf20Sopenharmony_ci bucket = &q->buckets[WDRR_BUCKET_FOR_NON_HH]; 3588c2ecf20Sopenharmony_ci 3598c2ecf20Sopenharmony_ci if (bucket->head) { 3608c2ecf20Sopenharmony_ci struct sk_buff *skb = dequeue_head(bucket); 3618c2ecf20Sopenharmony_ci 3628c2ecf20Sopenharmony_ci sch->q.qlen--; 3638c2ecf20Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 3648c2ecf20Sopenharmony_ci qdisc_drop(skb, sch, to_free); 3658c2ecf20Sopenharmony_ci } 3668c2ecf20Sopenharmony_ci 3678c2ecf20Sopenharmony_ci /* Return id of the bucket from which the packet was dropped. */ 3688c2ecf20Sopenharmony_ci return bucket - q->buckets; 3698c2ecf20Sopenharmony_ci} 3708c2ecf20Sopenharmony_ci 3718c2ecf20Sopenharmony_cistatic int hhf_enqueue(struct sk_buff *skb, struct Qdisc *sch, 3728c2ecf20Sopenharmony_ci struct sk_buff **to_free) 3738c2ecf20Sopenharmony_ci{ 3748c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 3758c2ecf20Sopenharmony_ci enum wdrr_bucket_idx idx; 3768c2ecf20Sopenharmony_ci struct wdrr_bucket *bucket; 3778c2ecf20Sopenharmony_ci unsigned int prev_backlog; 3788c2ecf20Sopenharmony_ci 3798c2ecf20Sopenharmony_ci idx = hhf_classify(skb, sch); 3808c2ecf20Sopenharmony_ci 3818c2ecf20Sopenharmony_ci bucket = &q->buckets[idx]; 3828c2ecf20Sopenharmony_ci bucket_add(bucket, skb); 3838c2ecf20Sopenharmony_ci qdisc_qstats_backlog_inc(sch, skb); 3848c2ecf20Sopenharmony_ci 3858c2ecf20Sopenharmony_ci if (list_empty(&bucket->bucketchain)) { 3868c2ecf20Sopenharmony_ci unsigned int weight; 3878c2ecf20Sopenharmony_ci 3888c2ecf20Sopenharmony_ci /* The logic of new_buckets vs. old_buckets is the same as 3898c2ecf20Sopenharmony_ci * new_flows vs. old_flows in the implementation of fq_codel, 3908c2ecf20Sopenharmony_ci * i.e., short bursts of non-HHs should have strict priority. 3918c2ecf20Sopenharmony_ci */ 3928c2ecf20Sopenharmony_ci if (idx == WDRR_BUCKET_FOR_HH) { 3938c2ecf20Sopenharmony_ci /* Always move heavy-hitters to old bucket. */ 3948c2ecf20Sopenharmony_ci weight = 1; 3958c2ecf20Sopenharmony_ci list_add_tail(&bucket->bucketchain, &q->old_buckets); 3968c2ecf20Sopenharmony_ci } else { 3978c2ecf20Sopenharmony_ci weight = q->hhf_non_hh_weight; 3988c2ecf20Sopenharmony_ci list_add_tail(&bucket->bucketchain, &q->new_buckets); 3998c2ecf20Sopenharmony_ci } 4008c2ecf20Sopenharmony_ci bucket->deficit = weight * q->quantum; 4018c2ecf20Sopenharmony_ci } 4028c2ecf20Sopenharmony_ci if (++sch->q.qlen <= sch->limit) 4038c2ecf20Sopenharmony_ci return NET_XMIT_SUCCESS; 4048c2ecf20Sopenharmony_ci 4058c2ecf20Sopenharmony_ci prev_backlog = sch->qstats.backlog; 4068c2ecf20Sopenharmony_ci q->drop_overlimit++; 4078c2ecf20Sopenharmony_ci /* Return Congestion Notification only if we dropped a packet from this 4088c2ecf20Sopenharmony_ci * bucket. 4098c2ecf20Sopenharmony_ci */ 4108c2ecf20Sopenharmony_ci if (hhf_drop(sch, to_free) == idx) 4118c2ecf20Sopenharmony_ci return NET_XMIT_CN; 4128c2ecf20Sopenharmony_ci 4138c2ecf20Sopenharmony_ci /* As we dropped a packet, better let upper stack know this. */ 4148c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, 1, prev_backlog - sch->qstats.backlog); 4158c2ecf20Sopenharmony_ci return NET_XMIT_SUCCESS; 4168c2ecf20Sopenharmony_ci} 4178c2ecf20Sopenharmony_ci 4188c2ecf20Sopenharmony_cistatic struct sk_buff *hhf_dequeue(struct Qdisc *sch) 4198c2ecf20Sopenharmony_ci{ 4208c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 4218c2ecf20Sopenharmony_ci struct sk_buff *skb = NULL; 4228c2ecf20Sopenharmony_ci struct wdrr_bucket *bucket; 4238c2ecf20Sopenharmony_ci struct list_head *head; 4248c2ecf20Sopenharmony_ci 4258c2ecf20Sopenharmony_cibegin: 4268c2ecf20Sopenharmony_ci head = &q->new_buckets; 4278c2ecf20Sopenharmony_ci if (list_empty(head)) { 4288c2ecf20Sopenharmony_ci head = &q->old_buckets; 4298c2ecf20Sopenharmony_ci if (list_empty(head)) 4308c2ecf20Sopenharmony_ci return NULL; 4318c2ecf20Sopenharmony_ci } 4328c2ecf20Sopenharmony_ci bucket = list_first_entry(head, struct wdrr_bucket, bucketchain); 4338c2ecf20Sopenharmony_ci 4348c2ecf20Sopenharmony_ci if (bucket->deficit <= 0) { 4358c2ecf20Sopenharmony_ci int weight = (bucket - q->buckets == WDRR_BUCKET_FOR_HH) ? 4368c2ecf20Sopenharmony_ci 1 : q->hhf_non_hh_weight; 4378c2ecf20Sopenharmony_ci 4388c2ecf20Sopenharmony_ci bucket->deficit += weight * q->quantum; 4398c2ecf20Sopenharmony_ci list_move_tail(&bucket->bucketchain, &q->old_buckets); 4408c2ecf20Sopenharmony_ci goto begin; 4418c2ecf20Sopenharmony_ci } 4428c2ecf20Sopenharmony_ci 4438c2ecf20Sopenharmony_ci if (bucket->head) { 4448c2ecf20Sopenharmony_ci skb = dequeue_head(bucket); 4458c2ecf20Sopenharmony_ci sch->q.qlen--; 4468c2ecf20Sopenharmony_ci qdisc_qstats_backlog_dec(sch, skb); 4478c2ecf20Sopenharmony_ci } 4488c2ecf20Sopenharmony_ci 4498c2ecf20Sopenharmony_ci if (!skb) { 4508c2ecf20Sopenharmony_ci /* Force a pass through old_buckets to prevent starvation. */ 4518c2ecf20Sopenharmony_ci if ((head == &q->new_buckets) && !list_empty(&q->old_buckets)) 4528c2ecf20Sopenharmony_ci list_move_tail(&bucket->bucketchain, &q->old_buckets); 4538c2ecf20Sopenharmony_ci else 4548c2ecf20Sopenharmony_ci list_del_init(&bucket->bucketchain); 4558c2ecf20Sopenharmony_ci goto begin; 4568c2ecf20Sopenharmony_ci } 4578c2ecf20Sopenharmony_ci qdisc_bstats_update(sch, skb); 4588c2ecf20Sopenharmony_ci bucket->deficit -= qdisc_pkt_len(skb); 4598c2ecf20Sopenharmony_ci 4608c2ecf20Sopenharmony_ci return skb; 4618c2ecf20Sopenharmony_ci} 4628c2ecf20Sopenharmony_ci 4638c2ecf20Sopenharmony_cistatic void hhf_reset(struct Qdisc *sch) 4648c2ecf20Sopenharmony_ci{ 4658c2ecf20Sopenharmony_ci struct sk_buff *skb; 4668c2ecf20Sopenharmony_ci 4678c2ecf20Sopenharmony_ci while ((skb = hhf_dequeue(sch)) != NULL) 4688c2ecf20Sopenharmony_ci rtnl_kfree_skbs(skb, skb); 4698c2ecf20Sopenharmony_ci} 4708c2ecf20Sopenharmony_ci 4718c2ecf20Sopenharmony_cistatic void hhf_destroy(struct Qdisc *sch) 4728c2ecf20Sopenharmony_ci{ 4738c2ecf20Sopenharmony_ci int i; 4748c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 4758c2ecf20Sopenharmony_ci 4768c2ecf20Sopenharmony_ci for (i = 0; i < HHF_ARRAYS_CNT; i++) { 4778c2ecf20Sopenharmony_ci kvfree(q->hhf_arrays[i]); 4788c2ecf20Sopenharmony_ci kvfree(q->hhf_valid_bits[i]); 4798c2ecf20Sopenharmony_ci } 4808c2ecf20Sopenharmony_ci 4818c2ecf20Sopenharmony_ci if (!q->hh_flows) 4828c2ecf20Sopenharmony_ci return; 4838c2ecf20Sopenharmony_ci 4848c2ecf20Sopenharmony_ci for (i = 0; i < HH_FLOWS_CNT; i++) { 4858c2ecf20Sopenharmony_ci struct hh_flow_state *flow, *next; 4868c2ecf20Sopenharmony_ci struct list_head *head = &q->hh_flows[i]; 4878c2ecf20Sopenharmony_ci 4888c2ecf20Sopenharmony_ci if (list_empty(head)) 4898c2ecf20Sopenharmony_ci continue; 4908c2ecf20Sopenharmony_ci list_for_each_entry_safe(flow, next, head, flowchain) { 4918c2ecf20Sopenharmony_ci list_del(&flow->flowchain); 4928c2ecf20Sopenharmony_ci kfree(flow); 4938c2ecf20Sopenharmony_ci } 4948c2ecf20Sopenharmony_ci } 4958c2ecf20Sopenharmony_ci kvfree(q->hh_flows); 4968c2ecf20Sopenharmony_ci} 4978c2ecf20Sopenharmony_ci 4988c2ecf20Sopenharmony_cistatic const struct nla_policy hhf_policy[TCA_HHF_MAX + 1] = { 4998c2ecf20Sopenharmony_ci [TCA_HHF_BACKLOG_LIMIT] = { .type = NLA_U32 }, 5008c2ecf20Sopenharmony_ci [TCA_HHF_QUANTUM] = { .type = NLA_U32 }, 5018c2ecf20Sopenharmony_ci [TCA_HHF_HH_FLOWS_LIMIT] = { .type = NLA_U32 }, 5028c2ecf20Sopenharmony_ci [TCA_HHF_RESET_TIMEOUT] = { .type = NLA_U32 }, 5038c2ecf20Sopenharmony_ci [TCA_HHF_ADMIT_BYTES] = { .type = NLA_U32 }, 5048c2ecf20Sopenharmony_ci [TCA_HHF_EVICT_TIMEOUT] = { .type = NLA_U32 }, 5058c2ecf20Sopenharmony_ci [TCA_HHF_NON_HH_WEIGHT] = { .type = NLA_U32 }, 5068c2ecf20Sopenharmony_ci}; 5078c2ecf20Sopenharmony_ci 5088c2ecf20Sopenharmony_cistatic int hhf_change(struct Qdisc *sch, struct nlattr *opt, 5098c2ecf20Sopenharmony_ci struct netlink_ext_ack *extack) 5108c2ecf20Sopenharmony_ci{ 5118c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 5128c2ecf20Sopenharmony_ci struct nlattr *tb[TCA_HHF_MAX + 1]; 5138c2ecf20Sopenharmony_ci unsigned int qlen, prev_backlog; 5148c2ecf20Sopenharmony_ci int err; 5158c2ecf20Sopenharmony_ci u64 non_hh_quantum; 5168c2ecf20Sopenharmony_ci u32 new_quantum = q->quantum; 5178c2ecf20Sopenharmony_ci u32 new_hhf_non_hh_weight = q->hhf_non_hh_weight; 5188c2ecf20Sopenharmony_ci 5198c2ecf20Sopenharmony_ci if (!opt) 5208c2ecf20Sopenharmony_ci return -EINVAL; 5218c2ecf20Sopenharmony_ci 5228c2ecf20Sopenharmony_ci err = nla_parse_nested_deprecated(tb, TCA_HHF_MAX, opt, hhf_policy, 5238c2ecf20Sopenharmony_ci NULL); 5248c2ecf20Sopenharmony_ci if (err < 0) 5258c2ecf20Sopenharmony_ci return err; 5268c2ecf20Sopenharmony_ci 5278c2ecf20Sopenharmony_ci if (tb[TCA_HHF_QUANTUM]) 5288c2ecf20Sopenharmony_ci new_quantum = nla_get_u32(tb[TCA_HHF_QUANTUM]); 5298c2ecf20Sopenharmony_ci 5308c2ecf20Sopenharmony_ci if (tb[TCA_HHF_NON_HH_WEIGHT]) 5318c2ecf20Sopenharmony_ci new_hhf_non_hh_weight = nla_get_u32(tb[TCA_HHF_NON_HH_WEIGHT]); 5328c2ecf20Sopenharmony_ci 5338c2ecf20Sopenharmony_ci non_hh_quantum = (u64)new_quantum * new_hhf_non_hh_weight; 5348c2ecf20Sopenharmony_ci if (non_hh_quantum == 0 || non_hh_quantum > INT_MAX) 5358c2ecf20Sopenharmony_ci return -EINVAL; 5368c2ecf20Sopenharmony_ci 5378c2ecf20Sopenharmony_ci sch_tree_lock(sch); 5388c2ecf20Sopenharmony_ci 5398c2ecf20Sopenharmony_ci if (tb[TCA_HHF_BACKLOG_LIMIT]) 5408c2ecf20Sopenharmony_ci sch->limit = nla_get_u32(tb[TCA_HHF_BACKLOG_LIMIT]); 5418c2ecf20Sopenharmony_ci 5428c2ecf20Sopenharmony_ci q->quantum = new_quantum; 5438c2ecf20Sopenharmony_ci q->hhf_non_hh_weight = new_hhf_non_hh_weight; 5448c2ecf20Sopenharmony_ci 5458c2ecf20Sopenharmony_ci if (tb[TCA_HHF_HH_FLOWS_LIMIT]) 5468c2ecf20Sopenharmony_ci q->hh_flows_limit = nla_get_u32(tb[TCA_HHF_HH_FLOWS_LIMIT]); 5478c2ecf20Sopenharmony_ci 5488c2ecf20Sopenharmony_ci if (tb[TCA_HHF_RESET_TIMEOUT]) { 5498c2ecf20Sopenharmony_ci u32 us = nla_get_u32(tb[TCA_HHF_RESET_TIMEOUT]); 5508c2ecf20Sopenharmony_ci 5518c2ecf20Sopenharmony_ci q->hhf_reset_timeout = usecs_to_jiffies(us); 5528c2ecf20Sopenharmony_ci } 5538c2ecf20Sopenharmony_ci 5548c2ecf20Sopenharmony_ci if (tb[TCA_HHF_ADMIT_BYTES]) 5558c2ecf20Sopenharmony_ci q->hhf_admit_bytes = nla_get_u32(tb[TCA_HHF_ADMIT_BYTES]); 5568c2ecf20Sopenharmony_ci 5578c2ecf20Sopenharmony_ci if (tb[TCA_HHF_EVICT_TIMEOUT]) { 5588c2ecf20Sopenharmony_ci u32 us = nla_get_u32(tb[TCA_HHF_EVICT_TIMEOUT]); 5598c2ecf20Sopenharmony_ci 5608c2ecf20Sopenharmony_ci q->hhf_evict_timeout = usecs_to_jiffies(us); 5618c2ecf20Sopenharmony_ci } 5628c2ecf20Sopenharmony_ci 5638c2ecf20Sopenharmony_ci qlen = sch->q.qlen; 5648c2ecf20Sopenharmony_ci prev_backlog = sch->qstats.backlog; 5658c2ecf20Sopenharmony_ci while (sch->q.qlen > sch->limit) { 5668c2ecf20Sopenharmony_ci struct sk_buff *skb = hhf_dequeue(sch); 5678c2ecf20Sopenharmony_ci 5688c2ecf20Sopenharmony_ci rtnl_kfree_skbs(skb, skb); 5698c2ecf20Sopenharmony_ci } 5708c2ecf20Sopenharmony_ci qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, 5718c2ecf20Sopenharmony_ci prev_backlog - sch->qstats.backlog); 5728c2ecf20Sopenharmony_ci 5738c2ecf20Sopenharmony_ci sch_tree_unlock(sch); 5748c2ecf20Sopenharmony_ci return 0; 5758c2ecf20Sopenharmony_ci} 5768c2ecf20Sopenharmony_ci 5778c2ecf20Sopenharmony_cistatic int hhf_init(struct Qdisc *sch, struct nlattr *opt, 5788c2ecf20Sopenharmony_ci struct netlink_ext_ack *extack) 5798c2ecf20Sopenharmony_ci{ 5808c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 5818c2ecf20Sopenharmony_ci int i; 5828c2ecf20Sopenharmony_ci 5838c2ecf20Sopenharmony_ci sch->limit = 1000; 5848c2ecf20Sopenharmony_ci q->quantum = psched_mtu(qdisc_dev(sch)); 5858c2ecf20Sopenharmony_ci get_random_bytes(&q->perturbation, sizeof(q->perturbation)); 5868c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&q->new_buckets); 5878c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&q->old_buckets); 5888c2ecf20Sopenharmony_ci 5898c2ecf20Sopenharmony_ci /* Configurable HHF parameters */ 5908c2ecf20Sopenharmony_ci q->hhf_reset_timeout = HZ / 25; /* 40 ms */ 5918c2ecf20Sopenharmony_ci q->hhf_admit_bytes = 131072; /* 128 KB */ 5928c2ecf20Sopenharmony_ci q->hhf_evict_timeout = HZ; /* 1 sec */ 5938c2ecf20Sopenharmony_ci q->hhf_non_hh_weight = 2; 5948c2ecf20Sopenharmony_ci 5958c2ecf20Sopenharmony_ci if (opt) { 5968c2ecf20Sopenharmony_ci int err = hhf_change(sch, opt, extack); 5978c2ecf20Sopenharmony_ci 5988c2ecf20Sopenharmony_ci if (err) 5998c2ecf20Sopenharmony_ci return err; 6008c2ecf20Sopenharmony_ci } 6018c2ecf20Sopenharmony_ci 6028c2ecf20Sopenharmony_ci if (!q->hh_flows) { 6038c2ecf20Sopenharmony_ci /* Initialize heavy-hitter flow table. */ 6048c2ecf20Sopenharmony_ci q->hh_flows = kvcalloc(HH_FLOWS_CNT, sizeof(struct list_head), 6058c2ecf20Sopenharmony_ci GFP_KERNEL); 6068c2ecf20Sopenharmony_ci if (!q->hh_flows) 6078c2ecf20Sopenharmony_ci return -ENOMEM; 6088c2ecf20Sopenharmony_ci for (i = 0; i < HH_FLOWS_CNT; i++) 6098c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&q->hh_flows[i]); 6108c2ecf20Sopenharmony_ci 6118c2ecf20Sopenharmony_ci /* Cap max active HHs at twice len of hh_flows table. */ 6128c2ecf20Sopenharmony_ci q->hh_flows_limit = 2 * HH_FLOWS_CNT; 6138c2ecf20Sopenharmony_ci q->hh_flows_overlimit = 0; 6148c2ecf20Sopenharmony_ci q->hh_flows_total_cnt = 0; 6158c2ecf20Sopenharmony_ci q->hh_flows_current_cnt = 0; 6168c2ecf20Sopenharmony_ci 6178c2ecf20Sopenharmony_ci /* Initialize heavy-hitter filter arrays. */ 6188c2ecf20Sopenharmony_ci for (i = 0; i < HHF_ARRAYS_CNT; i++) { 6198c2ecf20Sopenharmony_ci q->hhf_arrays[i] = kvcalloc(HHF_ARRAYS_LEN, 6208c2ecf20Sopenharmony_ci sizeof(u32), 6218c2ecf20Sopenharmony_ci GFP_KERNEL); 6228c2ecf20Sopenharmony_ci if (!q->hhf_arrays[i]) { 6238c2ecf20Sopenharmony_ci /* Note: hhf_destroy() will be called 6248c2ecf20Sopenharmony_ci * by our caller. 6258c2ecf20Sopenharmony_ci */ 6268c2ecf20Sopenharmony_ci return -ENOMEM; 6278c2ecf20Sopenharmony_ci } 6288c2ecf20Sopenharmony_ci } 6298c2ecf20Sopenharmony_ci q->hhf_arrays_reset_timestamp = hhf_time_stamp(); 6308c2ecf20Sopenharmony_ci 6318c2ecf20Sopenharmony_ci /* Initialize valid bits of heavy-hitter filter arrays. */ 6328c2ecf20Sopenharmony_ci for (i = 0; i < HHF_ARRAYS_CNT; i++) { 6338c2ecf20Sopenharmony_ci q->hhf_valid_bits[i] = kvzalloc(HHF_ARRAYS_LEN / 6348c2ecf20Sopenharmony_ci BITS_PER_BYTE, GFP_KERNEL); 6358c2ecf20Sopenharmony_ci if (!q->hhf_valid_bits[i]) { 6368c2ecf20Sopenharmony_ci /* Note: hhf_destroy() will be called 6378c2ecf20Sopenharmony_ci * by our caller. 6388c2ecf20Sopenharmony_ci */ 6398c2ecf20Sopenharmony_ci return -ENOMEM; 6408c2ecf20Sopenharmony_ci } 6418c2ecf20Sopenharmony_ci } 6428c2ecf20Sopenharmony_ci 6438c2ecf20Sopenharmony_ci /* Initialize Weighted DRR buckets. */ 6448c2ecf20Sopenharmony_ci for (i = 0; i < WDRR_BUCKET_CNT; i++) { 6458c2ecf20Sopenharmony_ci struct wdrr_bucket *bucket = q->buckets + i; 6468c2ecf20Sopenharmony_ci 6478c2ecf20Sopenharmony_ci INIT_LIST_HEAD(&bucket->bucketchain); 6488c2ecf20Sopenharmony_ci } 6498c2ecf20Sopenharmony_ci } 6508c2ecf20Sopenharmony_ci 6518c2ecf20Sopenharmony_ci return 0; 6528c2ecf20Sopenharmony_ci} 6538c2ecf20Sopenharmony_ci 6548c2ecf20Sopenharmony_cistatic int hhf_dump(struct Qdisc *sch, struct sk_buff *skb) 6558c2ecf20Sopenharmony_ci{ 6568c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 6578c2ecf20Sopenharmony_ci struct nlattr *opts; 6588c2ecf20Sopenharmony_ci 6598c2ecf20Sopenharmony_ci opts = nla_nest_start_noflag(skb, TCA_OPTIONS); 6608c2ecf20Sopenharmony_ci if (opts == NULL) 6618c2ecf20Sopenharmony_ci goto nla_put_failure; 6628c2ecf20Sopenharmony_ci 6638c2ecf20Sopenharmony_ci if (nla_put_u32(skb, TCA_HHF_BACKLOG_LIMIT, sch->limit) || 6648c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_HHF_QUANTUM, q->quantum) || 6658c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_HHF_HH_FLOWS_LIMIT, q->hh_flows_limit) || 6668c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_HHF_RESET_TIMEOUT, 6678c2ecf20Sopenharmony_ci jiffies_to_usecs(q->hhf_reset_timeout)) || 6688c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_HHF_ADMIT_BYTES, q->hhf_admit_bytes) || 6698c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_HHF_EVICT_TIMEOUT, 6708c2ecf20Sopenharmony_ci jiffies_to_usecs(q->hhf_evict_timeout)) || 6718c2ecf20Sopenharmony_ci nla_put_u32(skb, TCA_HHF_NON_HH_WEIGHT, q->hhf_non_hh_weight)) 6728c2ecf20Sopenharmony_ci goto nla_put_failure; 6738c2ecf20Sopenharmony_ci 6748c2ecf20Sopenharmony_ci return nla_nest_end(skb, opts); 6758c2ecf20Sopenharmony_ci 6768c2ecf20Sopenharmony_cinla_put_failure: 6778c2ecf20Sopenharmony_ci return -1; 6788c2ecf20Sopenharmony_ci} 6798c2ecf20Sopenharmony_ci 6808c2ecf20Sopenharmony_cistatic int hhf_dump_stats(struct Qdisc *sch, struct gnet_dump *d) 6818c2ecf20Sopenharmony_ci{ 6828c2ecf20Sopenharmony_ci struct hhf_sched_data *q = qdisc_priv(sch); 6838c2ecf20Sopenharmony_ci struct tc_hhf_xstats st = { 6848c2ecf20Sopenharmony_ci .drop_overlimit = q->drop_overlimit, 6858c2ecf20Sopenharmony_ci .hh_overlimit = q->hh_flows_overlimit, 6868c2ecf20Sopenharmony_ci .hh_tot_count = q->hh_flows_total_cnt, 6878c2ecf20Sopenharmony_ci .hh_cur_count = q->hh_flows_current_cnt, 6888c2ecf20Sopenharmony_ci }; 6898c2ecf20Sopenharmony_ci 6908c2ecf20Sopenharmony_ci return gnet_stats_copy_app(d, &st, sizeof(st)); 6918c2ecf20Sopenharmony_ci} 6928c2ecf20Sopenharmony_ci 6938c2ecf20Sopenharmony_cistatic struct Qdisc_ops hhf_qdisc_ops __read_mostly = { 6948c2ecf20Sopenharmony_ci .id = "hhf", 6958c2ecf20Sopenharmony_ci .priv_size = sizeof(struct hhf_sched_data), 6968c2ecf20Sopenharmony_ci 6978c2ecf20Sopenharmony_ci .enqueue = hhf_enqueue, 6988c2ecf20Sopenharmony_ci .dequeue = hhf_dequeue, 6998c2ecf20Sopenharmony_ci .peek = qdisc_peek_dequeued, 7008c2ecf20Sopenharmony_ci .init = hhf_init, 7018c2ecf20Sopenharmony_ci .reset = hhf_reset, 7028c2ecf20Sopenharmony_ci .destroy = hhf_destroy, 7038c2ecf20Sopenharmony_ci .change = hhf_change, 7048c2ecf20Sopenharmony_ci .dump = hhf_dump, 7058c2ecf20Sopenharmony_ci .dump_stats = hhf_dump_stats, 7068c2ecf20Sopenharmony_ci .owner = THIS_MODULE, 7078c2ecf20Sopenharmony_ci}; 7088c2ecf20Sopenharmony_ci 7098c2ecf20Sopenharmony_cistatic int __init hhf_module_init(void) 7108c2ecf20Sopenharmony_ci{ 7118c2ecf20Sopenharmony_ci return register_qdisc(&hhf_qdisc_ops); 7128c2ecf20Sopenharmony_ci} 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_cistatic void __exit hhf_module_exit(void) 7158c2ecf20Sopenharmony_ci{ 7168c2ecf20Sopenharmony_ci unregister_qdisc(&hhf_qdisc_ops); 7178c2ecf20Sopenharmony_ci} 7188c2ecf20Sopenharmony_ci 7198c2ecf20Sopenharmony_cimodule_init(hhf_module_init) 7208c2ecf20Sopenharmony_cimodule_exit(hhf_module_exit) 7218c2ecf20Sopenharmony_ciMODULE_AUTHOR("Terry Lam"); 7228c2ecf20Sopenharmony_ciMODULE_AUTHOR("Nandita Dukkipati"); 7238c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL"); 7248c2ecf20Sopenharmony_ciMODULE_DESCRIPTION("Heavy-Hitter Filter (HHF)"); 725