1/* SPDX-License-Identifier: GPL-2.0 2 * 3 * Copyright (c) 2019 Facebook 4 * 5 * This program is free software; you can redistribute it and/or 6 * modify it under the terms of version 2 of the GNU General Public 7 * License as published by the Free Software Foundation. 8 * 9 * Include file for sample Host Bandwidth Manager (HBM) BPF programs 10 */ 11#define KBUILD_MODNAME "foo" 12#include <stddef.h> 13#include <stdbool.h> 14#include <uapi/linux/bpf.h> 15#include <uapi/linux/if_ether.h> 16#include <uapi/linux/if_packet.h> 17#include <uapi/linux/ip.h> 18#include <uapi/linux/ipv6.h> 19#include <uapi/linux/in.h> 20#include <uapi/linux/tcp.h> 21#include <uapi/linux/filter.h> 22#include <uapi/linux/pkt_cls.h> 23#include <net/ipv6.h> 24#include <net/inet_ecn.h> 25#include <bpf/bpf_endian.h> 26#include <bpf/bpf_helpers.h> 27#include "hbm.h" 28 29#define DROP_PKT 0 30#define ALLOW_PKT 1 31#define TCP_ECN_OK 1 32#define CWR 2 33 34#ifndef HBM_DEBUG // Define HBM_DEBUG to enable debugging 35#undef bpf_printk 36#define bpf_printk(fmt, ...) 37#endif 38 39#define INITIAL_CREDIT_PACKETS 100 40#define MAX_BYTES_PER_PACKET 1500 41#define MARK_THRESH (40 * MAX_BYTES_PER_PACKET) 42#define DROP_THRESH (80 * 5 * MAX_BYTES_PER_PACKET) 43#define LARGE_PKT_DROP_THRESH (DROP_THRESH - (15 * MAX_BYTES_PER_PACKET)) 44#define MARK_REGION_SIZE (LARGE_PKT_DROP_THRESH - MARK_THRESH) 45#define LARGE_PKT_THRESH 120 46#define MAX_CREDIT (100 * MAX_BYTES_PER_PACKET) 47#define INIT_CREDIT (INITIAL_CREDIT_PACKETS * MAX_BYTES_PER_PACKET) 48 49// Time base accounting for fq's EDT 50#define BURST_SIZE_NS 100000 // 100us 51#define MARK_THRESH_NS 50000 // 50us 52#define DROP_THRESH_NS 500000 // 500us 53// Reserve 20us of queuing for small packets (less than 120 bytes) 54#define LARGE_PKT_DROP_THRESH_NS (DROP_THRESH_NS - 20000) 55#define MARK_REGION_SIZE_NS (LARGE_PKT_DROP_THRESH_NS - MARK_THRESH_NS) 56 57// rate in bytes per ns << 20 58#define CREDIT_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20) 59#define BYTES_PER_NS(delta, rate) ((((u64)(delta)) * (rate)) >> 20) 60#define BYTES_TO_NS(bytes, rate) div64_u64(((u64)(bytes)) << 20, (u64)(rate)) 61 62struct { 63 __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); 64 __type(key, struct bpf_cgroup_storage_key); 65 __type(value, struct hbm_vqueue); 66} queue_state SEC(".maps"); 67 68struct { 69 __uint(type, BPF_MAP_TYPE_ARRAY); 70 __uint(max_entries, 1); 71 __type(key, u32); 72 __type(value, struct hvm_queue_stats); 73} queue_stats SEC(".maps"); 74 75struct hbm_pkt_info { 76 int cwnd; 77 int rtt; 78 int packets_out; 79 bool is_ip; 80 bool is_tcp; 81 short ecn; 82}; 83 84static int get_tcp_info(struct __sk_buff *skb, struct hbm_pkt_info *pkti) 85{ 86 struct bpf_sock *sk; 87 struct bpf_tcp_sock *tp; 88 89 sk = skb->sk; 90 if (sk) { 91 sk = bpf_sk_fullsock(sk); 92 if (sk) { 93 if (sk->protocol == IPPROTO_TCP) { 94 tp = bpf_tcp_sock(sk); 95 if (tp) { 96 pkti->cwnd = tp->snd_cwnd; 97 pkti->rtt = tp->srtt_us >> 3; 98 pkti->packets_out = tp->packets_out; 99 return 0; 100 } 101 } 102 } 103 } 104 pkti->cwnd = 0; 105 pkti->rtt = 0; 106 pkti->packets_out = 0; 107 return 1; 108} 109 110static void hbm_get_pkt_info(struct __sk_buff *skb, 111 struct hbm_pkt_info *pkti) 112{ 113 struct iphdr iph; 114 struct ipv6hdr *ip6h; 115 116 pkti->cwnd = 0; 117 pkti->rtt = 0; 118 bpf_skb_load_bytes(skb, 0, &iph, 12); 119 if (iph.version == 6) { 120 ip6h = (struct ipv6hdr *)&iph; 121 pkti->is_ip = true; 122 pkti->is_tcp = (ip6h->nexthdr == 6); 123 pkti->ecn = (ip6h->flow_lbl[0] >> 4) & INET_ECN_MASK; 124 } else if (iph.version == 4) { 125 pkti->is_ip = true; 126 pkti->is_tcp = (iph.protocol == 6); 127 pkti->ecn = iph.tos & INET_ECN_MASK; 128 } else { 129 pkti->is_ip = false; 130 pkti->is_tcp = false; 131 pkti->ecn = 0; 132 } 133 if (pkti->is_tcp) 134 get_tcp_info(skb, pkti); 135} 136 137static __always_inline void hbm_init_vqueue(struct hbm_vqueue *qdp, int rate) 138{ 139 bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); 140 qdp->lasttime = bpf_ktime_get_ns(); 141 qdp->credit = INIT_CREDIT; 142 qdp->rate = rate * 128; 143} 144 145static __always_inline void hbm_init_edt_vqueue(struct hbm_vqueue *qdp, 146 int rate) 147{ 148 unsigned long long curtime; 149 150 curtime = bpf_ktime_get_ns(); 151 bpf_printk("Initializing queue_state, rate:%d\n", rate * 128); 152 qdp->lasttime = curtime - BURST_SIZE_NS; // support initial burst 153 qdp->credit = 0; // not used 154 qdp->rate = rate * 128; 155} 156 157static __always_inline void hbm_update_stats(struct hbm_queue_stats *qsp, 158 int len, 159 unsigned long long curtime, 160 bool congestion_flag, 161 bool drop_flag, 162 bool cwr_flag, 163 bool ecn_ce_flag, 164 struct hbm_pkt_info *pkti, 165 int credit) 166{ 167 int rv = ALLOW_PKT; 168 169 if (qsp != NULL) { 170 // Following is needed for work conserving 171 __sync_add_and_fetch(&(qsp->bytes_total), len); 172 if (qsp->stats) { 173 // Optionally update statistics 174 if (qsp->firstPacketTime == 0) 175 qsp->firstPacketTime = curtime; 176 qsp->lastPacketTime = curtime; 177 __sync_add_and_fetch(&(qsp->pkts_total), 1); 178 if (congestion_flag) { 179 __sync_add_and_fetch(&(qsp->pkts_marked), 1); 180 __sync_add_and_fetch(&(qsp->bytes_marked), len); 181 } 182 if (drop_flag) { 183 __sync_add_and_fetch(&(qsp->pkts_dropped), 1); 184 __sync_add_and_fetch(&(qsp->bytes_dropped), 185 len); 186 } 187 if (ecn_ce_flag) 188 __sync_add_and_fetch(&(qsp->pkts_ecn_ce), 1); 189 if (pkti->cwnd) { 190 __sync_add_and_fetch(&(qsp->sum_cwnd), 191 pkti->cwnd); 192 __sync_add_and_fetch(&(qsp->sum_cwnd_cnt), 1); 193 } 194 if (pkti->rtt) 195 __sync_add_and_fetch(&(qsp->sum_rtt), 196 pkti->rtt); 197 __sync_add_and_fetch(&(qsp->sum_credit), credit); 198 199 if (drop_flag) 200 rv = DROP_PKT; 201 if (cwr_flag) 202 rv |= 2; 203 if (rv == DROP_PKT) 204 __sync_add_and_fetch(&(qsp->returnValCount[0]), 205 1); 206 else if (rv == ALLOW_PKT) 207 __sync_add_and_fetch(&(qsp->returnValCount[1]), 208 1); 209 else if (rv == 2) 210 __sync_add_and_fetch(&(qsp->returnValCount[2]), 211 1); 212 else if (rv == 3) 213 __sync_add_and_fetch(&(qsp->returnValCount[3]), 214 1); 215 } 216 } 217} 218