162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */ 262306a36Sopenharmony_ci#ifndef _INET_ECN_H_ 362306a36Sopenharmony_ci#define _INET_ECN_H_ 462306a36Sopenharmony_ci 562306a36Sopenharmony_ci#include <linux/ip.h> 662306a36Sopenharmony_ci#include <linux/skbuff.h> 762306a36Sopenharmony_ci#include <linux/if_vlan.h> 862306a36Sopenharmony_ci 962306a36Sopenharmony_ci#include <net/inet_sock.h> 1062306a36Sopenharmony_ci#include <net/dsfield.h> 1162306a36Sopenharmony_ci#include <net/checksum.h> 1262306a36Sopenharmony_ci 1362306a36Sopenharmony_cienum { 1462306a36Sopenharmony_ci INET_ECN_NOT_ECT = 0, 1562306a36Sopenharmony_ci INET_ECN_ECT_1 = 1, 1662306a36Sopenharmony_ci INET_ECN_ECT_0 = 2, 1762306a36Sopenharmony_ci INET_ECN_CE = 3, 1862306a36Sopenharmony_ci INET_ECN_MASK = 3, 1962306a36Sopenharmony_ci}; 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_ciextern int sysctl_tunnel_ecn_log; 2262306a36Sopenharmony_ci 2362306a36Sopenharmony_cistatic inline int INET_ECN_is_ce(__u8 dsfield) 2462306a36Sopenharmony_ci{ 2562306a36Sopenharmony_ci return (dsfield & INET_ECN_MASK) == INET_ECN_CE; 2662306a36Sopenharmony_ci} 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_cistatic inline int INET_ECN_is_not_ect(__u8 dsfield) 2962306a36Sopenharmony_ci{ 3062306a36Sopenharmony_ci return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT; 3162306a36Sopenharmony_ci} 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_cistatic inline int INET_ECN_is_capable(__u8 dsfield) 3462306a36Sopenharmony_ci{ 3562306a36Sopenharmony_ci return dsfield & INET_ECN_ECT_0; 3662306a36Sopenharmony_ci} 3762306a36Sopenharmony_ci 3862306a36Sopenharmony_ci/* 3962306a36Sopenharmony_ci * RFC 3168 9.1.1 4062306a36Sopenharmony_ci * The full-functionality option for ECN encapsulation is to copy the 4162306a36Sopenharmony_ci * ECN codepoint of the inside header to the outside header on 4262306a36Sopenharmony_ci * encapsulation if the inside header is not-ECT or ECT, and to set the 4362306a36Sopenharmony_ci * ECN codepoint of the outside header to ECT(0) if the ECN codepoint of 4462306a36Sopenharmony_ci * the inside header is CE. 4562306a36Sopenharmony_ci */ 4662306a36Sopenharmony_cistatic inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) 4762306a36Sopenharmony_ci{ 4862306a36Sopenharmony_ci outer &= ~INET_ECN_MASK; 4962306a36Sopenharmony_ci outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) : 5062306a36Sopenharmony_ci INET_ECN_ECT_0; 5162306a36Sopenharmony_ci return outer; 5262306a36Sopenharmony_ci} 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cistatic inline void INET_ECN_xmit(struct sock *sk) 5562306a36Sopenharmony_ci{ 5662306a36Sopenharmony_ci inet_sk(sk)->tos |= INET_ECN_ECT_0; 5762306a36Sopenharmony_ci if (inet6_sk(sk) != NULL) 5862306a36Sopenharmony_ci inet6_sk(sk)->tclass |= INET_ECN_ECT_0; 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistatic inline void INET_ECN_dontxmit(struct sock *sk) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci inet_sk(sk)->tos &= ~INET_ECN_MASK; 6462306a36Sopenharmony_ci if (inet6_sk(sk) != NULL) 6562306a36Sopenharmony_ci inet6_sk(sk)->tclass &= ~INET_ECN_MASK; 6662306a36Sopenharmony_ci} 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci#define IP6_ECN_flow_init(label) do { \ 6962306a36Sopenharmony_ci (label) &= ~htonl(INET_ECN_MASK << 20); \ 7062306a36Sopenharmony_ci } while (0) 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci#define IP6_ECN_flow_xmit(sk, label) do { \ 7362306a36Sopenharmony_ci if (INET_ECN_is_capable(inet6_sk(sk)->tclass)) \ 7462306a36Sopenharmony_ci (label) |= htonl(INET_ECN_ECT_0 << 20); \ 7562306a36Sopenharmony_ci } while (0) 7662306a36Sopenharmony_ci 7762306a36Sopenharmony_cistatic inline int IP_ECN_set_ce(struct iphdr *iph) 7862306a36Sopenharmony_ci{ 7962306a36Sopenharmony_ci u32 ecn = (iph->tos + 1) & INET_ECN_MASK; 8062306a36Sopenharmony_ci __be16 check_add; 8162306a36Sopenharmony_ci 8262306a36Sopenharmony_ci /* 8362306a36Sopenharmony_ci * After the last operation we have (in binary): 8462306a36Sopenharmony_ci * INET_ECN_NOT_ECT => 01 8562306a36Sopenharmony_ci * INET_ECN_ECT_1 => 10 8662306a36Sopenharmony_ci * INET_ECN_ECT_0 => 11 8762306a36Sopenharmony_ci * INET_ECN_CE => 00 8862306a36Sopenharmony_ci */ 8962306a36Sopenharmony_ci if (!(ecn & 2)) 9062306a36Sopenharmony_ci return !ecn; 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci /* 9362306a36Sopenharmony_ci * The following gives us: 9462306a36Sopenharmony_ci * INET_ECN_ECT_1 => check += htons(0xFFFD) 9562306a36Sopenharmony_ci * INET_ECN_ECT_0 => check += htons(0xFFFE) 9662306a36Sopenharmony_ci */ 9762306a36Sopenharmony_ci check_add = (__force __be16)((__force u16)htons(0xFFFB) + 9862306a36Sopenharmony_ci (__force u16)htons(ecn)); 9962306a36Sopenharmony_ci 10062306a36Sopenharmony_ci iph->check = csum16_add(iph->check, check_add); 10162306a36Sopenharmony_ci iph->tos |= INET_ECN_CE; 10262306a36Sopenharmony_ci return 1; 10362306a36Sopenharmony_ci} 10462306a36Sopenharmony_ci 10562306a36Sopenharmony_cistatic inline int IP_ECN_set_ect1(struct iphdr *iph) 10662306a36Sopenharmony_ci{ 10762306a36Sopenharmony_ci if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0) 10862306a36Sopenharmony_ci return 0; 10962306a36Sopenharmony_ci 11062306a36Sopenharmony_ci iph->check = csum16_add(iph->check, htons(0x1)); 11162306a36Sopenharmony_ci iph->tos ^= INET_ECN_MASK; 11262306a36Sopenharmony_ci return 1; 11362306a36Sopenharmony_ci} 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_cistatic inline void IP_ECN_clear(struct iphdr *iph) 11662306a36Sopenharmony_ci{ 11762306a36Sopenharmony_ci iph->tos &= ~INET_ECN_MASK; 11862306a36Sopenharmony_ci} 11962306a36Sopenharmony_ci 12062306a36Sopenharmony_cistatic inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner) 12162306a36Sopenharmony_ci{ 12262306a36Sopenharmony_ci dscp &= ~INET_ECN_MASK; 12362306a36Sopenharmony_ci ipv4_change_dsfield(inner, INET_ECN_MASK, dscp); 12462306a36Sopenharmony_ci} 12562306a36Sopenharmony_ci 12662306a36Sopenharmony_cistruct ipv6hdr; 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ci/* Note: 12962306a36Sopenharmony_ci * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE, 13062306a36Sopenharmony_ci * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE 13162306a36Sopenharmony_ci * In IPv6 case, no checksum compensates the change in IPv6 header, 13262306a36Sopenharmony_ci * so we have to update skb->csum. 13362306a36Sopenharmony_ci */ 13462306a36Sopenharmony_cistatic inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph) 13562306a36Sopenharmony_ci{ 13662306a36Sopenharmony_ci __be32 from, to; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph))) 13962306a36Sopenharmony_ci return 0; 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci from = *(__be32 *)iph; 14262306a36Sopenharmony_ci to = from | htonl(INET_ECN_CE << 20); 14362306a36Sopenharmony_ci *(__be32 *)iph = to; 14462306a36Sopenharmony_ci if (skb->ip_summed == CHECKSUM_COMPLETE) 14562306a36Sopenharmony_ci skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), 14662306a36Sopenharmony_ci (__force __wsum)to); 14762306a36Sopenharmony_ci return 1; 14862306a36Sopenharmony_ci} 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_cistatic inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph) 15162306a36Sopenharmony_ci{ 15262306a36Sopenharmony_ci __be32 from, to; 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0) 15562306a36Sopenharmony_ci return 0; 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci from = *(__be32 *)iph; 15862306a36Sopenharmony_ci to = from ^ htonl(INET_ECN_MASK << 20); 15962306a36Sopenharmony_ci *(__be32 *)iph = to; 16062306a36Sopenharmony_ci if (skb->ip_summed == CHECKSUM_COMPLETE) 16162306a36Sopenharmony_ci skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from), 16262306a36Sopenharmony_ci (__force __wsum)to); 16362306a36Sopenharmony_ci return 1; 16462306a36Sopenharmony_ci} 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_cistatic inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner) 16762306a36Sopenharmony_ci{ 16862306a36Sopenharmony_ci dscp &= ~INET_ECN_MASK; 16962306a36Sopenharmony_ci ipv6_change_dsfield(inner, INET_ECN_MASK, dscp); 17062306a36Sopenharmony_ci} 17162306a36Sopenharmony_ci 17262306a36Sopenharmony_cistatic inline int INET_ECN_set_ce(struct sk_buff *skb) 17362306a36Sopenharmony_ci{ 17462306a36Sopenharmony_ci switch (skb_protocol(skb, true)) { 17562306a36Sopenharmony_ci case cpu_to_be16(ETH_P_IP): 17662306a36Sopenharmony_ci if (skb_network_header(skb) + sizeof(struct iphdr) <= 17762306a36Sopenharmony_ci skb_tail_pointer(skb)) 17862306a36Sopenharmony_ci return IP_ECN_set_ce(ip_hdr(skb)); 17962306a36Sopenharmony_ci break; 18062306a36Sopenharmony_ci 18162306a36Sopenharmony_ci case cpu_to_be16(ETH_P_IPV6): 18262306a36Sopenharmony_ci if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= 18362306a36Sopenharmony_ci skb_tail_pointer(skb)) 18462306a36Sopenharmony_ci return IP6_ECN_set_ce(skb, ipv6_hdr(skb)); 18562306a36Sopenharmony_ci break; 18662306a36Sopenharmony_ci } 18762306a36Sopenharmony_ci 18862306a36Sopenharmony_ci return 0; 18962306a36Sopenharmony_ci} 19062306a36Sopenharmony_ci 19162306a36Sopenharmony_cistatic inline int skb_get_dsfield(struct sk_buff *skb) 19262306a36Sopenharmony_ci{ 19362306a36Sopenharmony_ci switch (skb_protocol(skb, true)) { 19462306a36Sopenharmony_ci case cpu_to_be16(ETH_P_IP): 19562306a36Sopenharmony_ci if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) 19662306a36Sopenharmony_ci break; 19762306a36Sopenharmony_ci return ipv4_get_dsfield(ip_hdr(skb)); 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci case cpu_to_be16(ETH_P_IPV6): 20062306a36Sopenharmony_ci if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr))) 20162306a36Sopenharmony_ci break; 20262306a36Sopenharmony_ci return ipv6_get_dsfield(ipv6_hdr(skb)); 20362306a36Sopenharmony_ci } 20462306a36Sopenharmony_ci 20562306a36Sopenharmony_ci return -1; 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_cistatic inline int INET_ECN_set_ect1(struct sk_buff *skb) 20962306a36Sopenharmony_ci{ 21062306a36Sopenharmony_ci switch (skb_protocol(skb, true)) { 21162306a36Sopenharmony_ci case cpu_to_be16(ETH_P_IP): 21262306a36Sopenharmony_ci if (skb_network_header(skb) + sizeof(struct iphdr) <= 21362306a36Sopenharmony_ci skb_tail_pointer(skb)) 21462306a36Sopenharmony_ci return IP_ECN_set_ect1(ip_hdr(skb)); 21562306a36Sopenharmony_ci break; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_ci case cpu_to_be16(ETH_P_IPV6): 21862306a36Sopenharmony_ci if (skb_network_header(skb) + sizeof(struct ipv6hdr) <= 21962306a36Sopenharmony_ci skb_tail_pointer(skb)) 22062306a36Sopenharmony_ci return IP6_ECN_set_ect1(skb, ipv6_hdr(skb)); 22162306a36Sopenharmony_ci break; 22262306a36Sopenharmony_ci } 22362306a36Sopenharmony_ci 22462306a36Sopenharmony_ci return 0; 22562306a36Sopenharmony_ci} 22662306a36Sopenharmony_ci 22762306a36Sopenharmony_ci/* 22862306a36Sopenharmony_ci * RFC 6040 4.2 22962306a36Sopenharmony_ci * To decapsulate the inner header at the tunnel egress, a compliant 23062306a36Sopenharmony_ci * tunnel egress MUST set the outgoing ECN field to the codepoint at the 23162306a36Sopenharmony_ci * intersection of the appropriate arriving inner header (row) and outer 23262306a36Sopenharmony_ci * header (column) in Figure 4 23362306a36Sopenharmony_ci * 23462306a36Sopenharmony_ci * +---------+------------------------------------------------+ 23562306a36Sopenharmony_ci * |Arriving | Arriving Outer Header | 23662306a36Sopenharmony_ci * | Inner +---------+------------+------------+------------+ 23762306a36Sopenharmony_ci * | Header | Not-ECT | ECT(0) | ECT(1) | CE | 23862306a36Sopenharmony_ci * +---------+---------+------------+------------+------------+ 23962306a36Sopenharmony_ci * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)| 24062306a36Sopenharmony_ci * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | 24162306a36Sopenharmony_ci * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | 24262306a36Sopenharmony_ci * | CE | CE | CE | CE(!!!)| CE | 24362306a36Sopenharmony_ci * +---------+---------+------------+------------+------------+ 24462306a36Sopenharmony_ci * 24562306a36Sopenharmony_ci * Figure 4: New IP in IP Decapsulation Behaviour 24662306a36Sopenharmony_ci * 24762306a36Sopenharmony_ci * returns 0 on success 24862306a36Sopenharmony_ci * 1 if something is broken and should be logged (!!! above) 24962306a36Sopenharmony_ci * 2 if packet should be dropped 25062306a36Sopenharmony_ci */ 25162306a36Sopenharmony_cistatic inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce) 25262306a36Sopenharmony_ci{ 25362306a36Sopenharmony_ci if (INET_ECN_is_not_ect(inner)) { 25462306a36Sopenharmony_ci switch (outer & INET_ECN_MASK) { 25562306a36Sopenharmony_ci case INET_ECN_NOT_ECT: 25662306a36Sopenharmony_ci return 0; 25762306a36Sopenharmony_ci case INET_ECN_ECT_0: 25862306a36Sopenharmony_ci case INET_ECN_ECT_1: 25962306a36Sopenharmony_ci return 1; 26062306a36Sopenharmony_ci case INET_ECN_CE: 26162306a36Sopenharmony_ci return 2; 26262306a36Sopenharmony_ci } 26362306a36Sopenharmony_ci } 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci *set_ce = INET_ECN_is_ce(outer); 26662306a36Sopenharmony_ci return 0; 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_cistatic inline int INET_ECN_decapsulate(struct sk_buff *skb, 27062306a36Sopenharmony_ci __u8 outer, __u8 inner) 27162306a36Sopenharmony_ci{ 27262306a36Sopenharmony_ci bool set_ce = false; 27362306a36Sopenharmony_ci int rc; 27462306a36Sopenharmony_ci 27562306a36Sopenharmony_ci rc = __INET_ECN_decapsulate(outer, inner, &set_ce); 27662306a36Sopenharmony_ci if (!rc) { 27762306a36Sopenharmony_ci if (set_ce) 27862306a36Sopenharmony_ci INET_ECN_set_ce(skb); 27962306a36Sopenharmony_ci else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1) 28062306a36Sopenharmony_ci INET_ECN_set_ect1(skb); 28162306a36Sopenharmony_ci } 28262306a36Sopenharmony_ci 28362306a36Sopenharmony_ci return rc; 28462306a36Sopenharmony_ci} 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_cistatic inline int IP_ECN_decapsulate(const struct iphdr *oiph, 28762306a36Sopenharmony_ci struct sk_buff *skb) 28862306a36Sopenharmony_ci{ 28962306a36Sopenharmony_ci __u8 inner; 29062306a36Sopenharmony_ci 29162306a36Sopenharmony_ci switch (skb_protocol(skb, true)) { 29262306a36Sopenharmony_ci case htons(ETH_P_IP): 29362306a36Sopenharmony_ci inner = ip_hdr(skb)->tos; 29462306a36Sopenharmony_ci break; 29562306a36Sopenharmony_ci case htons(ETH_P_IPV6): 29662306a36Sopenharmony_ci inner = ipv6_get_dsfield(ipv6_hdr(skb)); 29762306a36Sopenharmony_ci break; 29862306a36Sopenharmony_ci default: 29962306a36Sopenharmony_ci return 0; 30062306a36Sopenharmony_ci } 30162306a36Sopenharmony_ci 30262306a36Sopenharmony_ci return INET_ECN_decapsulate(skb, oiph->tos, inner); 30362306a36Sopenharmony_ci} 30462306a36Sopenharmony_ci 30562306a36Sopenharmony_cistatic inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, 30662306a36Sopenharmony_ci struct sk_buff *skb) 30762306a36Sopenharmony_ci{ 30862306a36Sopenharmony_ci __u8 inner; 30962306a36Sopenharmony_ci 31062306a36Sopenharmony_ci switch (skb_protocol(skb, true)) { 31162306a36Sopenharmony_ci case htons(ETH_P_IP): 31262306a36Sopenharmony_ci inner = ip_hdr(skb)->tos; 31362306a36Sopenharmony_ci break; 31462306a36Sopenharmony_ci case htons(ETH_P_IPV6): 31562306a36Sopenharmony_ci inner = ipv6_get_dsfield(ipv6_hdr(skb)); 31662306a36Sopenharmony_ci break; 31762306a36Sopenharmony_ci default: 31862306a36Sopenharmony_ci return 0; 31962306a36Sopenharmony_ci } 32062306a36Sopenharmony_ci 32162306a36Sopenharmony_ci return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); 32262306a36Sopenharmony_ci} 32362306a36Sopenharmony_ci#endif 324