162306a36Sopenharmony_ci/* SPDX-License-Identifier: GPL-2.0 */
262306a36Sopenharmony_ci#ifndef _INET_ECN_H_
362306a36Sopenharmony_ci#define _INET_ECN_H_
462306a36Sopenharmony_ci
562306a36Sopenharmony_ci#include <linux/ip.h>
662306a36Sopenharmony_ci#include <linux/skbuff.h>
762306a36Sopenharmony_ci#include <linux/if_vlan.h>
862306a36Sopenharmony_ci
962306a36Sopenharmony_ci#include <net/inet_sock.h>
1062306a36Sopenharmony_ci#include <net/dsfield.h>
1162306a36Sopenharmony_ci#include <net/checksum.h>
1262306a36Sopenharmony_ci
/*
 * ECN codepoints. The helpers in this header extract them from a DS field
 * byte by masking with INET_ECN_MASK, i.e. they occupy its two low bits.
 */
enum {
	INET_ECN_NOT_ECT	= 0,	/* binary 00 */
	INET_ECN_ECT_1		= 1,	/* binary 01 */
	INET_ECN_ECT_0		= 2,	/* binary 10 */
	INET_ECN_CE		= 3,	/* binary 11 */
	INET_ECN_MASK		= 3,	/* selects both ECN bits */
};
2062306a36Sopenharmony_ci
2162306a36Sopenharmony_ciextern int sysctl_tunnel_ecn_log;
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_cistatic inline int INET_ECN_is_ce(__u8 dsfield)
2462306a36Sopenharmony_ci{
2562306a36Sopenharmony_ci	return (dsfield & INET_ECN_MASK) == INET_ECN_CE;
2662306a36Sopenharmony_ci}
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_cistatic inline int INET_ECN_is_not_ect(__u8 dsfield)
2962306a36Sopenharmony_ci{
3062306a36Sopenharmony_ci	return (dsfield & INET_ECN_MASK) == INET_ECN_NOT_ECT;
3162306a36Sopenharmony_ci}
3262306a36Sopenharmony_ci
3362306a36Sopenharmony_cistatic inline int INET_ECN_is_capable(__u8 dsfield)
3462306a36Sopenharmony_ci{
3562306a36Sopenharmony_ci	return dsfield & INET_ECN_ECT_0;
3662306a36Sopenharmony_ci}
3762306a36Sopenharmony_ci
3862306a36Sopenharmony_ci/*
3962306a36Sopenharmony_ci * RFC 3168 9.1.1
4062306a36Sopenharmony_ci *  The full-functionality option for ECN encapsulation is to copy the
4162306a36Sopenharmony_ci *  ECN codepoint of the inside header to the outside header on
4262306a36Sopenharmony_ci *  encapsulation if the inside header is not-ECT or ECT, and to set the
4362306a36Sopenharmony_ci *  ECN codepoint of the outside header to ECT(0) if the ECN codepoint of
4462306a36Sopenharmony_ci *  the inside header is CE.
4562306a36Sopenharmony_ci */
4662306a36Sopenharmony_cistatic inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner)
4762306a36Sopenharmony_ci{
4862306a36Sopenharmony_ci	outer &= ~INET_ECN_MASK;
4962306a36Sopenharmony_ci	outer |= !INET_ECN_is_ce(inner) ? (inner & INET_ECN_MASK) :
5062306a36Sopenharmony_ci					  INET_ECN_ECT_0;
5162306a36Sopenharmony_ci	return outer;
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_cistatic inline void INET_ECN_xmit(struct sock *sk)
5562306a36Sopenharmony_ci{
5662306a36Sopenharmony_ci	inet_sk(sk)->tos |= INET_ECN_ECT_0;
5762306a36Sopenharmony_ci	if (inet6_sk(sk) != NULL)
5862306a36Sopenharmony_ci		inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
5962306a36Sopenharmony_ci}
6062306a36Sopenharmony_ci
6162306a36Sopenharmony_cistatic inline void INET_ECN_dontxmit(struct sock *sk)
6262306a36Sopenharmony_ci{
6362306a36Sopenharmony_ci	inet_sk(sk)->tos &= ~INET_ECN_MASK;
6462306a36Sopenharmony_ci	if (inet6_sk(sk) != NULL)
6562306a36Sopenharmony_ci		inet6_sk(sk)->tclass &= ~INET_ECN_MASK;
6662306a36Sopenharmony_ci}
6762306a36Sopenharmony_ci
/*
 * Clear the two ECN bits in @label, a 32-bit big-endian word in which the
 * ECN field sits 20 bits above the least significant bit (host order).
 * @label must be a modifiable lvalue; it is evaluated once.
 */
#define IP6_ECN_flow_init(label)				\
	do {							\
		(label) &= ~htonl(INET_ECN_MASK << 20);		\
	} while (0)
7162306a36Sopenharmony_ci
/*
 * OR the ECT(0) bit into @label (same layout as for IP6_ECN_flow_init)
 * when the socket's IPv6 tclass passes INET_ECN_is_capable().
 * inet6_sk(sk) is dereferenced without a NULL check, so the caller must
 * pass a socket for which it is non-NULL.
 */
#define IP6_ECN_flow_xmit(sk, label)					\
	do {								\
		if (INET_ECN_is_capable(inet6_sk(sk)->tclass))		\
			(label) |= htonl(INET_ECN_ECT_0 << 20);		\
	} while (0)
7662306a36Sopenharmony_ci
7762306a36Sopenharmony_cistatic inline int IP_ECN_set_ce(struct iphdr *iph)
7862306a36Sopenharmony_ci{
7962306a36Sopenharmony_ci	u32 ecn = (iph->tos + 1) & INET_ECN_MASK;
8062306a36Sopenharmony_ci	__be16 check_add;
8162306a36Sopenharmony_ci
8262306a36Sopenharmony_ci	/*
8362306a36Sopenharmony_ci	 * After the last operation we have (in binary):
8462306a36Sopenharmony_ci	 * INET_ECN_NOT_ECT => 01
8562306a36Sopenharmony_ci	 * INET_ECN_ECT_1   => 10
8662306a36Sopenharmony_ci	 * INET_ECN_ECT_0   => 11
8762306a36Sopenharmony_ci	 * INET_ECN_CE      => 00
8862306a36Sopenharmony_ci	 */
8962306a36Sopenharmony_ci	if (!(ecn & 2))
9062306a36Sopenharmony_ci		return !ecn;
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	/*
9362306a36Sopenharmony_ci	 * The following gives us:
9462306a36Sopenharmony_ci	 * INET_ECN_ECT_1 => check += htons(0xFFFD)
9562306a36Sopenharmony_ci	 * INET_ECN_ECT_0 => check += htons(0xFFFE)
9662306a36Sopenharmony_ci	 */
9762306a36Sopenharmony_ci	check_add = (__force __be16)((__force u16)htons(0xFFFB) +
9862306a36Sopenharmony_ci				     (__force u16)htons(ecn));
9962306a36Sopenharmony_ci
10062306a36Sopenharmony_ci	iph->check = csum16_add(iph->check, check_add);
10162306a36Sopenharmony_ci	iph->tos |= INET_ECN_CE;
10262306a36Sopenharmony_ci	return 1;
10362306a36Sopenharmony_ci}
10462306a36Sopenharmony_ci
10562306a36Sopenharmony_cistatic inline int IP_ECN_set_ect1(struct iphdr *iph)
10662306a36Sopenharmony_ci{
10762306a36Sopenharmony_ci	if ((iph->tos & INET_ECN_MASK) != INET_ECN_ECT_0)
10862306a36Sopenharmony_ci		return 0;
10962306a36Sopenharmony_ci
11062306a36Sopenharmony_ci	iph->check = csum16_add(iph->check, htons(0x1));
11162306a36Sopenharmony_ci	iph->tos ^= INET_ECN_MASK;
11262306a36Sopenharmony_ci	return 1;
11362306a36Sopenharmony_ci}
11462306a36Sopenharmony_ci
11562306a36Sopenharmony_cistatic inline void IP_ECN_clear(struct iphdr *iph)
11662306a36Sopenharmony_ci{
11762306a36Sopenharmony_ci	iph->tos &= ~INET_ECN_MASK;
11862306a36Sopenharmony_ci}
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_cistatic inline void ipv4_copy_dscp(unsigned int dscp, struct iphdr *inner)
12162306a36Sopenharmony_ci{
12262306a36Sopenharmony_ci	dscp &= ~INET_ECN_MASK;
12362306a36Sopenharmony_ci	ipv4_change_dsfield(inner, INET_ECN_MASK, dscp);
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_cistruct ipv6hdr;
12762306a36Sopenharmony_ci
12862306a36Sopenharmony_ci/* Note:
12962306a36Sopenharmony_ci * IP_ECN_set_ce() has to tweak IPV4 checksum when setting CE,
13062306a36Sopenharmony_ci * meaning both changes have no effect on skb->csum if/when CHECKSUM_COMPLETE
13162306a36Sopenharmony_ci * In IPv6 case, no checksum compensates the change in IPv6 header,
13262306a36Sopenharmony_ci * so we have to update skb->csum.
13362306a36Sopenharmony_ci */
13462306a36Sopenharmony_cistatic inline int IP6_ECN_set_ce(struct sk_buff *skb, struct ipv6hdr *iph)
13562306a36Sopenharmony_ci{
13662306a36Sopenharmony_ci	__be32 from, to;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	if (INET_ECN_is_not_ect(ipv6_get_dsfield(iph)))
13962306a36Sopenharmony_ci		return 0;
14062306a36Sopenharmony_ci
14162306a36Sopenharmony_ci	from = *(__be32 *)iph;
14262306a36Sopenharmony_ci	to = from | htonl(INET_ECN_CE << 20);
14362306a36Sopenharmony_ci	*(__be32 *)iph = to;
14462306a36Sopenharmony_ci	if (skb->ip_summed == CHECKSUM_COMPLETE)
14562306a36Sopenharmony_ci		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
14662306a36Sopenharmony_ci				     (__force __wsum)to);
14762306a36Sopenharmony_ci	return 1;
14862306a36Sopenharmony_ci}
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_cistatic inline int IP6_ECN_set_ect1(struct sk_buff *skb, struct ipv6hdr *iph)
15162306a36Sopenharmony_ci{
15262306a36Sopenharmony_ci	__be32 from, to;
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci	if ((ipv6_get_dsfield(iph) & INET_ECN_MASK) != INET_ECN_ECT_0)
15562306a36Sopenharmony_ci		return 0;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	from = *(__be32 *)iph;
15862306a36Sopenharmony_ci	to = from ^ htonl(INET_ECN_MASK << 20);
15962306a36Sopenharmony_ci	*(__be32 *)iph = to;
16062306a36Sopenharmony_ci	if (skb->ip_summed == CHECKSUM_COMPLETE)
16162306a36Sopenharmony_ci		skb->csum = csum_add(csum_sub(skb->csum, (__force __wsum)from),
16262306a36Sopenharmony_ci				     (__force __wsum)to);
16362306a36Sopenharmony_ci	return 1;
16462306a36Sopenharmony_ci}
16562306a36Sopenharmony_ci
16662306a36Sopenharmony_cistatic inline void ipv6_copy_dscp(unsigned int dscp, struct ipv6hdr *inner)
16762306a36Sopenharmony_ci{
16862306a36Sopenharmony_ci	dscp &= ~INET_ECN_MASK;
16962306a36Sopenharmony_ci	ipv6_change_dsfield(inner, INET_ECN_MASK, dscp);
17062306a36Sopenharmony_ci}
17162306a36Sopenharmony_ci
17262306a36Sopenharmony_cistatic inline int INET_ECN_set_ce(struct sk_buff *skb)
17362306a36Sopenharmony_ci{
17462306a36Sopenharmony_ci	switch (skb_protocol(skb, true)) {
17562306a36Sopenharmony_ci	case cpu_to_be16(ETH_P_IP):
17662306a36Sopenharmony_ci		if (skb_network_header(skb) + sizeof(struct iphdr) <=
17762306a36Sopenharmony_ci		    skb_tail_pointer(skb))
17862306a36Sopenharmony_ci			return IP_ECN_set_ce(ip_hdr(skb));
17962306a36Sopenharmony_ci		break;
18062306a36Sopenharmony_ci
18162306a36Sopenharmony_ci	case cpu_to_be16(ETH_P_IPV6):
18262306a36Sopenharmony_ci		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
18362306a36Sopenharmony_ci		    skb_tail_pointer(skb))
18462306a36Sopenharmony_ci			return IP6_ECN_set_ce(skb, ipv6_hdr(skb));
18562306a36Sopenharmony_ci		break;
18662306a36Sopenharmony_ci	}
18762306a36Sopenharmony_ci
18862306a36Sopenharmony_ci	return 0;
18962306a36Sopenharmony_ci}
19062306a36Sopenharmony_ci
19162306a36Sopenharmony_cistatic inline int skb_get_dsfield(struct sk_buff *skb)
19262306a36Sopenharmony_ci{
19362306a36Sopenharmony_ci	switch (skb_protocol(skb, true)) {
19462306a36Sopenharmony_ci	case cpu_to_be16(ETH_P_IP):
19562306a36Sopenharmony_ci		if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
19662306a36Sopenharmony_ci			break;
19762306a36Sopenharmony_ci		return ipv4_get_dsfield(ip_hdr(skb));
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci	case cpu_to_be16(ETH_P_IPV6):
20062306a36Sopenharmony_ci		if (!pskb_network_may_pull(skb, sizeof(struct ipv6hdr)))
20162306a36Sopenharmony_ci			break;
20262306a36Sopenharmony_ci		return ipv6_get_dsfield(ipv6_hdr(skb));
20362306a36Sopenharmony_ci	}
20462306a36Sopenharmony_ci
20562306a36Sopenharmony_ci	return -1;
20662306a36Sopenharmony_ci}
20762306a36Sopenharmony_ci
20862306a36Sopenharmony_cistatic inline int INET_ECN_set_ect1(struct sk_buff *skb)
20962306a36Sopenharmony_ci{
21062306a36Sopenharmony_ci	switch (skb_protocol(skb, true)) {
21162306a36Sopenharmony_ci	case cpu_to_be16(ETH_P_IP):
21262306a36Sopenharmony_ci		if (skb_network_header(skb) + sizeof(struct iphdr) <=
21362306a36Sopenharmony_ci		    skb_tail_pointer(skb))
21462306a36Sopenharmony_ci			return IP_ECN_set_ect1(ip_hdr(skb));
21562306a36Sopenharmony_ci		break;
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci	case cpu_to_be16(ETH_P_IPV6):
21862306a36Sopenharmony_ci		if (skb_network_header(skb) + sizeof(struct ipv6hdr) <=
21962306a36Sopenharmony_ci		    skb_tail_pointer(skb))
22062306a36Sopenharmony_ci			return IP6_ECN_set_ect1(skb, ipv6_hdr(skb));
22162306a36Sopenharmony_ci		break;
22262306a36Sopenharmony_ci	}
22362306a36Sopenharmony_ci
22462306a36Sopenharmony_ci	return 0;
22562306a36Sopenharmony_ci}
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci/*
22862306a36Sopenharmony_ci * RFC 6040 4.2
22962306a36Sopenharmony_ci *  To decapsulate the inner header at the tunnel egress, a compliant
23062306a36Sopenharmony_ci *  tunnel egress MUST set the outgoing ECN field to the codepoint at the
23162306a36Sopenharmony_ci *  intersection of the appropriate arriving inner header (row) and outer
23262306a36Sopenharmony_ci *  header (column) in Figure 4
23362306a36Sopenharmony_ci *
23462306a36Sopenharmony_ci *      +---------+------------------------------------------------+
23562306a36Sopenharmony_ci *      |Arriving |            Arriving Outer Header               |
23662306a36Sopenharmony_ci *      |   Inner +---------+------------+------------+------------+
23762306a36Sopenharmony_ci *      |  Header | Not-ECT | ECT(0)     | ECT(1)     |     CE     |
23862306a36Sopenharmony_ci *      +---------+---------+------------+------------+------------+
23962306a36Sopenharmony_ci *      | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| <drop>(!!!)|
24062306a36Sopenharmony_ci *      |  ECT(0) |  ECT(0) | ECT(0)     | ECT(1)     |     CE     |
24162306a36Sopenharmony_ci *      |  ECT(1) |  ECT(1) | ECT(1) (!) | ECT(1)     |     CE     |
24262306a36Sopenharmony_ci *      |    CE   |      CE |     CE     |     CE(!!!)|     CE     |
24362306a36Sopenharmony_ci *      +---------+---------+------------+------------+------------+
24462306a36Sopenharmony_ci *
24562306a36Sopenharmony_ci *             Figure 4: New IP in IP Decapsulation Behaviour
24662306a36Sopenharmony_ci *
24762306a36Sopenharmony_ci *  returns 0 on success
24862306a36Sopenharmony_ci *          1 if something is broken and should be logged (!!! above)
24962306a36Sopenharmony_ci *          2 if packet should be dropped
25062306a36Sopenharmony_ci */
25162306a36Sopenharmony_cistatic inline int __INET_ECN_decapsulate(__u8 outer, __u8 inner, bool *set_ce)
25262306a36Sopenharmony_ci{
25362306a36Sopenharmony_ci	if (INET_ECN_is_not_ect(inner)) {
25462306a36Sopenharmony_ci		switch (outer & INET_ECN_MASK) {
25562306a36Sopenharmony_ci		case INET_ECN_NOT_ECT:
25662306a36Sopenharmony_ci			return 0;
25762306a36Sopenharmony_ci		case INET_ECN_ECT_0:
25862306a36Sopenharmony_ci		case INET_ECN_ECT_1:
25962306a36Sopenharmony_ci			return 1;
26062306a36Sopenharmony_ci		case INET_ECN_CE:
26162306a36Sopenharmony_ci			return 2;
26262306a36Sopenharmony_ci		}
26362306a36Sopenharmony_ci	}
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci	*set_ce = INET_ECN_is_ce(outer);
26662306a36Sopenharmony_ci	return 0;
26762306a36Sopenharmony_ci}
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_cistatic inline int INET_ECN_decapsulate(struct sk_buff *skb,
27062306a36Sopenharmony_ci				       __u8 outer, __u8 inner)
27162306a36Sopenharmony_ci{
27262306a36Sopenharmony_ci	bool set_ce = false;
27362306a36Sopenharmony_ci	int rc;
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	rc = __INET_ECN_decapsulate(outer, inner, &set_ce);
27662306a36Sopenharmony_ci	if (!rc) {
27762306a36Sopenharmony_ci		if (set_ce)
27862306a36Sopenharmony_ci			INET_ECN_set_ce(skb);
27962306a36Sopenharmony_ci		else if ((outer & INET_ECN_MASK) == INET_ECN_ECT_1)
28062306a36Sopenharmony_ci			INET_ECN_set_ect1(skb);
28162306a36Sopenharmony_ci	}
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	return rc;
28462306a36Sopenharmony_ci}
28562306a36Sopenharmony_ci
28662306a36Sopenharmony_cistatic inline int IP_ECN_decapsulate(const struct iphdr *oiph,
28762306a36Sopenharmony_ci				     struct sk_buff *skb)
28862306a36Sopenharmony_ci{
28962306a36Sopenharmony_ci	__u8 inner;
29062306a36Sopenharmony_ci
29162306a36Sopenharmony_ci	switch (skb_protocol(skb, true)) {
29262306a36Sopenharmony_ci	case htons(ETH_P_IP):
29362306a36Sopenharmony_ci		inner = ip_hdr(skb)->tos;
29462306a36Sopenharmony_ci		break;
29562306a36Sopenharmony_ci	case htons(ETH_P_IPV6):
29662306a36Sopenharmony_ci		inner = ipv6_get_dsfield(ipv6_hdr(skb));
29762306a36Sopenharmony_ci		break;
29862306a36Sopenharmony_ci	default:
29962306a36Sopenharmony_ci		return 0;
30062306a36Sopenharmony_ci	}
30162306a36Sopenharmony_ci
30262306a36Sopenharmony_ci	return INET_ECN_decapsulate(skb, oiph->tos, inner);
30362306a36Sopenharmony_ci}
30462306a36Sopenharmony_ci
30562306a36Sopenharmony_cistatic inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h,
30662306a36Sopenharmony_ci				      struct sk_buff *skb)
30762306a36Sopenharmony_ci{
30862306a36Sopenharmony_ci	__u8 inner;
30962306a36Sopenharmony_ci
31062306a36Sopenharmony_ci	switch (skb_protocol(skb, true)) {
31162306a36Sopenharmony_ci	case htons(ETH_P_IP):
31262306a36Sopenharmony_ci		inner = ip_hdr(skb)->tos;
31362306a36Sopenharmony_ci		break;
31462306a36Sopenharmony_ci	case htons(ETH_P_IPV6):
31562306a36Sopenharmony_ci		inner = ipv6_get_dsfield(ipv6_hdr(skb));
31662306a36Sopenharmony_ci		break;
31762306a36Sopenharmony_ci	default:
31862306a36Sopenharmony_ci		return 0;
31962306a36Sopenharmony_ci	}
32062306a36Sopenharmony_ci
32162306a36Sopenharmony_ci	return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner);
32262306a36Sopenharmony_ci}
32362306a36Sopenharmony_ci#endif
324