// SPDX-License-Identifier: GPL-2.0-only
/*
 * GENEVE: Generic Network Virtualization Encapsulation
 *
 * Copyright (c) 2015 Red Hat, Inc.
 */
762306a36Sopenharmony_ci
862306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
962306a36Sopenharmony_ci
1062306a36Sopenharmony_ci#include <linux/ethtool.h>
1162306a36Sopenharmony_ci#include <linux/kernel.h>
1262306a36Sopenharmony_ci#include <linux/module.h>
1362306a36Sopenharmony_ci#include <linux/etherdevice.h>
1462306a36Sopenharmony_ci#include <linux/hash.h>
1562306a36Sopenharmony_ci#include <net/ipv6_stubs.h>
1662306a36Sopenharmony_ci#include <net/dst_metadata.h>
1762306a36Sopenharmony_ci#include <net/gro_cells.h>
1862306a36Sopenharmony_ci#include <net/rtnetlink.h>
1962306a36Sopenharmony_ci#include <net/geneve.h>
2062306a36Sopenharmony_ci#include <net/gro.h>
2162306a36Sopenharmony_ci#include <net/protocol.h>
2262306a36Sopenharmony_ci
2362306a36Sopenharmony_ci#define GENEVE_NETDEV_VER	"0.6"
2462306a36Sopenharmony_ci
2562306a36Sopenharmony_ci#define GENEVE_N_VID		(1u << 24)
2662306a36Sopenharmony_ci#define GENEVE_VID_MASK		(GENEVE_N_VID - 1)
2762306a36Sopenharmony_ci
2862306a36Sopenharmony_ci#define VNI_HASH_BITS		10
2962306a36Sopenharmony_ci#define VNI_HASH_SIZE		(1<<VNI_HASH_BITS)
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_cistatic bool log_ecn_error = true;
3262306a36Sopenharmony_cimodule_param(log_ecn_error, bool, 0644);
3362306a36Sopenharmony_ciMODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
3462306a36Sopenharmony_ci
3562306a36Sopenharmony_ci#define GENEVE_VER 0
3662306a36Sopenharmony_ci#define GENEVE_BASE_HLEN (sizeof(struct udphdr) + sizeof(struct genevehdr))
3762306a36Sopenharmony_ci#define GENEVE_IPV4_HLEN (ETH_HLEN + sizeof(struct iphdr) + GENEVE_BASE_HLEN)
3862306a36Sopenharmony_ci#define GENEVE_IPV6_HLEN (ETH_HLEN + sizeof(struct ipv6hdr) + GENEVE_BASE_HLEN)
3962306a36Sopenharmony_ci
4062306a36Sopenharmony_ci/* per-network namespace private data for this module */
4162306a36Sopenharmony_cistruct geneve_net {
4262306a36Sopenharmony_ci	struct list_head	geneve_list;
4362306a36Sopenharmony_ci	struct list_head	sock_list;
4462306a36Sopenharmony_ci};
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_cistatic unsigned int geneve_net_id;
4762306a36Sopenharmony_ci
4862306a36Sopenharmony_cistruct geneve_dev_node {
4962306a36Sopenharmony_ci	struct hlist_node hlist;
5062306a36Sopenharmony_ci	struct geneve_dev *geneve;
5162306a36Sopenharmony_ci};
5262306a36Sopenharmony_ci
5362306a36Sopenharmony_cistruct geneve_config {
5462306a36Sopenharmony_ci	struct ip_tunnel_info	info;
5562306a36Sopenharmony_ci	bool			collect_md;
5662306a36Sopenharmony_ci	bool			use_udp6_rx_checksums;
5762306a36Sopenharmony_ci	bool			ttl_inherit;
5862306a36Sopenharmony_ci	enum ifla_geneve_df	df;
5962306a36Sopenharmony_ci	bool			inner_proto_inherit;
6062306a36Sopenharmony_ci};
6162306a36Sopenharmony_ci
6262306a36Sopenharmony_ci/* Pseudo network device */
6362306a36Sopenharmony_cistruct geneve_dev {
6462306a36Sopenharmony_ci	struct geneve_dev_node hlist4;	/* vni hash table for IPv4 socket */
6562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
6662306a36Sopenharmony_ci	struct geneve_dev_node hlist6;	/* vni hash table for IPv6 socket */
6762306a36Sopenharmony_ci#endif
6862306a36Sopenharmony_ci	struct net	   *net;	/* netns for packet i/o */
6962306a36Sopenharmony_ci	struct net_device  *dev;	/* netdev for geneve tunnel */
7062306a36Sopenharmony_ci	struct geneve_sock __rcu *sock4;	/* IPv4 socket used for geneve tunnel */
7162306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
7262306a36Sopenharmony_ci	struct geneve_sock __rcu *sock6;	/* IPv6 socket used for geneve tunnel */
7362306a36Sopenharmony_ci#endif
7462306a36Sopenharmony_ci	struct list_head   next;	/* geneve's per namespace list */
7562306a36Sopenharmony_ci	struct gro_cells   gro_cells;
7662306a36Sopenharmony_ci	struct geneve_config cfg;
7762306a36Sopenharmony_ci};
7862306a36Sopenharmony_ci
7962306a36Sopenharmony_cistruct geneve_sock {
8062306a36Sopenharmony_ci	bool			collect_md;
8162306a36Sopenharmony_ci	struct list_head	list;
8262306a36Sopenharmony_ci	struct socket		*sock;
8362306a36Sopenharmony_ci	struct rcu_head		rcu;
8462306a36Sopenharmony_ci	int			refcnt;
8562306a36Sopenharmony_ci	struct hlist_head	vni_list[VNI_HASH_SIZE];
8662306a36Sopenharmony_ci};
8762306a36Sopenharmony_ci
8862306a36Sopenharmony_cistatic inline __u32 geneve_net_vni_hash(u8 vni[3])
8962306a36Sopenharmony_ci{
9062306a36Sopenharmony_ci	__u32 vnid;
9162306a36Sopenharmony_ci
9262306a36Sopenharmony_ci	vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
9362306a36Sopenharmony_ci	return hash_32(vnid, VNI_HASH_BITS);
9462306a36Sopenharmony_ci}
9562306a36Sopenharmony_ci
9662306a36Sopenharmony_cistatic __be64 vni_to_tunnel_id(const __u8 *vni)
9762306a36Sopenharmony_ci{
9862306a36Sopenharmony_ci#ifdef __BIG_ENDIAN
9962306a36Sopenharmony_ci	return (vni[0] << 16) | (vni[1] << 8) | vni[2];
10062306a36Sopenharmony_ci#else
10162306a36Sopenharmony_ci	return (__force __be64)(((__force u64)vni[0] << 40) |
10262306a36Sopenharmony_ci				((__force u64)vni[1] << 48) |
10362306a36Sopenharmony_ci				((__force u64)vni[2] << 56));
10462306a36Sopenharmony_ci#endif
10562306a36Sopenharmony_ci}
10662306a36Sopenharmony_ci
10762306a36Sopenharmony_ci/* Convert 64 bit tunnel ID to 24 bit VNI. */
10862306a36Sopenharmony_cistatic void tunnel_id_to_vni(__be64 tun_id, __u8 *vni)
10962306a36Sopenharmony_ci{
11062306a36Sopenharmony_ci#ifdef __BIG_ENDIAN
11162306a36Sopenharmony_ci	vni[0] = (__force __u8)(tun_id >> 16);
11262306a36Sopenharmony_ci	vni[1] = (__force __u8)(tun_id >> 8);
11362306a36Sopenharmony_ci	vni[2] = (__force __u8)tun_id;
11462306a36Sopenharmony_ci#else
11562306a36Sopenharmony_ci	vni[0] = (__force __u8)((__force u64)tun_id >> 40);
11662306a36Sopenharmony_ci	vni[1] = (__force __u8)((__force u64)tun_id >> 48);
11762306a36Sopenharmony_ci	vni[2] = (__force __u8)((__force u64)tun_id >> 56);
11862306a36Sopenharmony_ci#endif
11962306a36Sopenharmony_ci}
12062306a36Sopenharmony_ci
12162306a36Sopenharmony_cistatic bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
12262306a36Sopenharmony_ci{
12362306a36Sopenharmony_ci	return !memcmp(vni, &tun_id[5], 3);
12462306a36Sopenharmony_ci}
12562306a36Sopenharmony_ci
12662306a36Sopenharmony_cistatic sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
12762306a36Sopenharmony_ci{
12862306a36Sopenharmony_ci	return gs->sock->sk->sk_family;
12962306a36Sopenharmony_ci}
13062306a36Sopenharmony_ci
13162306a36Sopenharmony_cistatic struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
13262306a36Sopenharmony_ci					__be32 addr, u8 vni[])
13362306a36Sopenharmony_ci{
13462306a36Sopenharmony_ci	struct hlist_head *vni_list_head;
13562306a36Sopenharmony_ci	struct geneve_dev_node *node;
13662306a36Sopenharmony_ci	__u32 hash;
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci	/* Find the device for this VNI */
13962306a36Sopenharmony_ci	hash = geneve_net_vni_hash(vni);
14062306a36Sopenharmony_ci	vni_list_head = &gs->vni_list[hash];
14162306a36Sopenharmony_ci	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
14262306a36Sopenharmony_ci		if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
14362306a36Sopenharmony_ci		    addr == node->geneve->cfg.info.key.u.ipv4.dst)
14462306a36Sopenharmony_ci			return node->geneve;
14562306a36Sopenharmony_ci	}
14662306a36Sopenharmony_ci	return NULL;
14762306a36Sopenharmony_ci}
14862306a36Sopenharmony_ci
14962306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
15062306a36Sopenharmony_cistatic struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
15162306a36Sopenharmony_ci					 struct in6_addr addr6, u8 vni[])
15262306a36Sopenharmony_ci{
15362306a36Sopenharmony_ci	struct hlist_head *vni_list_head;
15462306a36Sopenharmony_ci	struct geneve_dev_node *node;
15562306a36Sopenharmony_ci	__u32 hash;
15662306a36Sopenharmony_ci
15762306a36Sopenharmony_ci	/* Find the device for this VNI */
15862306a36Sopenharmony_ci	hash = geneve_net_vni_hash(vni);
15962306a36Sopenharmony_ci	vni_list_head = &gs->vni_list[hash];
16062306a36Sopenharmony_ci	hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
16162306a36Sopenharmony_ci		if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
16262306a36Sopenharmony_ci		    ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
16362306a36Sopenharmony_ci			return node->geneve;
16462306a36Sopenharmony_ci	}
16562306a36Sopenharmony_ci	return NULL;
16662306a36Sopenharmony_ci}
16762306a36Sopenharmony_ci#endif
16862306a36Sopenharmony_ci
/* The Geneve header sits directly behind the UDP header of @skb. */
static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb)
{
	struct genevehdr *gnvh;

	gnvh = (struct genevehdr *)(udp_hdr(skb) + 1);
	return gnvh;
}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_cistatic struct geneve_dev *geneve_lookup_skb(struct geneve_sock *gs,
17562306a36Sopenharmony_ci					    struct sk_buff *skb)
17662306a36Sopenharmony_ci{
17762306a36Sopenharmony_ci	static u8 zero_vni[3];
17862306a36Sopenharmony_ci	u8 *vni;
17962306a36Sopenharmony_ci
18062306a36Sopenharmony_ci	if (geneve_get_sk_family(gs) == AF_INET) {
18162306a36Sopenharmony_ci		struct iphdr *iph;
18262306a36Sopenharmony_ci		__be32 addr;
18362306a36Sopenharmony_ci
18462306a36Sopenharmony_ci		iph = ip_hdr(skb); /* outer IP header... */
18562306a36Sopenharmony_ci
18662306a36Sopenharmony_ci		if (gs->collect_md) {
18762306a36Sopenharmony_ci			vni = zero_vni;
18862306a36Sopenharmony_ci			addr = 0;
18962306a36Sopenharmony_ci		} else {
19062306a36Sopenharmony_ci			vni = geneve_hdr(skb)->vni;
19162306a36Sopenharmony_ci			addr = iph->saddr;
19262306a36Sopenharmony_ci		}
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_ci		return geneve_lookup(gs, addr, vni);
19562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
19662306a36Sopenharmony_ci	} else if (geneve_get_sk_family(gs) == AF_INET6) {
19762306a36Sopenharmony_ci		static struct in6_addr zero_addr6;
19862306a36Sopenharmony_ci		struct ipv6hdr *ip6h;
19962306a36Sopenharmony_ci		struct in6_addr addr6;
20062306a36Sopenharmony_ci
20162306a36Sopenharmony_ci		ip6h = ipv6_hdr(skb); /* outer IPv6 header... */
20262306a36Sopenharmony_ci
20362306a36Sopenharmony_ci		if (gs->collect_md) {
20462306a36Sopenharmony_ci			vni = zero_vni;
20562306a36Sopenharmony_ci			addr6 = zero_addr6;
20662306a36Sopenharmony_ci		} else {
20762306a36Sopenharmony_ci			vni = geneve_hdr(skb)->vni;
20862306a36Sopenharmony_ci			addr6 = ip6h->saddr;
20962306a36Sopenharmony_ci		}
21062306a36Sopenharmony_ci
21162306a36Sopenharmony_ci		return geneve6_lookup(gs, addr6, vni);
21262306a36Sopenharmony_ci#endif
21362306a36Sopenharmony_ci	}
21462306a36Sopenharmony_ci	return NULL;
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_ci/* geneve receive/decap routine */
21862306a36Sopenharmony_cistatic void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
21962306a36Sopenharmony_ci		      struct sk_buff *skb)
22062306a36Sopenharmony_ci{
22162306a36Sopenharmony_ci	struct genevehdr *gnvh = geneve_hdr(skb);
22262306a36Sopenharmony_ci	struct metadata_dst *tun_dst = NULL;
22362306a36Sopenharmony_ci	unsigned int len;
22462306a36Sopenharmony_ci	int nh, err = 0;
22562306a36Sopenharmony_ci	void *oiph;
22662306a36Sopenharmony_ci
22762306a36Sopenharmony_ci	if (ip_tunnel_collect_metadata() || gs->collect_md) {
22862306a36Sopenharmony_ci		__be16 flags;
22962306a36Sopenharmony_ci
23062306a36Sopenharmony_ci		flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) |
23162306a36Sopenharmony_ci			(gnvh->critical ? TUNNEL_CRIT_OPT : 0);
23262306a36Sopenharmony_ci
23362306a36Sopenharmony_ci		tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
23462306a36Sopenharmony_ci					 vni_to_tunnel_id(gnvh->vni),
23562306a36Sopenharmony_ci					 gnvh->opt_len * 4);
23662306a36Sopenharmony_ci		if (!tun_dst) {
23762306a36Sopenharmony_ci			geneve->dev->stats.rx_dropped++;
23862306a36Sopenharmony_ci			goto drop;
23962306a36Sopenharmony_ci		}
24062306a36Sopenharmony_ci		/* Update tunnel dst according to Geneve options. */
24162306a36Sopenharmony_ci		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
24262306a36Sopenharmony_ci					gnvh->options, gnvh->opt_len * 4,
24362306a36Sopenharmony_ci					TUNNEL_GENEVE_OPT);
24462306a36Sopenharmony_ci	} else {
24562306a36Sopenharmony_ci		/* Drop packets w/ critical options,
24662306a36Sopenharmony_ci		 * since we don't support any...
24762306a36Sopenharmony_ci		 */
24862306a36Sopenharmony_ci		if (gnvh->critical) {
24962306a36Sopenharmony_ci			geneve->dev->stats.rx_frame_errors++;
25062306a36Sopenharmony_ci			geneve->dev->stats.rx_errors++;
25162306a36Sopenharmony_ci			goto drop;
25262306a36Sopenharmony_ci		}
25362306a36Sopenharmony_ci	}
25462306a36Sopenharmony_ci
25562306a36Sopenharmony_ci	if (tun_dst)
25662306a36Sopenharmony_ci		skb_dst_set(skb, &tun_dst->dst);
25762306a36Sopenharmony_ci
25862306a36Sopenharmony_ci	if (gnvh->proto_type == htons(ETH_P_TEB)) {
25962306a36Sopenharmony_ci		skb_reset_mac_header(skb);
26062306a36Sopenharmony_ci		skb->protocol = eth_type_trans(skb, geneve->dev);
26162306a36Sopenharmony_ci		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
26262306a36Sopenharmony_ci
26362306a36Sopenharmony_ci		/* Ignore packet loops (and multicast echo) */
26462306a36Sopenharmony_ci		if (ether_addr_equal(eth_hdr(skb)->h_source,
26562306a36Sopenharmony_ci				     geneve->dev->dev_addr)) {
26662306a36Sopenharmony_ci			geneve->dev->stats.rx_errors++;
26762306a36Sopenharmony_ci			goto drop;
26862306a36Sopenharmony_ci		}
26962306a36Sopenharmony_ci	} else {
27062306a36Sopenharmony_ci		skb_reset_mac_header(skb);
27162306a36Sopenharmony_ci		skb->dev = geneve->dev;
27262306a36Sopenharmony_ci		skb->pkt_type = PACKET_HOST;
27362306a36Sopenharmony_ci	}
27462306a36Sopenharmony_ci
27562306a36Sopenharmony_ci	/* Save offset of outer header relative to skb->head,
27662306a36Sopenharmony_ci	 * because we are going to reset the network header to the inner header
27762306a36Sopenharmony_ci	 * and might change skb->head.
27862306a36Sopenharmony_ci	 */
27962306a36Sopenharmony_ci	nh = skb_network_header(skb) - skb->head;
28062306a36Sopenharmony_ci
28162306a36Sopenharmony_ci	skb_reset_network_header(skb);
28262306a36Sopenharmony_ci
28362306a36Sopenharmony_ci	if (!pskb_inet_may_pull(skb)) {
28462306a36Sopenharmony_ci		DEV_STATS_INC(geneve->dev, rx_length_errors);
28562306a36Sopenharmony_ci		DEV_STATS_INC(geneve->dev, rx_errors);
28662306a36Sopenharmony_ci		goto drop;
28762306a36Sopenharmony_ci	}
28862306a36Sopenharmony_ci
28962306a36Sopenharmony_ci	/* Get the outer header. */
29062306a36Sopenharmony_ci	oiph = skb->head + nh;
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	if (geneve_get_sk_family(gs) == AF_INET)
29362306a36Sopenharmony_ci		err = IP_ECN_decapsulate(oiph, skb);
29462306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
29562306a36Sopenharmony_ci	else
29662306a36Sopenharmony_ci		err = IP6_ECN_decapsulate(oiph, skb);
29762306a36Sopenharmony_ci#endif
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci	if (unlikely(err)) {
30062306a36Sopenharmony_ci		if (log_ecn_error) {
30162306a36Sopenharmony_ci			if (geneve_get_sk_family(gs) == AF_INET)
30262306a36Sopenharmony_ci				net_info_ratelimited("non-ECT from %pI4 "
30362306a36Sopenharmony_ci						     "with TOS=%#x\n",
30462306a36Sopenharmony_ci						     &((struct iphdr *)oiph)->saddr,
30562306a36Sopenharmony_ci						     ((struct iphdr *)oiph)->tos);
30662306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
30762306a36Sopenharmony_ci			else
30862306a36Sopenharmony_ci				net_info_ratelimited("non-ECT from %pI6\n",
30962306a36Sopenharmony_ci						     &((struct ipv6hdr *)oiph)->saddr);
31062306a36Sopenharmony_ci#endif
31162306a36Sopenharmony_ci		}
31262306a36Sopenharmony_ci		if (err > 1) {
31362306a36Sopenharmony_ci			++geneve->dev->stats.rx_frame_errors;
31462306a36Sopenharmony_ci			++geneve->dev->stats.rx_errors;
31562306a36Sopenharmony_ci			goto drop;
31662306a36Sopenharmony_ci		}
31762306a36Sopenharmony_ci	}
31862306a36Sopenharmony_ci
31962306a36Sopenharmony_ci	len = skb->len;
32062306a36Sopenharmony_ci	err = gro_cells_receive(&geneve->gro_cells, skb);
32162306a36Sopenharmony_ci	if (likely(err == NET_RX_SUCCESS))
32262306a36Sopenharmony_ci		dev_sw_netstats_rx_add(geneve->dev, len);
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci	return;
32562306a36Sopenharmony_cidrop:
32662306a36Sopenharmony_ci	/* Consume bad packet */
32762306a36Sopenharmony_ci	kfree_skb(skb);
32862306a36Sopenharmony_ci}
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci/* Setup stats when device is created */
33162306a36Sopenharmony_cistatic int geneve_init(struct net_device *dev)
33262306a36Sopenharmony_ci{
33362306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
33462306a36Sopenharmony_ci	int err;
33562306a36Sopenharmony_ci
33662306a36Sopenharmony_ci	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
33762306a36Sopenharmony_ci	if (!dev->tstats)
33862306a36Sopenharmony_ci		return -ENOMEM;
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	err = gro_cells_init(&geneve->gro_cells, dev);
34162306a36Sopenharmony_ci	if (err) {
34262306a36Sopenharmony_ci		free_percpu(dev->tstats);
34362306a36Sopenharmony_ci		return err;
34462306a36Sopenharmony_ci	}
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci	err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
34762306a36Sopenharmony_ci	if (err) {
34862306a36Sopenharmony_ci		free_percpu(dev->tstats);
34962306a36Sopenharmony_ci		gro_cells_destroy(&geneve->gro_cells);
35062306a36Sopenharmony_ci		return err;
35162306a36Sopenharmony_ci	}
35262306a36Sopenharmony_ci	return 0;
35362306a36Sopenharmony_ci}
35462306a36Sopenharmony_ci
35562306a36Sopenharmony_cistatic void geneve_uninit(struct net_device *dev)
35662306a36Sopenharmony_ci{
35762306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
35862306a36Sopenharmony_ci
35962306a36Sopenharmony_ci	dst_cache_destroy(&geneve->cfg.info.dst_cache);
36062306a36Sopenharmony_ci	gro_cells_destroy(&geneve->gro_cells);
36162306a36Sopenharmony_ci	free_percpu(dev->tstats);
36262306a36Sopenharmony_ci}
36362306a36Sopenharmony_ci
36462306a36Sopenharmony_ci/* Callback from net/ipv4/udp.c to receive packets */
36562306a36Sopenharmony_cistatic int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
36662306a36Sopenharmony_ci{
36762306a36Sopenharmony_ci	struct genevehdr *geneveh;
36862306a36Sopenharmony_ci	struct geneve_dev *geneve;
36962306a36Sopenharmony_ci	struct geneve_sock *gs;
37062306a36Sopenharmony_ci	__be16 inner_proto;
37162306a36Sopenharmony_ci	int opts_len;
37262306a36Sopenharmony_ci
37362306a36Sopenharmony_ci	/* Need UDP and Geneve header to be present */
37462306a36Sopenharmony_ci	if (unlikely(!pskb_may_pull(skb, GENEVE_BASE_HLEN)))
37562306a36Sopenharmony_ci		goto drop;
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_ci	/* Return packets with reserved bits set */
37862306a36Sopenharmony_ci	geneveh = geneve_hdr(skb);
37962306a36Sopenharmony_ci	if (unlikely(geneveh->ver != GENEVE_VER))
38062306a36Sopenharmony_ci		goto drop;
38162306a36Sopenharmony_ci
38262306a36Sopenharmony_ci	gs = rcu_dereference_sk_user_data(sk);
38362306a36Sopenharmony_ci	if (!gs)
38462306a36Sopenharmony_ci		goto drop;
38562306a36Sopenharmony_ci
38662306a36Sopenharmony_ci	geneve = geneve_lookup_skb(gs, skb);
38762306a36Sopenharmony_ci	if (!geneve)
38862306a36Sopenharmony_ci		goto drop;
38962306a36Sopenharmony_ci
39062306a36Sopenharmony_ci	inner_proto = geneveh->proto_type;
39162306a36Sopenharmony_ci
39262306a36Sopenharmony_ci	if (unlikely((!geneve->cfg.inner_proto_inherit &&
39362306a36Sopenharmony_ci		      inner_proto != htons(ETH_P_TEB)))) {
39462306a36Sopenharmony_ci		geneve->dev->stats.rx_dropped++;
39562306a36Sopenharmony_ci		goto drop;
39662306a36Sopenharmony_ci	}
39762306a36Sopenharmony_ci
39862306a36Sopenharmony_ci	opts_len = geneveh->opt_len * 4;
39962306a36Sopenharmony_ci	if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
40062306a36Sopenharmony_ci				 !net_eq(geneve->net, dev_net(geneve->dev)))) {
40162306a36Sopenharmony_ci		geneve->dev->stats.rx_dropped++;
40262306a36Sopenharmony_ci		goto drop;
40362306a36Sopenharmony_ci	}
40462306a36Sopenharmony_ci
40562306a36Sopenharmony_ci	geneve_rx(geneve, gs, skb);
40662306a36Sopenharmony_ci	return 0;
40762306a36Sopenharmony_ci
40862306a36Sopenharmony_cidrop:
40962306a36Sopenharmony_ci	/* Consume bad packet */
41062306a36Sopenharmony_ci	kfree_skb(skb);
41162306a36Sopenharmony_ci	return 0;
41262306a36Sopenharmony_ci}
41362306a36Sopenharmony_ci
41462306a36Sopenharmony_ci/* Callback from net/ipv{4,6}/udp.c to check that we have a tunnel for errors */
41562306a36Sopenharmony_cistatic int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
41662306a36Sopenharmony_ci{
41762306a36Sopenharmony_ci	struct genevehdr *geneveh;
41862306a36Sopenharmony_ci	struct geneve_sock *gs;
41962306a36Sopenharmony_ci	u8 zero_vni[3] = { 0 };
42062306a36Sopenharmony_ci	u8 *vni = zero_vni;
42162306a36Sopenharmony_ci
42262306a36Sopenharmony_ci	if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
42362306a36Sopenharmony_ci		return -EINVAL;
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci	geneveh = geneve_hdr(skb);
42662306a36Sopenharmony_ci	if (geneveh->ver != GENEVE_VER)
42762306a36Sopenharmony_ci		return -EINVAL;
42862306a36Sopenharmony_ci
42962306a36Sopenharmony_ci	if (geneveh->proto_type != htons(ETH_P_TEB))
43062306a36Sopenharmony_ci		return -EINVAL;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	gs = rcu_dereference_sk_user_data(sk);
43362306a36Sopenharmony_ci	if (!gs)
43462306a36Sopenharmony_ci		return -ENOENT;
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	if (geneve_get_sk_family(gs) == AF_INET) {
43762306a36Sopenharmony_ci		struct iphdr *iph = ip_hdr(skb);
43862306a36Sopenharmony_ci		__be32 addr4 = 0;
43962306a36Sopenharmony_ci
44062306a36Sopenharmony_ci		if (!gs->collect_md) {
44162306a36Sopenharmony_ci			vni = geneve_hdr(skb)->vni;
44262306a36Sopenharmony_ci			addr4 = iph->daddr;
44362306a36Sopenharmony_ci		}
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci		return geneve_lookup(gs, addr4, vni) ? 0 : -ENOENT;
44662306a36Sopenharmony_ci	}
44762306a36Sopenharmony_ci
44862306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
44962306a36Sopenharmony_ci	if (geneve_get_sk_family(gs) == AF_INET6) {
45062306a36Sopenharmony_ci		struct ipv6hdr *ip6h = ipv6_hdr(skb);
45162306a36Sopenharmony_ci		struct in6_addr addr6;
45262306a36Sopenharmony_ci
45362306a36Sopenharmony_ci		memset(&addr6, 0, sizeof(struct in6_addr));
45462306a36Sopenharmony_ci
45562306a36Sopenharmony_ci		if (!gs->collect_md) {
45662306a36Sopenharmony_ci			vni = geneve_hdr(skb)->vni;
45762306a36Sopenharmony_ci			addr6 = ip6h->daddr;
45862306a36Sopenharmony_ci		}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_ci		return geneve6_lookup(gs, addr6, vni) ? 0 : -ENOENT;
46162306a36Sopenharmony_ci	}
46262306a36Sopenharmony_ci#endif
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_ci	return -EPFNOSUPPORT;
46562306a36Sopenharmony_ci}
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_cistatic struct socket *geneve_create_sock(struct net *net, bool ipv6,
46862306a36Sopenharmony_ci					 __be16 port, bool ipv6_rx_csum)
46962306a36Sopenharmony_ci{
47062306a36Sopenharmony_ci	struct socket *sock;
47162306a36Sopenharmony_ci	struct udp_port_cfg udp_conf;
47262306a36Sopenharmony_ci	int err;
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ci	memset(&udp_conf, 0, sizeof(udp_conf));
47562306a36Sopenharmony_ci
47662306a36Sopenharmony_ci	if (ipv6) {
47762306a36Sopenharmony_ci		udp_conf.family = AF_INET6;
47862306a36Sopenharmony_ci		udp_conf.ipv6_v6only = 1;
47962306a36Sopenharmony_ci		udp_conf.use_udp6_rx_checksums = ipv6_rx_csum;
48062306a36Sopenharmony_ci	} else {
48162306a36Sopenharmony_ci		udp_conf.family = AF_INET;
48262306a36Sopenharmony_ci		udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
48362306a36Sopenharmony_ci	}
48462306a36Sopenharmony_ci
48562306a36Sopenharmony_ci	udp_conf.local_udp_port = port;
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci	/* Open UDP socket */
48862306a36Sopenharmony_ci	err = udp_sock_create(net, &udp_conf, &sock);
48962306a36Sopenharmony_ci	if (err < 0)
49062306a36Sopenharmony_ci		return ERR_PTR(err);
49162306a36Sopenharmony_ci
49262306a36Sopenharmony_ci	udp_allow_gso(sock->sk);
49362306a36Sopenharmony_ci	return sock;
49462306a36Sopenharmony_ci}
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_cistatic int geneve_hlen(struct genevehdr *gh)
49762306a36Sopenharmony_ci{
49862306a36Sopenharmony_ci	return sizeof(*gh) + gh->opt_len * 4;
49962306a36Sopenharmony_ci}
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_cistatic struct sk_buff *geneve_gro_receive(struct sock *sk,
50262306a36Sopenharmony_ci					  struct list_head *head,
50362306a36Sopenharmony_ci					  struct sk_buff *skb)
50462306a36Sopenharmony_ci{
50562306a36Sopenharmony_ci	struct sk_buff *pp = NULL;
50662306a36Sopenharmony_ci	struct sk_buff *p;
50762306a36Sopenharmony_ci	struct genevehdr *gh, *gh2;
50862306a36Sopenharmony_ci	unsigned int hlen, gh_len, off_gnv;
50962306a36Sopenharmony_ci	const struct packet_offload *ptype;
51062306a36Sopenharmony_ci	__be16 type;
51162306a36Sopenharmony_ci	int flush = 1;
51262306a36Sopenharmony_ci
51362306a36Sopenharmony_ci	off_gnv = skb_gro_offset(skb);
51462306a36Sopenharmony_ci	hlen = off_gnv + sizeof(*gh);
51562306a36Sopenharmony_ci	gh = skb_gro_header(skb, hlen, off_gnv);
51662306a36Sopenharmony_ci	if (unlikely(!gh))
51762306a36Sopenharmony_ci		goto out;
51862306a36Sopenharmony_ci
51962306a36Sopenharmony_ci	if (gh->ver != GENEVE_VER || gh->oam)
52062306a36Sopenharmony_ci		goto out;
52162306a36Sopenharmony_ci	gh_len = geneve_hlen(gh);
52262306a36Sopenharmony_ci
52362306a36Sopenharmony_ci	hlen = off_gnv + gh_len;
52462306a36Sopenharmony_ci	if (skb_gro_header_hard(skb, hlen)) {
52562306a36Sopenharmony_ci		gh = skb_gro_header_slow(skb, hlen, off_gnv);
52662306a36Sopenharmony_ci		if (unlikely(!gh))
52762306a36Sopenharmony_ci			goto out;
52862306a36Sopenharmony_ci	}
52962306a36Sopenharmony_ci
53062306a36Sopenharmony_ci	list_for_each_entry(p, head, list) {
53162306a36Sopenharmony_ci		if (!NAPI_GRO_CB(p)->same_flow)
53262306a36Sopenharmony_ci			continue;
53362306a36Sopenharmony_ci
53462306a36Sopenharmony_ci		gh2 = (struct genevehdr *)(p->data + off_gnv);
53562306a36Sopenharmony_ci		if (gh->opt_len != gh2->opt_len ||
53662306a36Sopenharmony_ci		    memcmp(gh, gh2, gh_len)) {
53762306a36Sopenharmony_ci			NAPI_GRO_CB(p)->same_flow = 0;
53862306a36Sopenharmony_ci			continue;
53962306a36Sopenharmony_ci		}
54062306a36Sopenharmony_ci	}
54162306a36Sopenharmony_ci
54262306a36Sopenharmony_ci	skb_gro_pull(skb, gh_len);
54362306a36Sopenharmony_ci	skb_gro_postpull_rcsum(skb, gh, gh_len);
54462306a36Sopenharmony_ci	type = gh->proto_type;
54562306a36Sopenharmony_ci	if (likely(type == htons(ETH_P_TEB)))
54662306a36Sopenharmony_ci		return call_gro_receive(eth_gro_receive, head, skb);
54762306a36Sopenharmony_ci
54862306a36Sopenharmony_ci	ptype = gro_find_receive_by_type(type);
54962306a36Sopenharmony_ci	if (!ptype)
55062306a36Sopenharmony_ci		goto out;
55162306a36Sopenharmony_ci
55262306a36Sopenharmony_ci	pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
55362306a36Sopenharmony_ci	flush = 0;
55462306a36Sopenharmony_ci
55562306a36Sopenharmony_ciout:
55662306a36Sopenharmony_ci	skb_gro_flush_final(skb, pp, flush);
55762306a36Sopenharmony_ci
55862306a36Sopenharmony_ci	return pp;
55962306a36Sopenharmony_ci}
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_cistatic int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
56262306a36Sopenharmony_ci			       int nhoff)
56362306a36Sopenharmony_ci{
56462306a36Sopenharmony_ci	struct genevehdr *gh;
56562306a36Sopenharmony_ci	struct packet_offload *ptype;
56662306a36Sopenharmony_ci	__be16 type;
56762306a36Sopenharmony_ci	int gh_len;
56862306a36Sopenharmony_ci	int err = -ENOSYS;
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_ci	gh = (struct genevehdr *)(skb->data + nhoff);
57162306a36Sopenharmony_ci	gh_len = geneve_hlen(gh);
57262306a36Sopenharmony_ci	type = gh->proto_type;
57362306a36Sopenharmony_ci
57462306a36Sopenharmony_ci	/* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
57562306a36Sopenharmony_ci	if (likely(type == htons(ETH_P_TEB)))
57662306a36Sopenharmony_ci		return eth_gro_complete(skb, nhoff + gh_len);
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	ptype = gro_find_complete_by_type(type);
57962306a36Sopenharmony_ci	if (ptype)
58062306a36Sopenharmony_ci		err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
58162306a36Sopenharmony_ci
58262306a36Sopenharmony_ci	skb_set_inner_mac_header(skb, nhoff + gh_len);
58362306a36Sopenharmony_ci
58462306a36Sopenharmony_ci	return err;
58562306a36Sopenharmony_ci}
58662306a36Sopenharmony_ci
58762306a36Sopenharmony_ci/* Create new listen socket if needed */
58862306a36Sopenharmony_cistatic struct geneve_sock *geneve_socket_create(struct net *net, __be16 port,
58962306a36Sopenharmony_ci						bool ipv6, bool ipv6_rx_csum)
59062306a36Sopenharmony_ci{
59162306a36Sopenharmony_ci	struct geneve_net *gn = net_generic(net, geneve_net_id);
59262306a36Sopenharmony_ci	struct geneve_sock *gs;
59362306a36Sopenharmony_ci	struct socket *sock;
59462306a36Sopenharmony_ci	struct udp_tunnel_sock_cfg tunnel_cfg;
59562306a36Sopenharmony_ci	int h;
59662306a36Sopenharmony_ci
59762306a36Sopenharmony_ci	gs = kzalloc(sizeof(*gs), GFP_KERNEL);
59862306a36Sopenharmony_ci	if (!gs)
59962306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
60062306a36Sopenharmony_ci
60162306a36Sopenharmony_ci	sock = geneve_create_sock(net, ipv6, port, ipv6_rx_csum);
60262306a36Sopenharmony_ci	if (IS_ERR(sock)) {
60362306a36Sopenharmony_ci		kfree(gs);
60462306a36Sopenharmony_ci		return ERR_CAST(sock);
60562306a36Sopenharmony_ci	}
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_ci	gs->sock = sock;
60862306a36Sopenharmony_ci	gs->refcnt = 1;
60962306a36Sopenharmony_ci	for (h = 0; h < VNI_HASH_SIZE; ++h)
61062306a36Sopenharmony_ci		INIT_HLIST_HEAD(&gs->vni_list[h]);
61162306a36Sopenharmony_ci
61262306a36Sopenharmony_ci	/* Initialize the geneve udp offloads structure */
61362306a36Sopenharmony_ci	udp_tunnel_notify_add_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
61462306a36Sopenharmony_ci
61562306a36Sopenharmony_ci	/* Mark socket as an encapsulation socket */
61662306a36Sopenharmony_ci	memset(&tunnel_cfg, 0, sizeof(tunnel_cfg));
61762306a36Sopenharmony_ci	tunnel_cfg.sk_user_data = gs;
61862306a36Sopenharmony_ci	tunnel_cfg.encap_type = 1;
61962306a36Sopenharmony_ci	tunnel_cfg.gro_receive = geneve_gro_receive;
62062306a36Sopenharmony_ci	tunnel_cfg.gro_complete = geneve_gro_complete;
62162306a36Sopenharmony_ci	tunnel_cfg.encap_rcv = geneve_udp_encap_recv;
62262306a36Sopenharmony_ci	tunnel_cfg.encap_err_lookup = geneve_udp_encap_err_lookup;
62362306a36Sopenharmony_ci	tunnel_cfg.encap_destroy = NULL;
62462306a36Sopenharmony_ci	setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
62562306a36Sopenharmony_ci	list_add(&gs->list, &gn->sock_list);
62662306a36Sopenharmony_ci	return gs;
62762306a36Sopenharmony_ci}
62862306a36Sopenharmony_ci
62962306a36Sopenharmony_cistatic void __geneve_sock_release(struct geneve_sock *gs)
63062306a36Sopenharmony_ci{
63162306a36Sopenharmony_ci	if (!gs || --gs->refcnt)
63262306a36Sopenharmony_ci		return;
63362306a36Sopenharmony_ci
63462306a36Sopenharmony_ci	list_del(&gs->list);
63562306a36Sopenharmony_ci	udp_tunnel_notify_del_rx_port(gs->sock, UDP_TUNNEL_TYPE_GENEVE);
63662306a36Sopenharmony_ci	udp_tunnel_sock_release(gs->sock);
63762306a36Sopenharmony_ci	kfree_rcu(gs, rcu);
63862306a36Sopenharmony_ci}
63962306a36Sopenharmony_ci
64062306a36Sopenharmony_cistatic void geneve_sock_release(struct geneve_dev *geneve)
64162306a36Sopenharmony_ci{
64262306a36Sopenharmony_ci	struct geneve_sock *gs4 = rtnl_dereference(geneve->sock4);
64362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
64462306a36Sopenharmony_ci	struct geneve_sock *gs6 = rtnl_dereference(geneve->sock6);
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci	rcu_assign_pointer(geneve->sock6, NULL);
64762306a36Sopenharmony_ci#endif
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci	rcu_assign_pointer(geneve->sock4, NULL);
65062306a36Sopenharmony_ci	synchronize_net();
65162306a36Sopenharmony_ci
65262306a36Sopenharmony_ci	__geneve_sock_release(gs4);
65362306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
65462306a36Sopenharmony_ci	__geneve_sock_release(gs6);
65562306a36Sopenharmony_ci#endif
65662306a36Sopenharmony_ci}
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_cistatic struct geneve_sock *geneve_find_sock(struct geneve_net *gn,
65962306a36Sopenharmony_ci					    sa_family_t family,
66062306a36Sopenharmony_ci					    __be16 dst_port)
66162306a36Sopenharmony_ci{
66262306a36Sopenharmony_ci	struct geneve_sock *gs;
66362306a36Sopenharmony_ci
66462306a36Sopenharmony_ci	list_for_each_entry(gs, &gn->sock_list, list) {
66562306a36Sopenharmony_ci		if (inet_sk(gs->sock->sk)->inet_sport == dst_port &&
66662306a36Sopenharmony_ci		    geneve_get_sk_family(gs) == family) {
66762306a36Sopenharmony_ci			return gs;
66862306a36Sopenharmony_ci		}
66962306a36Sopenharmony_ci	}
67062306a36Sopenharmony_ci	return NULL;
67162306a36Sopenharmony_ci}
67262306a36Sopenharmony_ci
67362306a36Sopenharmony_cistatic int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
67462306a36Sopenharmony_ci{
67562306a36Sopenharmony_ci	struct net *net = geneve->net;
67662306a36Sopenharmony_ci	struct geneve_net *gn = net_generic(net, geneve_net_id);
67762306a36Sopenharmony_ci	struct geneve_dev_node *node;
67862306a36Sopenharmony_ci	struct geneve_sock *gs;
67962306a36Sopenharmony_ci	__u8 vni[3];
68062306a36Sopenharmony_ci	__u32 hash;
68162306a36Sopenharmony_ci
68262306a36Sopenharmony_ci	gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst);
68362306a36Sopenharmony_ci	if (gs) {
68462306a36Sopenharmony_ci		gs->refcnt++;
68562306a36Sopenharmony_ci		goto out;
68662306a36Sopenharmony_ci	}
68762306a36Sopenharmony_ci
68862306a36Sopenharmony_ci	gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
68962306a36Sopenharmony_ci				  geneve->cfg.use_udp6_rx_checksums);
69062306a36Sopenharmony_ci	if (IS_ERR(gs))
69162306a36Sopenharmony_ci		return PTR_ERR(gs);
69262306a36Sopenharmony_ci
69362306a36Sopenharmony_ciout:
69462306a36Sopenharmony_ci	gs->collect_md = geneve->cfg.collect_md;
69562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
69662306a36Sopenharmony_ci	if (ipv6) {
69762306a36Sopenharmony_ci		rcu_assign_pointer(geneve->sock6, gs);
69862306a36Sopenharmony_ci		node = &geneve->hlist6;
69962306a36Sopenharmony_ci	} else
70062306a36Sopenharmony_ci#endif
70162306a36Sopenharmony_ci	{
70262306a36Sopenharmony_ci		rcu_assign_pointer(geneve->sock4, gs);
70362306a36Sopenharmony_ci		node = &geneve->hlist4;
70462306a36Sopenharmony_ci	}
70562306a36Sopenharmony_ci	node->geneve = geneve;
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni);
70862306a36Sopenharmony_ci	hash = geneve_net_vni_hash(vni);
70962306a36Sopenharmony_ci	hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
71062306a36Sopenharmony_ci	return 0;
71162306a36Sopenharmony_ci}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_cistatic int geneve_open(struct net_device *dev)
71462306a36Sopenharmony_ci{
71562306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
71662306a36Sopenharmony_ci	bool metadata = geneve->cfg.collect_md;
71762306a36Sopenharmony_ci	bool ipv4, ipv6;
71862306a36Sopenharmony_ci	int ret = 0;
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci	ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
72162306a36Sopenharmony_ci	ipv4 = !ipv6 || metadata;
72262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
72362306a36Sopenharmony_ci	if (ipv6) {
72462306a36Sopenharmony_ci		ret = geneve_sock_add(geneve, true);
72562306a36Sopenharmony_ci		if (ret < 0 && ret != -EAFNOSUPPORT)
72662306a36Sopenharmony_ci			ipv4 = false;
72762306a36Sopenharmony_ci	}
72862306a36Sopenharmony_ci#endif
72962306a36Sopenharmony_ci	if (ipv4)
73062306a36Sopenharmony_ci		ret = geneve_sock_add(geneve, false);
73162306a36Sopenharmony_ci	if (ret < 0)
73262306a36Sopenharmony_ci		geneve_sock_release(geneve);
73362306a36Sopenharmony_ci
73462306a36Sopenharmony_ci	return ret;
73562306a36Sopenharmony_ci}
73662306a36Sopenharmony_ci
73762306a36Sopenharmony_cistatic int geneve_stop(struct net_device *dev)
73862306a36Sopenharmony_ci{
73962306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	hlist_del_init_rcu(&geneve->hlist4.hlist);
74262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
74362306a36Sopenharmony_ci	hlist_del_init_rcu(&geneve->hlist6.hlist);
74462306a36Sopenharmony_ci#endif
74562306a36Sopenharmony_ci	geneve_sock_release(geneve);
74662306a36Sopenharmony_ci	return 0;
74762306a36Sopenharmony_ci}
74862306a36Sopenharmony_ci
74962306a36Sopenharmony_cistatic void geneve_build_header(struct genevehdr *geneveh,
75062306a36Sopenharmony_ci				const struct ip_tunnel_info *info,
75162306a36Sopenharmony_ci				__be16 inner_proto)
75262306a36Sopenharmony_ci{
75362306a36Sopenharmony_ci	geneveh->ver = GENEVE_VER;
75462306a36Sopenharmony_ci	geneveh->opt_len = info->options_len / 4;
75562306a36Sopenharmony_ci	geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
75662306a36Sopenharmony_ci	geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
75762306a36Sopenharmony_ci	geneveh->rsvd1 = 0;
75862306a36Sopenharmony_ci	tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
75962306a36Sopenharmony_ci	geneveh->proto_type = inner_proto;
76062306a36Sopenharmony_ci	geneveh->rsvd2 = 0;
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_ci	if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
76362306a36Sopenharmony_ci		ip_tunnel_info_opts_get(geneveh->options, info);
76462306a36Sopenharmony_ci}
76562306a36Sopenharmony_ci
76662306a36Sopenharmony_cistatic int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
76762306a36Sopenharmony_ci			    const struct ip_tunnel_info *info,
76862306a36Sopenharmony_ci			    bool xnet, int ip_hdr_len,
76962306a36Sopenharmony_ci			    bool inner_proto_inherit)
77062306a36Sopenharmony_ci{
77162306a36Sopenharmony_ci	bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
77262306a36Sopenharmony_ci	struct genevehdr *gnvh;
77362306a36Sopenharmony_ci	__be16 inner_proto;
77462306a36Sopenharmony_ci	int min_headroom;
77562306a36Sopenharmony_ci	int err;
77662306a36Sopenharmony_ci
77762306a36Sopenharmony_ci	skb_reset_mac_header(skb);
77862306a36Sopenharmony_ci	skb_scrub_packet(skb, xnet);
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci	min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len +
78162306a36Sopenharmony_ci		       GENEVE_BASE_HLEN + info->options_len + ip_hdr_len;
78262306a36Sopenharmony_ci	err = skb_cow_head(skb, min_headroom);
78362306a36Sopenharmony_ci	if (unlikely(err))
78462306a36Sopenharmony_ci		goto free_dst;
78562306a36Sopenharmony_ci
78662306a36Sopenharmony_ci	err = udp_tunnel_handle_offloads(skb, udp_sum);
78762306a36Sopenharmony_ci	if (err)
78862306a36Sopenharmony_ci		goto free_dst;
78962306a36Sopenharmony_ci
79062306a36Sopenharmony_ci	gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
79162306a36Sopenharmony_ci	inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB);
79262306a36Sopenharmony_ci	geneve_build_header(gnvh, info, inner_proto);
79362306a36Sopenharmony_ci	skb_set_inner_protocol(skb, inner_proto);
79462306a36Sopenharmony_ci	return 0;
79562306a36Sopenharmony_ci
79662306a36Sopenharmony_cifree_dst:
79762306a36Sopenharmony_ci	dst_release(dst);
79862306a36Sopenharmony_ci	return err;
79962306a36Sopenharmony_ci}
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_cistatic struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
80262306a36Sopenharmony_ci				       struct net_device *dev,
80362306a36Sopenharmony_ci				       struct geneve_sock *gs4,
80462306a36Sopenharmony_ci				       struct flowi4 *fl4,
80562306a36Sopenharmony_ci				       const struct ip_tunnel_info *info,
80662306a36Sopenharmony_ci				       __be16 dport, __be16 sport,
80762306a36Sopenharmony_ci				       __u8 *full_tos)
80862306a36Sopenharmony_ci{
80962306a36Sopenharmony_ci	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
81062306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
81162306a36Sopenharmony_ci	struct dst_cache *dst_cache;
81262306a36Sopenharmony_ci	struct rtable *rt = NULL;
81362306a36Sopenharmony_ci	__u8 tos;
81462306a36Sopenharmony_ci
81562306a36Sopenharmony_ci	if (!gs4)
81662306a36Sopenharmony_ci		return ERR_PTR(-EIO);
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	memset(fl4, 0, sizeof(*fl4));
81962306a36Sopenharmony_ci	fl4->flowi4_mark = skb->mark;
82062306a36Sopenharmony_ci	fl4->flowi4_proto = IPPROTO_UDP;
82162306a36Sopenharmony_ci	fl4->daddr = info->key.u.ipv4.dst;
82262306a36Sopenharmony_ci	fl4->saddr = info->key.u.ipv4.src;
82362306a36Sopenharmony_ci	fl4->fl4_dport = dport;
82462306a36Sopenharmony_ci	fl4->fl4_sport = sport;
82562306a36Sopenharmony_ci	fl4->flowi4_flags = info->key.flow_flags;
82662306a36Sopenharmony_ci
82762306a36Sopenharmony_ci	tos = info->key.tos;
82862306a36Sopenharmony_ci	if ((tos == 1) && !geneve->cfg.collect_md) {
82962306a36Sopenharmony_ci		tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
83062306a36Sopenharmony_ci		use_cache = false;
83162306a36Sopenharmony_ci	}
83262306a36Sopenharmony_ci	fl4->flowi4_tos = RT_TOS(tos);
83362306a36Sopenharmony_ci	if (full_tos)
83462306a36Sopenharmony_ci		*full_tos = tos;
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	dst_cache = (struct dst_cache *)&info->dst_cache;
83762306a36Sopenharmony_ci	if (use_cache) {
83862306a36Sopenharmony_ci		rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
83962306a36Sopenharmony_ci		if (rt)
84062306a36Sopenharmony_ci			return rt;
84162306a36Sopenharmony_ci	}
84262306a36Sopenharmony_ci	rt = ip_route_output_key(geneve->net, fl4);
84362306a36Sopenharmony_ci	if (IS_ERR(rt)) {
84462306a36Sopenharmony_ci		netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
84562306a36Sopenharmony_ci		return ERR_PTR(-ENETUNREACH);
84662306a36Sopenharmony_ci	}
84762306a36Sopenharmony_ci	if (rt->dst.dev == dev) { /* is this necessary? */
84862306a36Sopenharmony_ci		netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
84962306a36Sopenharmony_ci		ip_rt_put(rt);
85062306a36Sopenharmony_ci		return ERR_PTR(-ELOOP);
85162306a36Sopenharmony_ci	}
85262306a36Sopenharmony_ci	if (use_cache)
85362306a36Sopenharmony_ci		dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
85462306a36Sopenharmony_ci	return rt;
85562306a36Sopenharmony_ci}
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
85862306a36Sopenharmony_cistatic struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
85962306a36Sopenharmony_ci					   struct net_device *dev,
86062306a36Sopenharmony_ci					   struct geneve_sock *gs6,
86162306a36Sopenharmony_ci					   struct flowi6 *fl6,
86262306a36Sopenharmony_ci					   const struct ip_tunnel_info *info,
86362306a36Sopenharmony_ci					   __be16 dport, __be16 sport)
86462306a36Sopenharmony_ci{
86562306a36Sopenharmony_ci	bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
86662306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
86762306a36Sopenharmony_ci	struct dst_entry *dst = NULL;
86862306a36Sopenharmony_ci	struct dst_cache *dst_cache;
86962306a36Sopenharmony_ci	__u8 prio;
87062306a36Sopenharmony_ci
87162306a36Sopenharmony_ci	if (!gs6)
87262306a36Sopenharmony_ci		return ERR_PTR(-EIO);
87362306a36Sopenharmony_ci
87462306a36Sopenharmony_ci	memset(fl6, 0, sizeof(*fl6));
87562306a36Sopenharmony_ci	fl6->flowi6_mark = skb->mark;
87662306a36Sopenharmony_ci	fl6->flowi6_proto = IPPROTO_UDP;
87762306a36Sopenharmony_ci	fl6->daddr = info->key.u.ipv6.dst;
87862306a36Sopenharmony_ci	fl6->saddr = info->key.u.ipv6.src;
87962306a36Sopenharmony_ci	fl6->fl6_dport = dport;
88062306a36Sopenharmony_ci	fl6->fl6_sport = sport;
88162306a36Sopenharmony_ci
88262306a36Sopenharmony_ci	prio = info->key.tos;
88362306a36Sopenharmony_ci	if ((prio == 1) && !geneve->cfg.collect_md) {
88462306a36Sopenharmony_ci		prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
88562306a36Sopenharmony_ci		use_cache = false;
88662306a36Sopenharmony_ci	}
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci	fl6->flowlabel = ip6_make_flowinfo(prio, info->key.label);
88962306a36Sopenharmony_ci	dst_cache = (struct dst_cache *)&info->dst_cache;
89062306a36Sopenharmony_ci	if (use_cache) {
89162306a36Sopenharmony_ci		dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
89262306a36Sopenharmony_ci		if (dst)
89362306a36Sopenharmony_ci			return dst;
89462306a36Sopenharmony_ci	}
89562306a36Sopenharmony_ci	dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
89662306a36Sopenharmony_ci					      NULL);
89762306a36Sopenharmony_ci	if (IS_ERR(dst)) {
89862306a36Sopenharmony_ci		netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
89962306a36Sopenharmony_ci		return ERR_PTR(-ENETUNREACH);
90062306a36Sopenharmony_ci	}
90162306a36Sopenharmony_ci	if (dst->dev == dev) { /* is this necessary? */
90262306a36Sopenharmony_ci		netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
90362306a36Sopenharmony_ci		dst_release(dst);
90462306a36Sopenharmony_ci		return ERR_PTR(-ELOOP);
90562306a36Sopenharmony_ci	}
90662306a36Sopenharmony_ci
90762306a36Sopenharmony_ci	if (use_cache)
90862306a36Sopenharmony_ci		dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
90962306a36Sopenharmony_ci	return dst;
91062306a36Sopenharmony_ci}
91162306a36Sopenharmony_ci#endif
91262306a36Sopenharmony_ci
91362306a36Sopenharmony_cistatic int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
91462306a36Sopenharmony_ci			   struct geneve_dev *geneve,
91562306a36Sopenharmony_ci			   const struct ip_tunnel_info *info)
91662306a36Sopenharmony_ci{
91762306a36Sopenharmony_ci	bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
91862306a36Sopenharmony_ci	struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
91962306a36Sopenharmony_ci	const struct ip_tunnel_key *key = &info->key;
92062306a36Sopenharmony_ci	struct rtable *rt;
92162306a36Sopenharmony_ci	struct flowi4 fl4;
92262306a36Sopenharmony_ci	__u8 full_tos;
92362306a36Sopenharmony_ci	__u8 tos, ttl;
92462306a36Sopenharmony_ci	__be16 df = 0;
92562306a36Sopenharmony_ci	__be16 sport;
92662306a36Sopenharmony_ci	int err;
92762306a36Sopenharmony_ci
92862306a36Sopenharmony_ci	if (!pskb_inet_may_pull(skb))
92962306a36Sopenharmony_ci		return -EINVAL;
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ci	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
93262306a36Sopenharmony_ci	rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
93362306a36Sopenharmony_ci			      geneve->cfg.info.key.tp_dst, sport, &full_tos);
93462306a36Sopenharmony_ci	if (IS_ERR(rt))
93562306a36Sopenharmony_ci		return PTR_ERR(rt);
93662306a36Sopenharmony_ci
93762306a36Sopenharmony_ci	err = skb_tunnel_check_pmtu(skb, &rt->dst,
93862306a36Sopenharmony_ci				    GENEVE_IPV4_HLEN + info->options_len,
93962306a36Sopenharmony_ci				    netif_is_any_bridge_port(dev));
94062306a36Sopenharmony_ci	if (err < 0) {
94162306a36Sopenharmony_ci		dst_release(&rt->dst);
94262306a36Sopenharmony_ci		return err;
94362306a36Sopenharmony_ci	} else if (err) {
94462306a36Sopenharmony_ci		struct ip_tunnel_info *info;
94562306a36Sopenharmony_ci
94662306a36Sopenharmony_ci		info = skb_tunnel_info(skb);
94762306a36Sopenharmony_ci		if (info) {
94862306a36Sopenharmony_ci			struct ip_tunnel_info *unclone;
94962306a36Sopenharmony_ci
95062306a36Sopenharmony_ci			unclone = skb_tunnel_info_unclone(skb);
95162306a36Sopenharmony_ci			if (unlikely(!unclone)) {
95262306a36Sopenharmony_ci				dst_release(&rt->dst);
95362306a36Sopenharmony_ci				return -ENOMEM;
95462306a36Sopenharmony_ci			}
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci			unclone->key.u.ipv4.dst = fl4.saddr;
95762306a36Sopenharmony_ci			unclone->key.u.ipv4.src = fl4.daddr;
95862306a36Sopenharmony_ci		}
95962306a36Sopenharmony_ci
96062306a36Sopenharmony_ci		if (!pskb_may_pull(skb, ETH_HLEN)) {
96162306a36Sopenharmony_ci			dst_release(&rt->dst);
96262306a36Sopenharmony_ci			return -EINVAL;
96362306a36Sopenharmony_ci		}
96462306a36Sopenharmony_ci
96562306a36Sopenharmony_ci		skb->protocol = eth_type_trans(skb, geneve->dev);
96662306a36Sopenharmony_ci		__netif_rx(skb);
96762306a36Sopenharmony_ci		dst_release(&rt->dst);
96862306a36Sopenharmony_ci		return -EMSGSIZE;
96962306a36Sopenharmony_ci	}
97062306a36Sopenharmony_ci
97162306a36Sopenharmony_ci	if (geneve->cfg.collect_md) {
97262306a36Sopenharmony_ci		tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
97362306a36Sopenharmony_ci		ttl = key->ttl;
97462306a36Sopenharmony_ci
97562306a36Sopenharmony_ci		df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
97662306a36Sopenharmony_ci	} else {
97762306a36Sopenharmony_ci		tos = ip_tunnel_ecn_encap(full_tos, ip_hdr(skb), skb);
97862306a36Sopenharmony_ci		if (geneve->cfg.ttl_inherit)
97962306a36Sopenharmony_ci			ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
98062306a36Sopenharmony_ci		else
98162306a36Sopenharmony_ci			ttl = key->ttl;
98262306a36Sopenharmony_ci		ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
98362306a36Sopenharmony_ci
98462306a36Sopenharmony_ci		if (geneve->cfg.df == GENEVE_DF_SET) {
98562306a36Sopenharmony_ci			df = htons(IP_DF);
98662306a36Sopenharmony_ci		} else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
98762306a36Sopenharmony_ci			struct ethhdr *eth = eth_hdr(skb);
98862306a36Sopenharmony_ci
98962306a36Sopenharmony_ci			if (ntohs(eth->h_proto) == ETH_P_IPV6) {
99062306a36Sopenharmony_ci				df = htons(IP_DF);
99162306a36Sopenharmony_ci			} else if (ntohs(eth->h_proto) == ETH_P_IP) {
99262306a36Sopenharmony_ci				struct iphdr *iph = ip_hdr(skb);
99362306a36Sopenharmony_ci
99462306a36Sopenharmony_ci				if (iph->frag_off & htons(IP_DF))
99562306a36Sopenharmony_ci					df = htons(IP_DF);
99662306a36Sopenharmony_ci			}
99762306a36Sopenharmony_ci		}
99862306a36Sopenharmony_ci	}
99962306a36Sopenharmony_ci
100062306a36Sopenharmony_ci	err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr),
100162306a36Sopenharmony_ci			       geneve->cfg.inner_proto_inherit);
100262306a36Sopenharmony_ci	if (unlikely(err))
100362306a36Sopenharmony_ci		return err;
100462306a36Sopenharmony_ci
100562306a36Sopenharmony_ci	udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
100662306a36Sopenharmony_ci			    tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
100762306a36Sopenharmony_ci			    !net_eq(geneve->net, dev_net(geneve->dev)),
100862306a36Sopenharmony_ci			    !(info->key.tun_flags & TUNNEL_CSUM));
100962306a36Sopenharmony_ci	return 0;
101062306a36Sopenharmony_ci}
101162306a36Sopenharmony_ci
101262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
101362306a36Sopenharmony_cistatic int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
101462306a36Sopenharmony_ci			    struct geneve_dev *geneve,
101562306a36Sopenharmony_ci			    const struct ip_tunnel_info *info)
101662306a36Sopenharmony_ci{
101762306a36Sopenharmony_ci	bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
101862306a36Sopenharmony_ci	struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
101962306a36Sopenharmony_ci	const struct ip_tunnel_key *key = &info->key;
102062306a36Sopenharmony_ci	struct dst_entry *dst = NULL;
102162306a36Sopenharmony_ci	struct flowi6 fl6;
102262306a36Sopenharmony_ci	__u8 prio, ttl;
102362306a36Sopenharmony_ci	__be16 sport;
102462306a36Sopenharmony_ci	int err;
102562306a36Sopenharmony_ci
102662306a36Sopenharmony_ci	if (!pskb_inet_may_pull(skb))
102762306a36Sopenharmony_ci		return -EINVAL;
102862306a36Sopenharmony_ci
102962306a36Sopenharmony_ci	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
103062306a36Sopenharmony_ci	dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
103162306a36Sopenharmony_ci				geneve->cfg.info.key.tp_dst, sport);
103262306a36Sopenharmony_ci	if (IS_ERR(dst))
103362306a36Sopenharmony_ci		return PTR_ERR(dst);
103462306a36Sopenharmony_ci
103562306a36Sopenharmony_ci	err = skb_tunnel_check_pmtu(skb, dst,
103662306a36Sopenharmony_ci				    GENEVE_IPV6_HLEN + info->options_len,
103762306a36Sopenharmony_ci				    netif_is_any_bridge_port(dev));
103862306a36Sopenharmony_ci	if (err < 0) {
103962306a36Sopenharmony_ci		dst_release(dst);
104062306a36Sopenharmony_ci		return err;
104162306a36Sopenharmony_ci	} else if (err) {
104262306a36Sopenharmony_ci		struct ip_tunnel_info *info = skb_tunnel_info(skb);
104362306a36Sopenharmony_ci
104462306a36Sopenharmony_ci		if (info) {
104562306a36Sopenharmony_ci			struct ip_tunnel_info *unclone;
104662306a36Sopenharmony_ci
104762306a36Sopenharmony_ci			unclone = skb_tunnel_info_unclone(skb);
104862306a36Sopenharmony_ci			if (unlikely(!unclone)) {
104962306a36Sopenharmony_ci				dst_release(dst);
105062306a36Sopenharmony_ci				return -ENOMEM;
105162306a36Sopenharmony_ci			}
105262306a36Sopenharmony_ci
105362306a36Sopenharmony_ci			unclone->key.u.ipv6.dst = fl6.saddr;
105462306a36Sopenharmony_ci			unclone->key.u.ipv6.src = fl6.daddr;
105562306a36Sopenharmony_ci		}
105662306a36Sopenharmony_ci
105762306a36Sopenharmony_ci		if (!pskb_may_pull(skb, ETH_HLEN)) {
105862306a36Sopenharmony_ci			dst_release(dst);
105962306a36Sopenharmony_ci			return -EINVAL;
106062306a36Sopenharmony_ci		}
106162306a36Sopenharmony_ci
106262306a36Sopenharmony_ci		skb->protocol = eth_type_trans(skb, geneve->dev);
106362306a36Sopenharmony_ci		__netif_rx(skb);
106462306a36Sopenharmony_ci		dst_release(dst);
106562306a36Sopenharmony_ci		return -EMSGSIZE;
106662306a36Sopenharmony_ci	}
106762306a36Sopenharmony_ci
106862306a36Sopenharmony_ci	if (geneve->cfg.collect_md) {
106962306a36Sopenharmony_ci		prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
107062306a36Sopenharmony_ci		ttl = key->ttl;
107162306a36Sopenharmony_ci	} else {
107262306a36Sopenharmony_ci		prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
107362306a36Sopenharmony_ci					   ip_hdr(skb), skb);
107462306a36Sopenharmony_ci		if (geneve->cfg.ttl_inherit)
107562306a36Sopenharmony_ci			ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
107662306a36Sopenharmony_ci		else
107762306a36Sopenharmony_ci			ttl = key->ttl;
107862306a36Sopenharmony_ci		ttl = ttl ? : ip6_dst_hoplimit(dst);
107962306a36Sopenharmony_ci	}
108062306a36Sopenharmony_ci	err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr),
108162306a36Sopenharmony_ci			       geneve->cfg.inner_proto_inherit);
108262306a36Sopenharmony_ci	if (unlikely(err))
108362306a36Sopenharmony_ci		return err;
108462306a36Sopenharmony_ci
108562306a36Sopenharmony_ci	udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
108662306a36Sopenharmony_ci			     &fl6.saddr, &fl6.daddr, prio, ttl,
108762306a36Sopenharmony_ci			     info->key.label, sport, geneve->cfg.info.key.tp_dst,
108862306a36Sopenharmony_ci			     !(info->key.tun_flags & TUNNEL_CSUM));
108962306a36Sopenharmony_ci	return 0;
109062306a36Sopenharmony_ci}
109162306a36Sopenharmony_ci#endif
109262306a36Sopenharmony_ci
109362306a36Sopenharmony_cistatic netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
109462306a36Sopenharmony_ci{
109562306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
109662306a36Sopenharmony_ci	struct ip_tunnel_info *info = NULL;
109762306a36Sopenharmony_ci	int err;
109862306a36Sopenharmony_ci
109962306a36Sopenharmony_ci	if (geneve->cfg.collect_md) {
110062306a36Sopenharmony_ci		info = skb_tunnel_info(skb);
110162306a36Sopenharmony_ci		if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
110262306a36Sopenharmony_ci			netdev_dbg(dev, "no tunnel metadata\n");
110362306a36Sopenharmony_ci			dev_kfree_skb(skb);
110462306a36Sopenharmony_ci			dev->stats.tx_dropped++;
110562306a36Sopenharmony_ci			return NETDEV_TX_OK;
110662306a36Sopenharmony_ci		}
110762306a36Sopenharmony_ci	} else {
110862306a36Sopenharmony_ci		info = &geneve->cfg.info;
110962306a36Sopenharmony_ci	}
111062306a36Sopenharmony_ci
111162306a36Sopenharmony_ci	rcu_read_lock();
111262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
111362306a36Sopenharmony_ci	if (info->mode & IP_TUNNEL_INFO_IPV6)
111462306a36Sopenharmony_ci		err = geneve6_xmit_skb(skb, dev, geneve, info);
111562306a36Sopenharmony_ci	else
111662306a36Sopenharmony_ci#endif
111762306a36Sopenharmony_ci		err = geneve_xmit_skb(skb, dev, geneve, info);
111862306a36Sopenharmony_ci	rcu_read_unlock();
111962306a36Sopenharmony_ci
112062306a36Sopenharmony_ci	if (likely(!err))
112162306a36Sopenharmony_ci		return NETDEV_TX_OK;
112262306a36Sopenharmony_ci
112362306a36Sopenharmony_ci	if (err != -EMSGSIZE)
112462306a36Sopenharmony_ci		dev_kfree_skb(skb);
112562306a36Sopenharmony_ci
112662306a36Sopenharmony_ci	if (err == -ELOOP)
112762306a36Sopenharmony_ci		dev->stats.collisions++;
112862306a36Sopenharmony_ci	else if (err == -ENETUNREACH)
112962306a36Sopenharmony_ci		dev->stats.tx_carrier_errors++;
113062306a36Sopenharmony_ci
113162306a36Sopenharmony_ci	dev->stats.tx_errors++;
113262306a36Sopenharmony_ci	return NETDEV_TX_OK;
113362306a36Sopenharmony_ci}
113462306a36Sopenharmony_ci
113562306a36Sopenharmony_cistatic int geneve_change_mtu(struct net_device *dev, int new_mtu)
113662306a36Sopenharmony_ci{
113762306a36Sopenharmony_ci	if (new_mtu > dev->max_mtu)
113862306a36Sopenharmony_ci		new_mtu = dev->max_mtu;
113962306a36Sopenharmony_ci	else if (new_mtu < dev->min_mtu)
114062306a36Sopenharmony_ci		new_mtu = dev->min_mtu;
114162306a36Sopenharmony_ci
114262306a36Sopenharmony_ci	dev->mtu = new_mtu;
114362306a36Sopenharmony_ci	return 0;
114462306a36Sopenharmony_ci}
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_cistatic int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
114762306a36Sopenharmony_ci{
114862306a36Sopenharmony_ci	struct ip_tunnel_info *info = skb_tunnel_info(skb);
114962306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
115062306a36Sopenharmony_ci	__be16 sport;
115162306a36Sopenharmony_ci
115262306a36Sopenharmony_ci	if (ip_tunnel_info_af(info) == AF_INET) {
115362306a36Sopenharmony_ci		struct rtable *rt;
115462306a36Sopenharmony_ci		struct flowi4 fl4;
115562306a36Sopenharmony_ci
115662306a36Sopenharmony_ci		struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
115762306a36Sopenharmony_ci		sport = udp_flow_src_port(geneve->net, skb,
115862306a36Sopenharmony_ci					  1, USHRT_MAX, true);
115962306a36Sopenharmony_ci
116062306a36Sopenharmony_ci		rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
116162306a36Sopenharmony_ci				      geneve->cfg.info.key.tp_dst, sport, NULL);
116262306a36Sopenharmony_ci		if (IS_ERR(rt))
116362306a36Sopenharmony_ci			return PTR_ERR(rt);
116462306a36Sopenharmony_ci
116562306a36Sopenharmony_ci		ip_rt_put(rt);
116662306a36Sopenharmony_ci		info->key.u.ipv4.src = fl4.saddr;
116762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
116862306a36Sopenharmony_ci	} else if (ip_tunnel_info_af(info) == AF_INET6) {
116962306a36Sopenharmony_ci		struct dst_entry *dst;
117062306a36Sopenharmony_ci		struct flowi6 fl6;
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci		struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
117362306a36Sopenharmony_ci		sport = udp_flow_src_port(geneve->net, skb,
117462306a36Sopenharmony_ci					  1, USHRT_MAX, true);
117562306a36Sopenharmony_ci
117662306a36Sopenharmony_ci		dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
117762306a36Sopenharmony_ci					geneve->cfg.info.key.tp_dst, sport);
117862306a36Sopenharmony_ci		if (IS_ERR(dst))
117962306a36Sopenharmony_ci			return PTR_ERR(dst);
118062306a36Sopenharmony_ci
118162306a36Sopenharmony_ci		dst_release(dst);
118262306a36Sopenharmony_ci		info->key.u.ipv6.src = fl6.saddr;
118362306a36Sopenharmony_ci#endif
118462306a36Sopenharmony_ci	} else {
118562306a36Sopenharmony_ci		return -EINVAL;
118662306a36Sopenharmony_ci	}
118762306a36Sopenharmony_ci
118862306a36Sopenharmony_ci	info->key.tp_src = sport;
118962306a36Sopenharmony_ci	info->key.tp_dst = geneve->cfg.info.key.tp_dst;
119062306a36Sopenharmony_ci	return 0;
119162306a36Sopenharmony_ci}
119262306a36Sopenharmony_ci
119362306a36Sopenharmony_cistatic const struct net_device_ops geneve_netdev_ops = {
119462306a36Sopenharmony_ci	.ndo_init		= geneve_init,
119562306a36Sopenharmony_ci	.ndo_uninit		= geneve_uninit,
119662306a36Sopenharmony_ci	.ndo_open		= geneve_open,
119762306a36Sopenharmony_ci	.ndo_stop		= geneve_stop,
119862306a36Sopenharmony_ci	.ndo_start_xmit		= geneve_xmit,
119962306a36Sopenharmony_ci	.ndo_get_stats64	= dev_get_tstats64,
120062306a36Sopenharmony_ci	.ndo_change_mtu		= geneve_change_mtu,
120162306a36Sopenharmony_ci	.ndo_validate_addr	= eth_validate_addr,
120262306a36Sopenharmony_ci	.ndo_set_mac_address	= eth_mac_addr,
120362306a36Sopenharmony_ci	.ndo_fill_metadata_dst	= geneve_fill_metadata_dst,
120462306a36Sopenharmony_ci};
120562306a36Sopenharmony_ci
120662306a36Sopenharmony_cistatic void geneve_get_drvinfo(struct net_device *dev,
120762306a36Sopenharmony_ci			       struct ethtool_drvinfo *drvinfo)
120862306a36Sopenharmony_ci{
120962306a36Sopenharmony_ci	strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
121062306a36Sopenharmony_ci	strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
121162306a36Sopenharmony_ci}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_cistatic const struct ethtool_ops geneve_ethtool_ops = {
121462306a36Sopenharmony_ci	.get_drvinfo	= geneve_get_drvinfo,
121562306a36Sopenharmony_ci	.get_link	= ethtool_op_get_link,
121662306a36Sopenharmony_ci};
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci/* Info for udev, that this is a virtual tunnel endpoint */
121962306a36Sopenharmony_cistatic struct device_type geneve_type = {
122062306a36Sopenharmony_ci	.name = "geneve",
122162306a36Sopenharmony_ci};
122262306a36Sopenharmony_ci
122362306a36Sopenharmony_ci/* Calls the ndo_udp_tunnel_add of the caller in order to
122462306a36Sopenharmony_ci * supply the listening GENEVE udp ports. Callers are expected
122562306a36Sopenharmony_ci * to implement the ndo_udp_tunnel_add.
122662306a36Sopenharmony_ci */
122762306a36Sopenharmony_cistatic void geneve_offload_rx_ports(struct net_device *dev, bool push)
122862306a36Sopenharmony_ci{
122962306a36Sopenharmony_ci	struct net *net = dev_net(dev);
123062306a36Sopenharmony_ci	struct geneve_net *gn = net_generic(net, geneve_net_id);
123162306a36Sopenharmony_ci	struct geneve_sock *gs;
123262306a36Sopenharmony_ci
123362306a36Sopenharmony_ci	rcu_read_lock();
123462306a36Sopenharmony_ci	list_for_each_entry_rcu(gs, &gn->sock_list, list) {
123562306a36Sopenharmony_ci		if (push) {
123662306a36Sopenharmony_ci			udp_tunnel_push_rx_port(dev, gs->sock,
123762306a36Sopenharmony_ci						UDP_TUNNEL_TYPE_GENEVE);
123862306a36Sopenharmony_ci		} else {
123962306a36Sopenharmony_ci			udp_tunnel_drop_rx_port(dev, gs->sock,
124062306a36Sopenharmony_ci						UDP_TUNNEL_TYPE_GENEVE);
124162306a36Sopenharmony_ci		}
124262306a36Sopenharmony_ci	}
124362306a36Sopenharmony_ci	rcu_read_unlock();
124462306a36Sopenharmony_ci}
124562306a36Sopenharmony_ci
124662306a36Sopenharmony_ci/* Initialize the device structure. */
124762306a36Sopenharmony_cistatic void geneve_setup(struct net_device *dev)
124862306a36Sopenharmony_ci{
124962306a36Sopenharmony_ci	ether_setup(dev);
125062306a36Sopenharmony_ci
125162306a36Sopenharmony_ci	dev->netdev_ops = &geneve_netdev_ops;
125262306a36Sopenharmony_ci	dev->ethtool_ops = &geneve_ethtool_ops;
125362306a36Sopenharmony_ci	dev->needs_free_netdev = true;
125462306a36Sopenharmony_ci
125562306a36Sopenharmony_ci	SET_NETDEV_DEVTYPE(dev, &geneve_type);
125662306a36Sopenharmony_ci
125762306a36Sopenharmony_ci	dev->features    |= NETIF_F_LLTX;
125862306a36Sopenharmony_ci	dev->features    |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
125962306a36Sopenharmony_ci	dev->features    |= NETIF_F_RXCSUM;
126062306a36Sopenharmony_ci	dev->features    |= NETIF_F_GSO_SOFTWARE;
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_ci	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
126362306a36Sopenharmony_ci	dev->hw_features |= NETIF_F_RXCSUM;
126462306a36Sopenharmony_ci	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
126562306a36Sopenharmony_ci
126662306a36Sopenharmony_ci	/* MTU range: 68 - (something less than 65535) */
126762306a36Sopenharmony_ci	dev->min_mtu = ETH_MIN_MTU;
126862306a36Sopenharmony_ci	/* The max_mtu calculation does not take account of GENEVE
126962306a36Sopenharmony_ci	 * options, to avoid excluding potentially valid
127062306a36Sopenharmony_ci	 * configurations. This will be further reduced by IPvX hdr size.
127162306a36Sopenharmony_ci	 */
127262306a36Sopenharmony_ci	dev->max_mtu = IP_MAX_MTU - GENEVE_BASE_HLEN - dev->hard_header_len;
127362306a36Sopenharmony_ci
127462306a36Sopenharmony_ci	netif_keep_dst(dev);
127562306a36Sopenharmony_ci	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
127662306a36Sopenharmony_ci	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
127762306a36Sopenharmony_ci	eth_hw_addr_random(dev);
127862306a36Sopenharmony_ci}
127962306a36Sopenharmony_ci
128062306a36Sopenharmony_cistatic const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
128162306a36Sopenharmony_ci	[IFLA_GENEVE_UNSPEC]		= { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT },
128262306a36Sopenharmony_ci	[IFLA_GENEVE_ID]		= { .type = NLA_U32 },
128362306a36Sopenharmony_ci	[IFLA_GENEVE_REMOTE]		= { .len = sizeof_field(struct iphdr, daddr) },
128462306a36Sopenharmony_ci	[IFLA_GENEVE_REMOTE6]		= { .len = sizeof(struct in6_addr) },
128562306a36Sopenharmony_ci	[IFLA_GENEVE_TTL]		= { .type = NLA_U8 },
128662306a36Sopenharmony_ci	[IFLA_GENEVE_TOS]		= { .type = NLA_U8 },
128762306a36Sopenharmony_ci	[IFLA_GENEVE_LABEL]		= { .type = NLA_U32 },
128862306a36Sopenharmony_ci	[IFLA_GENEVE_PORT]		= { .type = NLA_U16 },
128962306a36Sopenharmony_ci	[IFLA_GENEVE_COLLECT_METADATA]	= { .type = NLA_FLAG },
129062306a36Sopenharmony_ci	[IFLA_GENEVE_UDP_CSUM]		= { .type = NLA_U8 },
129162306a36Sopenharmony_ci	[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]	= { .type = NLA_U8 },
129262306a36Sopenharmony_ci	[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]	= { .type = NLA_U8 },
129362306a36Sopenharmony_ci	[IFLA_GENEVE_TTL_INHERIT]	= { .type = NLA_U8 },
129462306a36Sopenharmony_ci	[IFLA_GENEVE_DF]		= { .type = NLA_U8 },
129562306a36Sopenharmony_ci	[IFLA_GENEVE_INNER_PROTO_INHERIT]	= { .type = NLA_FLAG },
129662306a36Sopenharmony_ci};
129762306a36Sopenharmony_ci
129862306a36Sopenharmony_cistatic int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
129962306a36Sopenharmony_ci			   struct netlink_ext_ack *extack)
130062306a36Sopenharmony_ci{
130162306a36Sopenharmony_ci	if (tb[IFLA_ADDRESS]) {
130262306a36Sopenharmony_ci		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
130362306a36Sopenharmony_ci			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
130462306a36Sopenharmony_ci					    "Provided link layer address is not Ethernet");
130562306a36Sopenharmony_ci			return -EINVAL;
130662306a36Sopenharmony_ci		}
130762306a36Sopenharmony_ci
130862306a36Sopenharmony_ci		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
130962306a36Sopenharmony_ci			NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_ADDRESS],
131062306a36Sopenharmony_ci					    "Provided Ethernet address is not unicast");
131162306a36Sopenharmony_ci			return -EADDRNOTAVAIL;
131262306a36Sopenharmony_ci		}
131362306a36Sopenharmony_ci	}
131462306a36Sopenharmony_ci
131562306a36Sopenharmony_ci	if (!data) {
131662306a36Sopenharmony_ci		NL_SET_ERR_MSG(extack,
131762306a36Sopenharmony_ci			       "Not enough attributes provided to perform the operation");
131862306a36Sopenharmony_ci		return -EINVAL;
131962306a36Sopenharmony_ci	}
132062306a36Sopenharmony_ci
132162306a36Sopenharmony_ci	if (data[IFLA_GENEVE_ID]) {
132262306a36Sopenharmony_ci		__u32 vni =  nla_get_u32(data[IFLA_GENEVE_ID]);
132362306a36Sopenharmony_ci
132462306a36Sopenharmony_ci		if (vni >= GENEVE_N_VID) {
132562306a36Sopenharmony_ci			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_ID],
132662306a36Sopenharmony_ci					    "Geneve ID must be lower than 16777216");
132762306a36Sopenharmony_ci			return -ERANGE;
132862306a36Sopenharmony_ci		}
132962306a36Sopenharmony_ci	}
133062306a36Sopenharmony_ci
133162306a36Sopenharmony_ci	if (data[IFLA_GENEVE_DF]) {
133262306a36Sopenharmony_ci		enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci		if (df < 0 || df > GENEVE_DF_MAX) {
133562306a36Sopenharmony_ci			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF],
133662306a36Sopenharmony_ci					    "Invalid DF attribute");
133762306a36Sopenharmony_ci			return -EINVAL;
133862306a36Sopenharmony_ci		}
133962306a36Sopenharmony_ci	}
134062306a36Sopenharmony_ci
134162306a36Sopenharmony_ci	return 0;
134262306a36Sopenharmony_ci}
134362306a36Sopenharmony_ci
134462306a36Sopenharmony_cistatic struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
134562306a36Sopenharmony_ci					  const struct ip_tunnel_info *info,
134662306a36Sopenharmony_ci					  bool *tun_on_same_port,
134762306a36Sopenharmony_ci					  bool *tun_collect_md)
134862306a36Sopenharmony_ci{
134962306a36Sopenharmony_ci	struct geneve_dev *geneve, *t = NULL;
135062306a36Sopenharmony_ci
135162306a36Sopenharmony_ci	*tun_on_same_port = false;
135262306a36Sopenharmony_ci	*tun_collect_md = false;
135362306a36Sopenharmony_ci	list_for_each_entry(geneve, &gn->geneve_list, next) {
135462306a36Sopenharmony_ci		if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) {
135562306a36Sopenharmony_ci			*tun_collect_md = geneve->cfg.collect_md;
135662306a36Sopenharmony_ci			*tun_on_same_port = true;
135762306a36Sopenharmony_ci		}
135862306a36Sopenharmony_ci		if (info->key.tun_id == geneve->cfg.info.key.tun_id &&
135962306a36Sopenharmony_ci		    info->key.tp_dst == geneve->cfg.info.key.tp_dst &&
136062306a36Sopenharmony_ci		    !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u)))
136162306a36Sopenharmony_ci			t = geneve;
136262306a36Sopenharmony_ci	}
136362306a36Sopenharmony_ci	return t;
136462306a36Sopenharmony_ci}
136562306a36Sopenharmony_ci
136662306a36Sopenharmony_cistatic bool is_tnl_info_zero(const struct ip_tunnel_info *info)
136762306a36Sopenharmony_ci{
136862306a36Sopenharmony_ci	return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
136962306a36Sopenharmony_ci		 info->key.ttl || info->key.label || info->key.tp_src ||
137062306a36Sopenharmony_ci		 memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
137162306a36Sopenharmony_ci}
137262306a36Sopenharmony_ci
137362306a36Sopenharmony_cistatic bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
137462306a36Sopenharmony_ci				  struct ip_tunnel_info *b)
137562306a36Sopenharmony_ci{
137662306a36Sopenharmony_ci	if (ip_tunnel_info_af(a) == AF_INET)
137762306a36Sopenharmony_ci		return a->key.u.ipv4.dst == b->key.u.ipv4.dst;
137862306a36Sopenharmony_ci	else
137962306a36Sopenharmony_ci		return ipv6_addr_equal(&a->key.u.ipv6.dst, &b->key.u.ipv6.dst);
138062306a36Sopenharmony_ci}
138162306a36Sopenharmony_ci
138262306a36Sopenharmony_cistatic int geneve_configure(struct net *net, struct net_device *dev,
138362306a36Sopenharmony_ci			    struct netlink_ext_ack *extack,
138462306a36Sopenharmony_ci			    const struct geneve_config *cfg)
138562306a36Sopenharmony_ci{
138662306a36Sopenharmony_ci	struct geneve_net *gn = net_generic(net, geneve_net_id);
138762306a36Sopenharmony_ci	struct geneve_dev *t, *geneve = netdev_priv(dev);
138862306a36Sopenharmony_ci	const struct ip_tunnel_info *info = &cfg->info;
138962306a36Sopenharmony_ci	bool tun_collect_md, tun_on_same_port;
139062306a36Sopenharmony_ci	int err, encap_len;
139162306a36Sopenharmony_ci
139262306a36Sopenharmony_ci	if (cfg->collect_md && !is_tnl_info_zero(info)) {
139362306a36Sopenharmony_ci		NL_SET_ERR_MSG(extack,
139462306a36Sopenharmony_ci			       "Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
139562306a36Sopenharmony_ci		return -EINVAL;
139662306a36Sopenharmony_ci	}
139762306a36Sopenharmony_ci
139862306a36Sopenharmony_ci	geneve->net = net;
139962306a36Sopenharmony_ci	geneve->dev = dev;
140062306a36Sopenharmony_ci
140162306a36Sopenharmony_ci	t = geneve_find_dev(gn, info, &tun_on_same_port, &tun_collect_md);
140262306a36Sopenharmony_ci	if (t)
140362306a36Sopenharmony_ci		return -EBUSY;
140462306a36Sopenharmony_ci
140562306a36Sopenharmony_ci	/* make enough headroom for basic scenario */
140662306a36Sopenharmony_ci	encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
140762306a36Sopenharmony_ci	if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) {
140862306a36Sopenharmony_ci		encap_len += sizeof(struct iphdr);
140962306a36Sopenharmony_ci		dev->max_mtu -= sizeof(struct iphdr);
141062306a36Sopenharmony_ci	} else {
141162306a36Sopenharmony_ci		encap_len += sizeof(struct ipv6hdr);
141262306a36Sopenharmony_ci		dev->max_mtu -= sizeof(struct ipv6hdr);
141362306a36Sopenharmony_ci	}
141462306a36Sopenharmony_ci	dev->needed_headroom = encap_len + ETH_HLEN;
141562306a36Sopenharmony_ci
141662306a36Sopenharmony_ci	if (cfg->collect_md) {
141762306a36Sopenharmony_ci		if (tun_on_same_port) {
141862306a36Sopenharmony_ci			NL_SET_ERR_MSG(extack,
141962306a36Sopenharmony_ci				       "There can be only one externally controlled device on a destination port");
142062306a36Sopenharmony_ci			return -EPERM;
142162306a36Sopenharmony_ci		}
142262306a36Sopenharmony_ci	} else {
142362306a36Sopenharmony_ci		if (tun_collect_md) {
142462306a36Sopenharmony_ci			NL_SET_ERR_MSG(extack,
142562306a36Sopenharmony_ci				       "There already exists an externally controlled device on this destination port");
142662306a36Sopenharmony_ci			return -EPERM;
142762306a36Sopenharmony_ci		}
142862306a36Sopenharmony_ci	}
142962306a36Sopenharmony_ci
143062306a36Sopenharmony_ci	dst_cache_reset(&geneve->cfg.info.dst_cache);
143162306a36Sopenharmony_ci	memcpy(&geneve->cfg, cfg, sizeof(*cfg));
143262306a36Sopenharmony_ci
143362306a36Sopenharmony_ci	if (geneve->cfg.inner_proto_inherit) {
143462306a36Sopenharmony_ci		dev->header_ops = NULL;
143562306a36Sopenharmony_ci		dev->type = ARPHRD_NONE;
143662306a36Sopenharmony_ci		dev->hard_header_len = 0;
143762306a36Sopenharmony_ci		dev->addr_len = 0;
143862306a36Sopenharmony_ci		dev->flags = IFF_POINTOPOINT | IFF_NOARP;
143962306a36Sopenharmony_ci	}
144062306a36Sopenharmony_ci
144162306a36Sopenharmony_ci	err = register_netdevice(dev);
144262306a36Sopenharmony_ci	if (err)
144362306a36Sopenharmony_ci		return err;
144462306a36Sopenharmony_ci
144562306a36Sopenharmony_ci	list_add(&geneve->next, &gn->geneve_list);
144662306a36Sopenharmony_ci	return 0;
144762306a36Sopenharmony_ci}
144862306a36Sopenharmony_ci
144962306a36Sopenharmony_cistatic void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
145062306a36Sopenharmony_ci{
145162306a36Sopenharmony_ci	memset(info, 0, sizeof(*info));
145262306a36Sopenharmony_ci	info->key.tp_dst = htons(dst_port);
145362306a36Sopenharmony_ci}
145462306a36Sopenharmony_ci
145562306a36Sopenharmony_cistatic int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
145662306a36Sopenharmony_ci			  struct netlink_ext_ack *extack,
145762306a36Sopenharmony_ci			  struct geneve_config *cfg, bool changelink)
145862306a36Sopenharmony_ci{
145962306a36Sopenharmony_ci	struct ip_tunnel_info *info = &cfg->info;
146062306a36Sopenharmony_ci	int attrtype;
146162306a36Sopenharmony_ci
146262306a36Sopenharmony_ci	if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
146362306a36Sopenharmony_ci		NL_SET_ERR_MSG(extack,
146462306a36Sopenharmony_ci			       "Cannot specify both IPv4 and IPv6 Remote addresses");
146562306a36Sopenharmony_ci		return -EINVAL;
146662306a36Sopenharmony_ci	}
146762306a36Sopenharmony_ci
146862306a36Sopenharmony_ci	if (data[IFLA_GENEVE_REMOTE]) {
146962306a36Sopenharmony_ci		if (changelink && (ip_tunnel_info_af(info) == AF_INET6)) {
147062306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_REMOTE;
147162306a36Sopenharmony_ci			goto change_notsup;
147262306a36Sopenharmony_ci		}
147362306a36Sopenharmony_ci
147462306a36Sopenharmony_ci		info->key.u.ipv4.dst =
147562306a36Sopenharmony_ci			nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
147662306a36Sopenharmony_ci
147762306a36Sopenharmony_ci		if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
147862306a36Sopenharmony_ci			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
147962306a36Sopenharmony_ci					    "Remote IPv4 address cannot be Multicast");
148062306a36Sopenharmony_ci			return -EINVAL;
148162306a36Sopenharmony_ci		}
148262306a36Sopenharmony_ci	}
148362306a36Sopenharmony_ci
148462306a36Sopenharmony_ci	if (data[IFLA_GENEVE_REMOTE6]) {
148562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
148662306a36Sopenharmony_ci		if (changelink && (ip_tunnel_info_af(info) == AF_INET)) {
148762306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_REMOTE6;
148862306a36Sopenharmony_ci			goto change_notsup;
148962306a36Sopenharmony_ci		}
149062306a36Sopenharmony_ci
149162306a36Sopenharmony_ci		info->mode = IP_TUNNEL_INFO_IPV6;
149262306a36Sopenharmony_ci		info->key.u.ipv6.dst =
149362306a36Sopenharmony_ci			nla_get_in6_addr(data[IFLA_GENEVE_REMOTE6]);
149462306a36Sopenharmony_ci
149562306a36Sopenharmony_ci		if (ipv6_addr_type(&info->key.u.ipv6.dst) &
149662306a36Sopenharmony_ci		    IPV6_ADDR_LINKLOCAL) {
149762306a36Sopenharmony_ci			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
149862306a36Sopenharmony_ci					    "Remote IPv6 address cannot be link-local");
149962306a36Sopenharmony_ci			return -EINVAL;
150062306a36Sopenharmony_ci		}
150162306a36Sopenharmony_ci		if (ipv6_addr_is_multicast(&info->key.u.ipv6.dst)) {
150262306a36Sopenharmony_ci			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
150362306a36Sopenharmony_ci					    "Remote IPv6 address cannot be Multicast");
150462306a36Sopenharmony_ci			return -EINVAL;
150562306a36Sopenharmony_ci		}
150662306a36Sopenharmony_ci		info->key.tun_flags |= TUNNEL_CSUM;
150762306a36Sopenharmony_ci		cfg->use_udp6_rx_checksums = true;
150862306a36Sopenharmony_ci#else
150962306a36Sopenharmony_ci		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
151062306a36Sopenharmony_ci				    "IPv6 support not enabled in the kernel");
151162306a36Sopenharmony_ci		return -EPFNOSUPPORT;
151262306a36Sopenharmony_ci#endif
151362306a36Sopenharmony_ci	}
151462306a36Sopenharmony_ci
151562306a36Sopenharmony_ci	if (data[IFLA_GENEVE_ID]) {
151662306a36Sopenharmony_ci		__u32 vni;
151762306a36Sopenharmony_ci		__u8 tvni[3];
151862306a36Sopenharmony_ci		__be64 tunid;
151962306a36Sopenharmony_ci
152062306a36Sopenharmony_ci		vni = nla_get_u32(data[IFLA_GENEVE_ID]);
152162306a36Sopenharmony_ci		tvni[0] = (vni & 0x00ff0000) >> 16;
152262306a36Sopenharmony_ci		tvni[1] = (vni & 0x0000ff00) >> 8;
152362306a36Sopenharmony_ci		tvni[2] =  vni & 0x000000ff;
152462306a36Sopenharmony_ci
152562306a36Sopenharmony_ci		tunid = vni_to_tunnel_id(tvni);
152662306a36Sopenharmony_ci		if (changelink && (tunid != info->key.tun_id)) {
152762306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_ID;
152862306a36Sopenharmony_ci			goto change_notsup;
152962306a36Sopenharmony_ci		}
153062306a36Sopenharmony_ci		info->key.tun_id = tunid;
153162306a36Sopenharmony_ci	}
153262306a36Sopenharmony_ci
153362306a36Sopenharmony_ci	if (data[IFLA_GENEVE_TTL_INHERIT]) {
153462306a36Sopenharmony_ci		if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
153562306a36Sopenharmony_ci			cfg->ttl_inherit = true;
153662306a36Sopenharmony_ci		else
153762306a36Sopenharmony_ci			cfg->ttl_inherit = false;
153862306a36Sopenharmony_ci	} else if (data[IFLA_GENEVE_TTL]) {
153962306a36Sopenharmony_ci		info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
154062306a36Sopenharmony_ci		cfg->ttl_inherit = false;
154162306a36Sopenharmony_ci	}
154262306a36Sopenharmony_ci
154362306a36Sopenharmony_ci	if (data[IFLA_GENEVE_TOS])
154462306a36Sopenharmony_ci		info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
154562306a36Sopenharmony_ci
154662306a36Sopenharmony_ci	if (data[IFLA_GENEVE_DF])
154762306a36Sopenharmony_ci		cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);
154862306a36Sopenharmony_ci
154962306a36Sopenharmony_ci	if (data[IFLA_GENEVE_LABEL]) {
155062306a36Sopenharmony_ci		info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
155162306a36Sopenharmony_ci				  IPV6_FLOWLABEL_MASK;
155262306a36Sopenharmony_ci		if (info->key.label && (!(info->mode & IP_TUNNEL_INFO_IPV6))) {
155362306a36Sopenharmony_ci			NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_LABEL],
155462306a36Sopenharmony_ci					    "Label attribute only applies for IPv6 Geneve devices");
155562306a36Sopenharmony_ci			return -EINVAL;
155662306a36Sopenharmony_ci		}
155762306a36Sopenharmony_ci	}
155862306a36Sopenharmony_ci
155962306a36Sopenharmony_ci	if (data[IFLA_GENEVE_PORT]) {
156062306a36Sopenharmony_ci		if (changelink) {
156162306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_PORT;
156262306a36Sopenharmony_ci			goto change_notsup;
156362306a36Sopenharmony_ci		}
156462306a36Sopenharmony_ci		info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
156562306a36Sopenharmony_ci	}
156662306a36Sopenharmony_ci
156762306a36Sopenharmony_ci	if (data[IFLA_GENEVE_COLLECT_METADATA]) {
156862306a36Sopenharmony_ci		if (changelink) {
156962306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_COLLECT_METADATA;
157062306a36Sopenharmony_ci			goto change_notsup;
157162306a36Sopenharmony_ci		}
157262306a36Sopenharmony_ci		cfg->collect_md = true;
157362306a36Sopenharmony_ci	}
157462306a36Sopenharmony_ci
157562306a36Sopenharmony_ci	if (data[IFLA_GENEVE_UDP_CSUM]) {
157662306a36Sopenharmony_ci		if (changelink) {
157762306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_UDP_CSUM;
157862306a36Sopenharmony_ci			goto change_notsup;
157962306a36Sopenharmony_ci		}
158062306a36Sopenharmony_ci		if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
158162306a36Sopenharmony_ci			info->key.tun_flags |= TUNNEL_CSUM;
158262306a36Sopenharmony_ci	}
158362306a36Sopenharmony_ci
158462306a36Sopenharmony_ci	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
158562306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
158662306a36Sopenharmony_ci		if (changelink) {
158762306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_TX;
158862306a36Sopenharmony_ci			goto change_notsup;
158962306a36Sopenharmony_ci		}
159062306a36Sopenharmony_ci		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
159162306a36Sopenharmony_ci			info->key.tun_flags &= ~TUNNEL_CSUM;
159262306a36Sopenharmony_ci#else
159362306a36Sopenharmony_ci		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
159462306a36Sopenharmony_ci				    "IPv6 support not enabled in the kernel");
159562306a36Sopenharmony_ci		return -EPFNOSUPPORT;
159662306a36Sopenharmony_ci#endif
159762306a36Sopenharmony_ci	}
159862306a36Sopenharmony_ci
159962306a36Sopenharmony_ci	if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]) {
160062306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
160162306a36Sopenharmony_ci		if (changelink) {
160262306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_UDP_ZERO_CSUM6_RX;
160362306a36Sopenharmony_ci			goto change_notsup;
160462306a36Sopenharmony_ci		}
160562306a36Sopenharmony_ci		if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
160662306a36Sopenharmony_ci			cfg->use_udp6_rx_checksums = false;
160762306a36Sopenharmony_ci#else
160862306a36Sopenharmony_ci		NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
160962306a36Sopenharmony_ci				    "IPv6 support not enabled in the kernel");
161062306a36Sopenharmony_ci		return -EPFNOSUPPORT;
161162306a36Sopenharmony_ci#endif
161262306a36Sopenharmony_ci	}
161362306a36Sopenharmony_ci
161462306a36Sopenharmony_ci	if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
161562306a36Sopenharmony_ci		if (changelink) {
161662306a36Sopenharmony_ci			attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
161762306a36Sopenharmony_ci			goto change_notsup;
161862306a36Sopenharmony_ci		}
161962306a36Sopenharmony_ci		cfg->inner_proto_inherit = true;
162062306a36Sopenharmony_ci	}
162162306a36Sopenharmony_ci
162262306a36Sopenharmony_ci	return 0;
162362306a36Sopenharmony_cichange_notsup:
162462306a36Sopenharmony_ci	NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
162562306a36Sopenharmony_ci			    "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported");
162662306a36Sopenharmony_ci	return -EOPNOTSUPP;
162762306a36Sopenharmony_ci}
162862306a36Sopenharmony_ci
162962306a36Sopenharmony_cistatic void geneve_link_config(struct net_device *dev,
163062306a36Sopenharmony_ci			       struct ip_tunnel_info *info, struct nlattr *tb[])
163162306a36Sopenharmony_ci{
163262306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
163362306a36Sopenharmony_ci	int ldev_mtu = 0;
163462306a36Sopenharmony_ci
163562306a36Sopenharmony_ci	if (tb[IFLA_MTU]) {
163662306a36Sopenharmony_ci		geneve_change_mtu(dev, nla_get_u32(tb[IFLA_MTU]));
163762306a36Sopenharmony_ci		return;
163862306a36Sopenharmony_ci	}
163962306a36Sopenharmony_ci
164062306a36Sopenharmony_ci	switch (ip_tunnel_info_af(info)) {
164162306a36Sopenharmony_ci	case AF_INET: {
164262306a36Sopenharmony_ci		struct flowi4 fl4 = { .daddr = info->key.u.ipv4.dst };
164362306a36Sopenharmony_ci		struct rtable *rt = ip_route_output_key(geneve->net, &fl4);
164462306a36Sopenharmony_ci
164562306a36Sopenharmony_ci		if (!IS_ERR(rt) && rt->dst.dev) {
164662306a36Sopenharmony_ci			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV4_HLEN;
164762306a36Sopenharmony_ci			ip_rt_put(rt);
164862306a36Sopenharmony_ci		}
164962306a36Sopenharmony_ci		break;
165062306a36Sopenharmony_ci	}
165162306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
165262306a36Sopenharmony_ci	case AF_INET6: {
165362306a36Sopenharmony_ci		struct rt6_info *rt;
165462306a36Sopenharmony_ci
165562306a36Sopenharmony_ci		if (!__in6_dev_get(dev))
165662306a36Sopenharmony_ci			break;
165762306a36Sopenharmony_ci
165862306a36Sopenharmony_ci		rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
165962306a36Sopenharmony_ci				NULL, 0);
166062306a36Sopenharmony_ci
166162306a36Sopenharmony_ci		if (rt && rt->dst.dev)
166262306a36Sopenharmony_ci			ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
166362306a36Sopenharmony_ci		ip6_rt_put(rt);
166462306a36Sopenharmony_ci		break;
166562306a36Sopenharmony_ci	}
166662306a36Sopenharmony_ci#endif
166762306a36Sopenharmony_ci	}
166862306a36Sopenharmony_ci
166962306a36Sopenharmony_ci	if (ldev_mtu <= 0)
167062306a36Sopenharmony_ci		return;
167162306a36Sopenharmony_ci
167262306a36Sopenharmony_ci	geneve_change_mtu(dev, ldev_mtu - info->options_len);
167362306a36Sopenharmony_ci}
167462306a36Sopenharmony_ci
167562306a36Sopenharmony_cistatic int geneve_newlink(struct net *net, struct net_device *dev,
167662306a36Sopenharmony_ci			  struct nlattr *tb[], struct nlattr *data[],
167762306a36Sopenharmony_ci			  struct netlink_ext_ack *extack)
167862306a36Sopenharmony_ci{
167962306a36Sopenharmony_ci	struct geneve_config cfg = {
168062306a36Sopenharmony_ci		.df = GENEVE_DF_UNSET,
168162306a36Sopenharmony_ci		.use_udp6_rx_checksums = false,
168262306a36Sopenharmony_ci		.ttl_inherit = false,
168362306a36Sopenharmony_ci		.collect_md = false,
168462306a36Sopenharmony_ci	};
168562306a36Sopenharmony_ci	int err;
168662306a36Sopenharmony_ci
168762306a36Sopenharmony_ci	init_tnl_info(&cfg.info, GENEVE_UDP_PORT);
168862306a36Sopenharmony_ci	err = geneve_nl2info(tb, data, extack, &cfg, false);
168962306a36Sopenharmony_ci	if (err)
169062306a36Sopenharmony_ci		return err;
169162306a36Sopenharmony_ci
169262306a36Sopenharmony_ci	err = geneve_configure(net, dev, extack, &cfg);
169362306a36Sopenharmony_ci	if (err)
169462306a36Sopenharmony_ci		return err;
169562306a36Sopenharmony_ci
169662306a36Sopenharmony_ci	geneve_link_config(dev, &cfg.info, tb);
169762306a36Sopenharmony_ci
169862306a36Sopenharmony_ci	return 0;
169962306a36Sopenharmony_ci}
170062306a36Sopenharmony_ci
170162306a36Sopenharmony_ci/* Quiesces the geneve device data path for both TX and RX.
170262306a36Sopenharmony_ci *
170362306a36Sopenharmony_ci * On transmit geneve checks for non-NULL geneve_sock before it proceeds.
170462306a36Sopenharmony_ci * So, if we set that socket to NULL under RCU and wait for synchronize_net()
170562306a36Sopenharmony_ci * to complete for the existing set of in-flight packets to be transmitted,
170662306a36Sopenharmony_ci * then we would have quiesced the transmit data path. All the future packets
170762306a36Sopenharmony_ci * will get dropped until we unquiesce the data path.
170862306a36Sopenharmony_ci *
170962306a36Sopenharmony_ci * On receive geneve dereference the geneve_sock stashed in the socket. So,
171062306a36Sopenharmony_ci * if we set that to NULL under RCU and wait for synchronize_net() to
171162306a36Sopenharmony_ci * complete, then we would have quiesced the receive data path.
171262306a36Sopenharmony_ci */
171362306a36Sopenharmony_cistatic void geneve_quiesce(struct geneve_dev *geneve, struct geneve_sock **gs4,
171462306a36Sopenharmony_ci			   struct geneve_sock **gs6)
171562306a36Sopenharmony_ci{
171662306a36Sopenharmony_ci	*gs4 = rtnl_dereference(geneve->sock4);
171762306a36Sopenharmony_ci	rcu_assign_pointer(geneve->sock4, NULL);
171862306a36Sopenharmony_ci	if (*gs4)
171962306a36Sopenharmony_ci		rcu_assign_sk_user_data((*gs4)->sock->sk, NULL);
172062306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
172162306a36Sopenharmony_ci	*gs6 = rtnl_dereference(geneve->sock6);
172262306a36Sopenharmony_ci	rcu_assign_pointer(geneve->sock6, NULL);
172362306a36Sopenharmony_ci	if (*gs6)
172462306a36Sopenharmony_ci		rcu_assign_sk_user_data((*gs6)->sock->sk, NULL);
172562306a36Sopenharmony_ci#else
172662306a36Sopenharmony_ci	*gs6 = NULL;
172762306a36Sopenharmony_ci#endif
172862306a36Sopenharmony_ci	synchronize_net();
172962306a36Sopenharmony_ci}
173062306a36Sopenharmony_ci
173162306a36Sopenharmony_ci/* Resumes the geneve device data path for both TX and RX. */
173262306a36Sopenharmony_cistatic void geneve_unquiesce(struct geneve_dev *geneve, struct geneve_sock *gs4,
173362306a36Sopenharmony_ci			     struct geneve_sock __maybe_unused *gs6)
173462306a36Sopenharmony_ci{
173562306a36Sopenharmony_ci	rcu_assign_pointer(geneve->sock4, gs4);
173662306a36Sopenharmony_ci	if (gs4)
173762306a36Sopenharmony_ci		rcu_assign_sk_user_data(gs4->sock->sk, gs4);
173862306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
173962306a36Sopenharmony_ci	rcu_assign_pointer(geneve->sock6, gs6);
174062306a36Sopenharmony_ci	if (gs6)
174162306a36Sopenharmony_ci		rcu_assign_sk_user_data(gs6->sock->sk, gs6);
174262306a36Sopenharmony_ci#endif
174362306a36Sopenharmony_ci	synchronize_net();
174462306a36Sopenharmony_ci}
174562306a36Sopenharmony_ci
174662306a36Sopenharmony_cistatic int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
174762306a36Sopenharmony_ci			     struct nlattr *data[],
174862306a36Sopenharmony_ci			     struct netlink_ext_ack *extack)
174962306a36Sopenharmony_ci{
175062306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
175162306a36Sopenharmony_ci	struct geneve_sock *gs4, *gs6;
175262306a36Sopenharmony_ci	struct geneve_config cfg;
175362306a36Sopenharmony_ci	int err;
175462306a36Sopenharmony_ci
175562306a36Sopenharmony_ci	/* If the geneve device is configured for metadata (or externally
175662306a36Sopenharmony_ci	 * controlled, for example, OVS), then nothing can be changed.
175762306a36Sopenharmony_ci	 */
175862306a36Sopenharmony_ci	if (geneve->cfg.collect_md)
175962306a36Sopenharmony_ci		return -EOPNOTSUPP;
176062306a36Sopenharmony_ci
176162306a36Sopenharmony_ci	/* Start with the existing info. */
176262306a36Sopenharmony_ci	memcpy(&cfg, &geneve->cfg, sizeof(cfg));
176362306a36Sopenharmony_ci	err = geneve_nl2info(tb, data, extack, &cfg, true);
176462306a36Sopenharmony_ci	if (err)
176562306a36Sopenharmony_ci		return err;
176662306a36Sopenharmony_ci
176762306a36Sopenharmony_ci	if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) {
176862306a36Sopenharmony_ci		dst_cache_reset(&cfg.info.dst_cache);
176962306a36Sopenharmony_ci		geneve_link_config(dev, &cfg.info, tb);
177062306a36Sopenharmony_ci	}
177162306a36Sopenharmony_ci
177262306a36Sopenharmony_ci	geneve_quiesce(geneve, &gs4, &gs6);
177362306a36Sopenharmony_ci	memcpy(&geneve->cfg, &cfg, sizeof(cfg));
177462306a36Sopenharmony_ci	geneve_unquiesce(geneve, gs4, gs6);
177562306a36Sopenharmony_ci
177662306a36Sopenharmony_ci	return 0;
177762306a36Sopenharmony_ci}
177862306a36Sopenharmony_ci
177962306a36Sopenharmony_cistatic void geneve_dellink(struct net_device *dev, struct list_head *head)
178062306a36Sopenharmony_ci{
178162306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
178262306a36Sopenharmony_ci
178362306a36Sopenharmony_ci	list_del(&geneve->next);
178462306a36Sopenharmony_ci	unregister_netdevice_queue(dev, head);
178562306a36Sopenharmony_ci}
178662306a36Sopenharmony_ci
178762306a36Sopenharmony_cistatic size_t geneve_get_size(const struct net_device *dev)
178862306a36Sopenharmony_ci{
178962306a36Sopenharmony_ci	return nla_total_size(sizeof(__u32)) +	/* IFLA_GENEVE_ID */
179062306a36Sopenharmony_ci		nla_total_size(sizeof(struct in6_addr)) + /* IFLA_GENEVE_REMOTE{6} */
179162306a36Sopenharmony_ci		nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TTL */
179262306a36Sopenharmony_ci		nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TOS */
179362306a36Sopenharmony_ci		nla_total_size(sizeof(__u8)) +	/* IFLA_GENEVE_DF */
179462306a36Sopenharmony_ci		nla_total_size(sizeof(__be32)) +  /* IFLA_GENEVE_LABEL */
179562306a36Sopenharmony_ci		nla_total_size(sizeof(__be16)) +  /* IFLA_GENEVE_PORT */
179662306a36Sopenharmony_ci		nla_total_size(0) +	 /* IFLA_GENEVE_COLLECT_METADATA */
179762306a36Sopenharmony_ci		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_CSUM */
179862306a36Sopenharmony_ci		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
179962306a36Sopenharmony_ci		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
180062306a36Sopenharmony_ci		nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
180162306a36Sopenharmony_ci		nla_total_size(0) +	 /* IFLA_GENEVE_INNER_PROTO_INHERIT */
180262306a36Sopenharmony_ci		0;
180362306a36Sopenharmony_ci}
180462306a36Sopenharmony_ci
180562306a36Sopenharmony_cistatic int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
180662306a36Sopenharmony_ci{
180762306a36Sopenharmony_ci	struct geneve_dev *geneve = netdev_priv(dev);
180862306a36Sopenharmony_ci	struct ip_tunnel_info *info = &geneve->cfg.info;
180962306a36Sopenharmony_ci	bool ttl_inherit = geneve->cfg.ttl_inherit;
181062306a36Sopenharmony_ci	bool metadata = geneve->cfg.collect_md;
181162306a36Sopenharmony_ci	__u8 tmp_vni[3];
181262306a36Sopenharmony_ci	__u32 vni;
181362306a36Sopenharmony_ci
181462306a36Sopenharmony_ci	tunnel_id_to_vni(info->key.tun_id, tmp_vni);
181562306a36Sopenharmony_ci	vni = (tmp_vni[0] << 16) | (tmp_vni[1] << 8) | tmp_vni[2];
181662306a36Sopenharmony_ci	if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
181762306a36Sopenharmony_ci		goto nla_put_failure;
181862306a36Sopenharmony_ci
181962306a36Sopenharmony_ci	if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
182062306a36Sopenharmony_ci		if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
182162306a36Sopenharmony_ci				    info->key.u.ipv4.dst))
182262306a36Sopenharmony_ci			goto nla_put_failure;
182362306a36Sopenharmony_ci		if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
182462306a36Sopenharmony_ci			       !!(info->key.tun_flags & TUNNEL_CSUM)))
182562306a36Sopenharmony_ci			goto nla_put_failure;
182662306a36Sopenharmony_ci
182762306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
182862306a36Sopenharmony_ci	} else if (!metadata) {
182962306a36Sopenharmony_ci		if (nla_put_in6_addr(skb, IFLA_GENEVE_REMOTE6,
183062306a36Sopenharmony_ci				     &info->key.u.ipv6.dst))
183162306a36Sopenharmony_ci			goto nla_put_failure;
183262306a36Sopenharmony_ci		if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
183362306a36Sopenharmony_ci			       !(info->key.tun_flags & TUNNEL_CSUM)))
183462306a36Sopenharmony_ci			goto nla_put_failure;
183562306a36Sopenharmony_ci#endif
183662306a36Sopenharmony_ci	}
183762306a36Sopenharmony_ci
183862306a36Sopenharmony_ci	if (nla_put_u8(skb, IFLA_GENEVE_TTL, info->key.ttl) ||
183962306a36Sopenharmony_ci	    nla_put_u8(skb, IFLA_GENEVE_TOS, info->key.tos) ||
184062306a36Sopenharmony_ci	    nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
184162306a36Sopenharmony_ci		goto nla_put_failure;
184262306a36Sopenharmony_ci
184362306a36Sopenharmony_ci	if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df))
184462306a36Sopenharmony_ci		goto nla_put_failure;
184562306a36Sopenharmony_ci
184662306a36Sopenharmony_ci	if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
184762306a36Sopenharmony_ci		goto nla_put_failure;
184862306a36Sopenharmony_ci
184962306a36Sopenharmony_ci	if (metadata && nla_put_flag(skb, IFLA_GENEVE_COLLECT_METADATA))
185062306a36Sopenharmony_ci		goto nla_put_failure;
185162306a36Sopenharmony_ci
185262306a36Sopenharmony_ci#if IS_ENABLED(CONFIG_IPV6)
185362306a36Sopenharmony_ci	if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
185462306a36Sopenharmony_ci		       !geneve->cfg.use_udp6_rx_checksums))
185562306a36Sopenharmony_ci		goto nla_put_failure;
185662306a36Sopenharmony_ci#endif
185762306a36Sopenharmony_ci
185862306a36Sopenharmony_ci	if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
185962306a36Sopenharmony_ci		goto nla_put_failure;
186062306a36Sopenharmony_ci
186162306a36Sopenharmony_ci	if (geneve->cfg.inner_proto_inherit &&
186262306a36Sopenharmony_ci	    nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
186362306a36Sopenharmony_ci		goto nla_put_failure;
186462306a36Sopenharmony_ci
186562306a36Sopenharmony_ci	return 0;
186662306a36Sopenharmony_ci
186762306a36Sopenharmony_cinla_put_failure:
186862306a36Sopenharmony_ci	return -EMSGSIZE;
186962306a36Sopenharmony_ci}
187062306a36Sopenharmony_ci
187162306a36Sopenharmony_cistatic struct rtnl_link_ops geneve_link_ops __read_mostly = {
187262306a36Sopenharmony_ci	.kind		= "geneve",
187362306a36Sopenharmony_ci	.maxtype	= IFLA_GENEVE_MAX,
187462306a36Sopenharmony_ci	.policy		= geneve_policy,
187562306a36Sopenharmony_ci	.priv_size	= sizeof(struct geneve_dev),
187662306a36Sopenharmony_ci	.setup		= geneve_setup,
187762306a36Sopenharmony_ci	.validate	= geneve_validate,
187862306a36Sopenharmony_ci	.newlink	= geneve_newlink,
187962306a36Sopenharmony_ci	.changelink	= geneve_changelink,
188062306a36Sopenharmony_ci	.dellink	= geneve_dellink,
188162306a36Sopenharmony_ci	.get_size	= geneve_get_size,
188262306a36Sopenharmony_ci	.fill_info	= geneve_fill_info,
188362306a36Sopenharmony_ci};
188462306a36Sopenharmony_ci
188562306a36Sopenharmony_cistruct net_device *geneve_dev_create_fb(struct net *net, const char *name,
188662306a36Sopenharmony_ci					u8 name_assign_type, u16 dst_port)
188762306a36Sopenharmony_ci{
188862306a36Sopenharmony_ci	struct nlattr *tb[IFLA_MAX + 1];
188962306a36Sopenharmony_ci	struct net_device *dev;
189062306a36Sopenharmony_ci	LIST_HEAD(list_kill);
189162306a36Sopenharmony_ci	int err;
189262306a36Sopenharmony_ci	struct geneve_config cfg = {
189362306a36Sopenharmony_ci		.df = GENEVE_DF_UNSET,
189462306a36Sopenharmony_ci		.use_udp6_rx_checksums = true,
189562306a36Sopenharmony_ci		.ttl_inherit = false,
189662306a36Sopenharmony_ci		.collect_md = true,
189762306a36Sopenharmony_ci	};
189862306a36Sopenharmony_ci
189962306a36Sopenharmony_ci	memset(tb, 0, sizeof(tb));
190062306a36Sopenharmony_ci	dev = rtnl_create_link(net, name, name_assign_type,
190162306a36Sopenharmony_ci			       &geneve_link_ops, tb, NULL);
190262306a36Sopenharmony_ci	if (IS_ERR(dev))
190362306a36Sopenharmony_ci		return dev;
190462306a36Sopenharmony_ci
190562306a36Sopenharmony_ci	init_tnl_info(&cfg.info, dst_port);
190662306a36Sopenharmony_ci	err = geneve_configure(net, dev, NULL, &cfg);
190762306a36Sopenharmony_ci	if (err) {
190862306a36Sopenharmony_ci		free_netdev(dev);
190962306a36Sopenharmony_ci		return ERR_PTR(err);
191062306a36Sopenharmony_ci	}
191162306a36Sopenharmony_ci
191262306a36Sopenharmony_ci	/* openvswitch users expect packet sizes to be unrestricted,
191362306a36Sopenharmony_ci	 * so set the largest MTU we can.
191462306a36Sopenharmony_ci	 */
191562306a36Sopenharmony_ci	err = geneve_change_mtu(dev, IP_MAX_MTU);
191662306a36Sopenharmony_ci	if (err)
191762306a36Sopenharmony_ci		goto err;
191862306a36Sopenharmony_ci
191962306a36Sopenharmony_ci	err = rtnl_configure_link(dev, NULL, 0, NULL);
192062306a36Sopenharmony_ci	if (err < 0)
192162306a36Sopenharmony_ci		goto err;
192262306a36Sopenharmony_ci
192362306a36Sopenharmony_ci	return dev;
192462306a36Sopenharmony_cierr:
192562306a36Sopenharmony_ci	geneve_dellink(dev, &list_kill);
192662306a36Sopenharmony_ci	unregister_netdevice_many(&list_kill);
192762306a36Sopenharmony_ci	return ERR_PTR(err);
192862306a36Sopenharmony_ci}
192962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(geneve_dev_create_fb);
193062306a36Sopenharmony_ci
193162306a36Sopenharmony_cistatic int geneve_netdevice_event(struct notifier_block *unused,
193262306a36Sopenharmony_ci				  unsigned long event, void *ptr)
193362306a36Sopenharmony_ci{
193462306a36Sopenharmony_ci	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
193562306a36Sopenharmony_ci
193662306a36Sopenharmony_ci	if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
193762306a36Sopenharmony_ci		geneve_offload_rx_ports(dev, true);
193862306a36Sopenharmony_ci	else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
193962306a36Sopenharmony_ci		geneve_offload_rx_ports(dev, false);
194062306a36Sopenharmony_ci
194162306a36Sopenharmony_ci	return NOTIFY_DONE;
194262306a36Sopenharmony_ci}
194362306a36Sopenharmony_ci
/* Notifier block whose callback is geneve_netdevice_event(); it is
 * registered in geneve_init_module() and unregistered in
 * geneve_cleanup_module().
 */
static struct notifier_block geneve_notifier_block __read_mostly = {
	.notifier_call = geneve_netdevice_event,
};
194762306a36Sopenharmony_ci
194862306a36Sopenharmony_cistatic __net_init int geneve_init_net(struct net *net)
194962306a36Sopenharmony_ci{
195062306a36Sopenharmony_ci	struct geneve_net *gn = net_generic(net, geneve_net_id);
195162306a36Sopenharmony_ci
195262306a36Sopenharmony_ci	INIT_LIST_HEAD(&gn->geneve_list);
195362306a36Sopenharmony_ci	INIT_LIST_HEAD(&gn->sock_list);
195462306a36Sopenharmony_ci	return 0;
195562306a36Sopenharmony_ci}
195662306a36Sopenharmony_ci
195762306a36Sopenharmony_cistatic void geneve_destroy_tunnels(struct net *net, struct list_head *head)
195862306a36Sopenharmony_ci{
195962306a36Sopenharmony_ci	struct geneve_net *gn = net_generic(net, geneve_net_id);
196062306a36Sopenharmony_ci	struct geneve_dev *geneve, *next;
196162306a36Sopenharmony_ci	struct net_device *dev, *aux;
196262306a36Sopenharmony_ci
196362306a36Sopenharmony_ci	/* gather any geneve devices that were moved into this ns */
196462306a36Sopenharmony_ci	for_each_netdev_safe(net, dev, aux)
196562306a36Sopenharmony_ci		if (dev->rtnl_link_ops == &geneve_link_ops)
196662306a36Sopenharmony_ci			unregister_netdevice_queue(dev, head);
196762306a36Sopenharmony_ci
196862306a36Sopenharmony_ci	/* now gather any other geneve devices that were created in this ns */
196962306a36Sopenharmony_ci	list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
197062306a36Sopenharmony_ci		/* If geneve->dev is in the same netns, it was already added
197162306a36Sopenharmony_ci		 * to the list by the previous loop.
197262306a36Sopenharmony_ci		 */
197362306a36Sopenharmony_ci		if (!net_eq(dev_net(geneve->dev), net))
197462306a36Sopenharmony_ci			unregister_netdevice_queue(geneve->dev, head);
197562306a36Sopenharmony_ci	}
197662306a36Sopenharmony_ci}
197762306a36Sopenharmony_ci
197862306a36Sopenharmony_cistatic void __net_exit geneve_exit_batch_net(struct list_head *net_list)
197962306a36Sopenharmony_ci{
198062306a36Sopenharmony_ci	struct net *net;
198162306a36Sopenharmony_ci	LIST_HEAD(list);
198262306a36Sopenharmony_ci
198362306a36Sopenharmony_ci	rtnl_lock();
198462306a36Sopenharmony_ci	list_for_each_entry(net, net_list, exit_list)
198562306a36Sopenharmony_ci		geneve_destroy_tunnels(net, &list);
198662306a36Sopenharmony_ci
198762306a36Sopenharmony_ci	/* unregister the devices gathered above */
198862306a36Sopenharmony_ci	unregister_netdevice_many(&list);
198962306a36Sopenharmony_ci	rtnl_unlock();
199062306a36Sopenharmony_ci
199162306a36Sopenharmony_ci	list_for_each_entry(net, net_list, exit_list) {
199262306a36Sopenharmony_ci		const struct geneve_net *gn = net_generic(net, geneve_net_id);
199362306a36Sopenharmony_ci
199462306a36Sopenharmony_ci		WARN_ON_ONCE(!list_empty(&gn->sock_list));
199562306a36Sopenharmony_ci	}
199662306a36Sopenharmony_ci}
199762306a36Sopenharmony_ci
199862306a36Sopenharmony_cistatic struct pernet_operations geneve_net_ops = {
199962306a36Sopenharmony_ci	.init = geneve_init_net,
200062306a36Sopenharmony_ci	.exit_batch = geneve_exit_batch_net,
200162306a36Sopenharmony_ci	.id   = &geneve_net_id,
200262306a36Sopenharmony_ci	.size = sizeof(struct geneve_net),
200362306a36Sopenharmony_ci};
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_cistatic int __init geneve_init_module(void)
200662306a36Sopenharmony_ci{
200762306a36Sopenharmony_ci	int rc;
200862306a36Sopenharmony_ci
200962306a36Sopenharmony_ci	rc = register_pernet_subsys(&geneve_net_ops);
201062306a36Sopenharmony_ci	if (rc)
201162306a36Sopenharmony_ci		goto out1;
201262306a36Sopenharmony_ci
201362306a36Sopenharmony_ci	rc = register_netdevice_notifier(&geneve_notifier_block);
201462306a36Sopenharmony_ci	if (rc)
201562306a36Sopenharmony_ci		goto out2;
201662306a36Sopenharmony_ci
201762306a36Sopenharmony_ci	rc = rtnl_link_register(&geneve_link_ops);
201862306a36Sopenharmony_ci	if (rc)
201962306a36Sopenharmony_ci		goto out3;
202062306a36Sopenharmony_ci
202162306a36Sopenharmony_ci	return 0;
202262306a36Sopenharmony_ciout3:
202362306a36Sopenharmony_ci	unregister_netdevice_notifier(&geneve_notifier_block);
202462306a36Sopenharmony_ciout2:
202562306a36Sopenharmony_ci	unregister_pernet_subsys(&geneve_net_ops);
202662306a36Sopenharmony_ciout1:
202762306a36Sopenharmony_ci	return rc;
202862306a36Sopenharmony_ci}
202962306a36Sopenharmony_cilate_initcall(geneve_init_module);
203062306a36Sopenharmony_ci
/* Module exit: undo the three registrations made by geneve_init_module()
 * in the reverse of the order they were made (rtnl link ops first, then
 * the netdevice notifier, then the pernet subsystem).
 */
static void __exit geneve_cleanup_module(void)
{
	rtnl_link_unregister(&geneve_link_ops);
	unregister_netdevice_notifier(&geneve_notifier_block);
	unregister_pernet_subsys(&geneve_net_ops);
}
module_exit(geneve_cleanup_module);
203862306a36Sopenharmony_ci
203962306a36Sopenharmony_ciMODULE_LICENSE("GPL");
204062306a36Sopenharmony_ciMODULE_VERSION(GENEVE_NETDEV_VER);
204162306a36Sopenharmony_ciMODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
204262306a36Sopenharmony_ciMODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
204362306a36Sopenharmony_ciMODULE_ALIAS_RTNL_LINK("geneve");
2044