162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
262306a36Sopenharmony_ci/*
362306a36Sopenharmony_ci * INET		An implementation of the TCP/IP protocol suite for the LINUX
462306a36Sopenharmony_ci *		operating system.  INET is implemented using the  BSD Socket
562306a36Sopenharmony_ci *		interface as the means of communication with the user level.
662306a36Sopenharmony_ci *
762306a36Sopenharmony_ci *		PACKET - implements raw packet sockets.
862306a36Sopenharmony_ci *
962306a36Sopenharmony_ci * Authors:	Ross Biro
1062306a36Sopenharmony_ci *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
1162306a36Sopenharmony_ci *		Alan Cox, <gw4pts@gw4pts.ampr.org>
1262306a36Sopenharmony_ci *
1362306a36Sopenharmony_ci * Fixes:
1462306a36Sopenharmony_ci *		Alan Cox	:	verify_area() now used correctly
1562306a36Sopenharmony_ci *		Alan Cox	:	new skbuff lists, look ma no backlogs!
1662306a36Sopenharmony_ci *		Alan Cox	:	tidied skbuff lists.
1762306a36Sopenharmony_ci *		Alan Cox	:	Now uses generic datagram routines I
1862306a36Sopenharmony_ci *					added. Also fixed the peek/read crash
1962306a36Sopenharmony_ci *					from all old Linux datagram code.
2062306a36Sopenharmony_ci *		Alan Cox	:	Uses the improved datagram code.
2162306a36Sopenharmony_ci *		Alan Cox	:	Added NULL's for socket options.
2262306a36Sopenharmony_ci *		Alan Cox	:	Re-commented the code.
2362306a36Sopenharmony_ci *		Alan Cox	:	Use new kernel side addressing
2462306a36Sopenharmony_ci *		Rob Janssen	:	Correct MTU usage.
2562306a36Sopenharmony_ci *		Dave Platt	:	Counter leaks caused by incorrect
2662306a36Sopenharmony_ci *					interrupt locking and some slightly
2762306a36Sopenharmony_ci *					dubious gcc output. Can you read
2862306a36Sopenharmony_ci *					compiler: it said _VOLATILE_
2962306a36Sopenharmony_ci *	Richard Kooijman	:	Timestamp fixes.
3062306a36Sopenharmony_ci *		Alan Cox	:	New buffers. Use sk->mac.raw.
3162306a36Sopenharmony_ci *		Alan Cox	:	sendmsg/recvmsg support.
3262306a36Sopenharmony_ci *		Alan Cox	:	Protocol setting support
3362306a36Sopenharmony_ci *	Alexey Kuznetsov	:	Untied from IPv4 stack.
3462306a36Sopenharmony_ci *	Cyrus Durgin		:	Fixed kerneld for kmod.
3562306a36Sopenharmony_ci *	Michal Ostrowski        :       Module initialization cleanup.
3662306a36Sopenharmony_ci *         Ulises Alonso        :       Frame number limit removal and
3762306a36Sopenharmony_ci *                                      packet_set_ring memory leak.
3862306a36Sopenharmony_ci *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
3962306a36Sopenharmony_ci *					The convention is that longer addresses
4062306a36Sopenharmony_ci *					will simply extend the hardware address
4162306a36Sopenharmony_ci *					byte arrays at the end of sockaddr_ll
4262306a36Sopenharmony_ci *					and packet_mreq.
4362306a36Sopenharmony_ci *		Johann Baudy	:	Added TX RING.
4462306a36Sopenharmony_ci *		Chetan Loke	:	Implemented TPACKET_V3 block abstraction
4562306a36Sopenharmony_ci *					layer.
4662306a36Sopenharmony_ci *					Copyright (C) 2011, <lokec@ccs.neu.edu>
4762306a36Sopenharmony_ci */
4862306a36Sopenharmony_ci
4962306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci#include <linux/ethtool.h>
5262306a36Sopenharmony_ci#include <linux/filter.h>
5362306a36Sopenharmony_ci#include <linux/types.h>
5462306a36Sopenharmony_ci#include <linux/mm.h>
5562306a36Sopenharmony_ci#include <linux/capability.h>
5662306a36Sopenharmony_ci#include <linux/fcntl.h>
5762306a36Sopenharmony_ci#include <linux/socket.h>
5862306a36Sopenharmony_ci#include <linux/in.h>
5962306a36Sopenharmony_ci#include <linux/inet.h>
6062306a36Sopenharmony_ci#include <linux/netdevice.h>
6162306a36Sopenharmony_ci#include <linux/if_packet.h>
6262306a36Sopenharmony_ci#include <linux/wireless.h>
6362306a36Sopenharmony_ci#include <linux/kernel.h>
6462306a36Sopenharmony_ci#include <linux/kmod.h>
6562306a36Sopenharmony_ci#include <linux/slab.h>
6662306a36Sopenharmony_ci#include <linux/vmalloc.h>
6762306a36Sopenharmony_ci#include <net/net_namespace.h>
6862306a36Sopenharmony_ci#include <net/ip.h>
6962306a36Sopenharmony_ci#include <net/protocol.h>
7062306a36Sopenharmony_ci#include <linux/skbuff.h>
7162306a36Sopenharmony_ci#include <net/sock.h>
7262306a36Sopenharmony_ci#include <linux/errno.h>
7362306a36Sopenharmony_ci#include <linux/timer.h>
7462306a36Sopenharmony_ci#include <linux/uaccess.h>
7562306a36Sopenharmony_ci#include <asm/ioctls.h>
7662306a36Sopenharmony_ci#include <asm/page.h>
7762306a36Sopenharmony_ci#include <asm/cacheflush.h>
7862306a36Sopenharmony_ci#include <asm/io.h>
7962306a36Sopenharmony_ci#include <linux/proc_fs.h>
8062306a36Sopenharmony_ci#include <linux/seq_file.h>
8162306a36Sopenharmony_ci#include <linux/poll.h>
8262306a36Sopenharmony_ci#include <linux/module.h>
8362306a36Sopenharmony_ci#include <linux/init.h>
8462306a36Sopenharmony_ci#include <linux/mutex.h>
8562306a36Sopenharmony_ci#include <linux/if_vlan.h>
8662306a36Sopenharmony_ci#include <linux/virtio_net.h>
8762306a36Sopenharmony_ci#include <linux/errqueue.h>
8862306a36Sopenharmony_ci#include <linux/net_tstamp.h>
8962306a36Sopenharmony_ci#include <linux/percpu.h>
9062306a36Sopenharmony_ci#ifdef CONFIG_INET
9162306a36Sopenharmony_ci#include <net/inet_common.h>
9262306a36Sopenharmony_ci#endif
9362306a36Sopenharmony_ci#include <linux/bpf.h>
9462306a36Sopenharmony_ci#include <net/compat.h>
9562306a36Sopenharmony_ci#include <linux/netfilter_netdev.h>
9662306a36Sopenharmony_ci
9762306a36Sopenharmony_ci#include "internal.h"
9862306a36Sopenharmony_ci
9962306a36Sopenharmony_ci/*
10062306a36Sopenharmony_ci   Assumptions:
10162306a36Sopenharmony_ci   - If the device has no dev->header_ops->create, there is no LL header
10262306a36Sopenharmony_ci     visible above the device. In this case, its hard_header_len should be 0.
10362306a36Sopenharmony_ci     The device may prepend its own header internally. In this case, its
10462306a36Sopenharmony_ci     needed_headroom should be set to the space needed for it to add its
10562306a36Sopenharmony_ci     internal header.
10662306a36Sopenharmony_ci     For example, a WiFi driver pretending to be an Ethernet driver should
10762306a36Sopenharmony_ci     set its hard_header_len to be the Ethernet header length, and set its
10862306a36Sopenharmony_ci     needed_headroom to be (the real WiFi header length - the fake Ethernet
10962306a36Sopenharmony_ci     header length).
11062306a36Sopenharmony_ci   - packet socket receives packets with pulled ll header,
11162306a36Sopenharmony_ci     so that SOCK_RAW should push it back.
11262306a36Sopenharmony_ci
11362306a36Sopenharmony_ciOn receive:
11462306a36Sopenharmony_ci-----------
11562306a36Sopenharmony_ci
11662306a36Sopenharmony_ciIncoming, dev_has_header(dev) == true
11762306a36Sopenharmony_ci   mac_header -> ll header
11862306a36Sopenharmony_ci   data       -> data
11962306a36Sopenharmony_ci
12062306a36Sopenharmony_ciOutgoing, dev_has_header(dev) == true
12162306a36Sopenharmony_ci   mac_header -> ll header
12262306a36Sopenharmony_ci   data       -> ll header
12362306a36Sopenharmony_ci
12462306a36Sopenharmony_ciIncoming, dev_has_header(dev) == false
12562306a36Sopenharmony_ci   mac_header -> data
12662306a36Sopenharmony_ci     However drivers often make it point to the ll header.
12762306a36Sopenharmony_ci     This is incorrect because the ll header should be invisible to us.
12862306a36Sopenharmony_ci   data       -> data
12962306a36Sopenharmony_ci
13062306a36Sopenharmony_ciOutgoing, dev_has_header(dev) == false
13162306a36Sopenharmony_ci   mac_header -> data. ll header is invisible to us.
13262306a36Sopenharmony_ci   data       -> data
13362306a36Sopenharmony_ci
Summary:
  If dev_has_header(dev) == false, we are unable to restore the ll header,
    because it is invisible to us.
13762306a36Sopenharmony_ci
13862306a36Sopenharmony_ci
13962306a36Sopenharmony_ciOn transmit:
14062306a36Sopenharmony_ci------------
14162306a36Sopenharmony_ci
14262306a36Sopenharmony_cidev_has_header(dev) == true
14362306a36Sopenharmony_ci   mac_header -> ll header
14462306a36Sopenharmony_ci   data       -> ll header
14562306a36Sopenharmony_ci
14662306a36Sopenharmony_cidev_has_header(dev) == false (ll header is invisible to us)
14762306a36Sopenharmony_ci   mac_header -> data
14862306a36Sopenharmony_ci   data       -> data
14962306a36Sopenharmony_ci
15062306a36Sopenharmony_ci   We should set network_header on output to the correct position,
15162306a36Sopenharmony_ci   packet classifier depends on it.
15262306a36Sopenharmony_ci */
15362306a36Sopenharmony_ci
15462306a36Sopenharmony_ci/* Private packet socket structures. */
15562306a36Sopenharmony_ci
/* identical to struct packet_mreq except it has
 * a longer address field.
 *
 * The address array is sized MAX_ADDR_LEN so that it can hold the
 * longest hardware address (see the file header: longer addresses
 * simply extend the byte array at the end of packet_mreq).
 */
struct packet_mreq_max {
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};
16562306a36Sopenharmony_ci
/* One frame pointer viewed through each of the TPACKET header layouts.
 * Code in this file stores the frame address in ->raw and then picks
 * ->h1, ->h2 or ->h3 according to po->tp_version.
 */
union tpacket_uhdr {
	struct tpacket_hdr  *h1;
	struct tpacket2_hdr *h2;
	struct tpacket3_hdr *h3;
	void *raw;
};
17262306a36Sopenharmony_ci
17362306a36Sopenharmony_cistatic int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
17462306a36Sopenharmony_ci		int closing, int tx_ring);
17562306a36Sopenharmony_ci
/* Alignment (in bytes) applied to the V3 block header and private area. */
#define V3_ALIGNMENT	(8)

/* Size of struct tpacket_block_desc rounded up to V3_ALIGNMENT. */
#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

/* Block header length plus the (aligned) size of the private area. */
#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

/* Field accessors; x is a pointer to a struct tpacket_block_desc. */
#define BLOCK_STATUS(x)	((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x)	((x)->offset_to_priv)
18962306a36Sopenharmony_ci
19062306a36Sopenharmony_cistruct packet_sock;
19162306a36Sopenharmony_cistatic int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
19262306a36Sopenharmony_ci		       struct packet_type *pt, struct net_device *orig_dev);
19362306a36Sopenharmony_ci
19462306a36Sopenharmony_cistatic void *packet_previous_frame(struct packet_sock *po,
19562306a36Sopenharmony_ci		struct packet_ring_buffer *rb,
19662306a36Sopenharmony_ci		int status);
19762306a36Sopenharmony_cistatic void packet_increment_head(struct packet_ring_buffer *buff);
19862306a36Sopenharmony_cistatic int prb_curr_blk_in_use(struct tpacket_block_desc *);
19962306a36Sopenharmony_cistatic void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
20062306a36Sopenharmony_ci			struct packet_sock *);
20162306a36Sopenharmony_cistatic void prb_retire_current_block(struct tpacket_kbdq_core *,
20262306a36Sopenharmony_ci		struct packet_sock *, unsigned int status);
20362306a36Sopenharmony_cistatic int prb_queue_frozen(struct tpacket_kbdq_core *);
20462306a36Sopenharmony_cistatic void prb_open_block(struct tpacket_kbdq_core *,
20562306a36Sopenharmony_ci		struct tpacket_block_desc *);
20662306a36Sopenharmony_cistatic void prb_retire_rx_blk_timer_expired(struct timer_list *);
20762306a36Sopenharmony_cistatic void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
20862306a36Sopenharmony_cistatic void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
20962306a36Sopenharmony_cistatic void prb_clear_rxhash(struct tpacket_kbdq_core *,
21062306a36Sopenharmony_ci		struct tpacket3_hdr *);
21162306a36Sopenharmony_cistatic void prb_fill_vlan_info(struct tpacket_kbdq_core *,
21262306a36Sopenharmony_ci		struct tpacket3_hdr *);
21362306a36Sopenharmony_cistatic void packet_flush_mclist(struct sock *sk);
21462306a36Sopenharmony_cistatic u16 packet_pick_tx_queue(struct sk_buff *skb);
21562306a36Sopenharmony_ci
/* Per-skb private data; PACKET_SKB_CB() overlays this structure on
 * skb->cb.  The address is kept either as a sockaddr_pkt or as a
 * sockaddr_ll, the latter sharing storage with the original length.
 */
struct packet_skb_cb {
	union {
		struct sockaddr_pkt pkt;
		union {
			/* Trick: alias skb original length with
			 * ll.sll_family and ll.protocol in order
			 * to save room.
			 */
			unsigned int origlen;
			struct sockaddr_ll ll;
		};
	} sa;
};
22962306a36Sopenharmony_ci
/* Shorthand for virtio_legacy_is_little_endian(). */
#define vio_le() virtio_legacy_is_little_endian()

/* View the control buffer of an skb as a struct packet_skb_cb. */
#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

/* x: pointer to a ring buffer; yields a pointer to its prb_bdqc member. */
#define GET_PBDQC_FROM_RB(x)	((struct tpacket_kbdq_core *)(&(x)->prb_bdqc))
/* x: block descriptor queue core; yields the descriptor at the start of
 * the buffer of block number bid.
 */
#define GET_PBLOCK_DESC(x, bid)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer))
/* Same as GET_PBLOCK_DESC() for the block indexed by kactive_blk_num. */
#define GET_CURR_PBLOCK_DESC_FROM_CORE(x)	\
	((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer))
/* Index following kactive_blk_num, wrapping to 0 after the last block. */
#define GET_NEXT_PRB_BLK_NUM(x) \
	(((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? \
	((x)->kactive_blk_num+1) : 0)
24262306a36Sopenharmony_ci
24362306a36Sopenharmony_cistatic void __fanout_unlink(struct sock *sk, struct packet_sock *po);
24462306a36Sopenharmony_cistatic void __fanout_link(struct sock *sk, struct packet_sock *po);
24562306a36Sopenharmony_ci
24662306a36Sopenharmony_ci#ifdef CONFIG_NETFILTER_EGRESS
24762306a36Sopenharmony_cistatic noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb)
24862306a36Sopenharmony_ci{
24962306a36Sopenharmony_ci	struct sk_buff *next, *head = NULL, *tail;
25062306a36Sopenharmony_ci	int rc;
25162306a36Sopenharmony_ci
25262306a36Sopenharmony_ci	rcu_read_lock();
25362306a36Sopenharmony_ci	for (; skb != NULL; skb = next) {
25462306a36Sopenharmony_ci		next = skb->next;
25562306a36Sopenharmony_ci		skb_mark_not_on_list(skb);
25662306a36Sopenharmony_ci
25762306a36Sopenharmony_ci		if (!nf_hook_egress(skb, &rc, skb->dev))
25862306a36Sopenharmony_ci			continue;
25962306a36Sopenharmony_ci
26062306a36Sopenharmony_ci		if (!head)
26162306a36Sopenharmony_ci			head = skb;
26262306a36Sopenharmony_ci		else
26362306a36Sopenharmony_ci			tail->next = skb;
26462306a36Sopenharmony_ci
26562306a36Sopenharmony_ci		tail = skb;
26662306a36Sopenharmony_ci	}
26762306a36Sopenharmony_ci	rcu_read_unlock();
26862306a36Sopenharmony_ci
26962306a36Sopenharmony_ci	return head;
27062306a36Sopenharmony_ci}
27162306a36Sopenharmony_ci#endif
27262306a36Sopenharmony_ci
27362306a36Sopenharmony_cistatic int packet_xmit(const struct packet_sock *po, struct sk_buff *skb)
27462306a36Sopenharmony_ci{
27562306a36Sopenharmony_ci	if (!packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS))
27662306a36Sopenharmony_ci		return dev_queue_xmit(skb);
27762306a36Sopenharmony_ci
27862306a36Sopenharmony_ci#ifdef CONFIG_NETFILTER_EGRESS
27962306a36Sopenharmony_ci	if (nf_hook_egress_active()) {
28062306a36Sopenharmony_ci		skb = nf_hook_direct_egress(skb);
28162306a36Sopenharmony_ci		if (!skb)
28262306a36Sopenharmony_ci			return NET_XMIT_DROP;
28362306a36Sopenharmony_ci	}
28462306a36Sopenharmony_ci#endif
28562306a36Sopenharmony_ci	return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
28662306a36Sopenharmony_ci}
28762306a36Sopenharmony_ci
28862306a36Sopenharmony_cistatic struct net_device *packet_cached_dev_get(struct packet_sock *po)
28962306a36Sopenharmony_ci{
29062306a36Sopenharmony_ci	struct net_device *dev;
29162306a36Sopenharmony_ci
29262306a36Sopenharmony_ci	rcu_read_lock();
29362306a36Sopenharmony_ci	dev = rcu_dereference(po->cached_dev);
29462306a36Sopenharmony_ci	dev_hold(dev);
29562306a36Sopenharmony_ci	rcu_read_unlock();
29662306a36Sopenharmony_ci
29762306a36Sopenharmony_ci	return dev;
29862306a36Sopenharmony_ci}
29962306a36Sopenharmony_ci
/* Publish @dev as the cached device of @po using rcu_assign_pointer(). */
static void packet_cached_dev_assign(struct packet_sock *po,
				     struct net_device *dev)
{
	rcu_assign_pointer(po->cached_dev, dev);
}
30562306a36Sopenharmony_ci
/* Clear the cached device pointer of @po (set it to NULL). */
static void packet_cached_dev_reset(struct packet_sock *po)
{
	RCU_INIT_POINTER(po->cached_dev, NULL);
}
31062306a36Sopenharmony_ci
31162306a36Sopenharmony_cistatic u16 packet_pick_tx_queue(struct sk_buff *skb)
31262306a36Sopenharmony_ci{
31362306a36Sopenharmony_ci	struct net_device *dev = skb->dev;
31462306a36Sopenharmony_ci	const struct net_device_ops *ops = dev->netdev_ops;
31562306a36Sopenharmony_ci	int cpu = raw_smp_processor_id();
31662306a36Sopenharmony_ci	u16 queue_index;
31762306a36Sopenharmony_ci
31862306a36Sopenharmony_ci#ifdef CONFIG_XPS
31962306a36Sopenharmony_ci	skb->sender_cpu = cpu + 1;
32062306a36Sopenharmony_ci#endif
32162306a36Sopenharmony_ci	skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
32262306a36Sopenharmony_ci	if (ops->ndo_select_queue) {
32362306a36Sopenharmony_ci		queue_index = ops->ndo_select_queue(dev, skb, NULL);
32462306a36Sopenharmony_ci		queue_index = netdev_cap_txqueue(dev, queue_index);
32562306a36Sopenharmony_ci	} else {
32662306a36Sopenharmony_ci		queue_index = netdev_pick_tx(dev, skb, NULL);
32762306a36Sopenharmony_ci	}
32862306a36Sopenharmony_ci
32962306a36Sopenharmony_ci	return queue_index;
33062306a36Sopenharmony_ci}
33162306a36Sopenharmony_ci
33262306a36Sopenharmony_ci/* __register_prot_hook must be invoked through register_prot_hook
33362306a36Sopenharmony_ci * or from a context in which asynchronous accesses to the packet
33462306a36Sopenharmony_ci * socket is not possible (packet_create()).
33562306a36Sopenharmony_ci */
33662306a36Sopenharmony_cistatic void __register_prot_hook(struct sock *sk)
33762306a36Sopenharmony_ci{
33862306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
33962306a36Sopenharmony_ci
34062306a36Sopenharmony_ci	if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
34162306a36Sopenharmony_ci		if (po->fanout)
34262306a36Sopenharmony_ci			__fanout_link(sk, po);
34362306a36Sopenharmony_ci		else
34462306a36Sopenharmony_ci			dev_add_pack(&po->prot_hook);
34562306a36Sopenharmony_ci
34662306a36Sopenharmony_ci		sock_hold(sk);
34762306a36Sopenharmony_ci		packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1);
34862306a36Sopenharmony_ci	}
34962306a36Sopenharmony_ci}
35062306a36Sopenharmony_ci
/* Locked entry point for __register_prot_hook(): asserts (via lockdep)
 * that the socket's bind_lock is held, then registers the hook.
 */
static void register_prot_hook(struct sock *sk)
{
	lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
	__register_prot_hook(sk);
}
35662306a36Sopenharmony_ci
/* If the sync parameter is true, we will temporarily drop
 * the po->bind_lock and do a synchronize_net to make sure no
 * asynchronous packet processing paths still refer to the elements
 * of po->prot_hook.  If the sync parameter is false, it is the
 * caller's responsibility to take care of this.
 *
 * Must be called with po->bind_lock held (asserted below); the lock is
 * held again on return even when it was dropped for the sync.
 */
static void __unregister_prot_hook(struct sock *sk, bool sync)
{
	struct packet_sock *po = pkt_sk(sk);

	lockdep_assert_held_once(&po->bind_lock);

	/* Mark the socket as no longer running before removing the hook. */
	packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0);

	if (po->fanout)
		__fanout_unlink(sk, po);
	else
		__dev_remove_pack(&po->prot_hook);

	/* Drop the reference taken by __register_prot_hook(). */
	__sock_put(sk);

	if (sync) {
		spin_unlock(&po->bind_lock);
		synchronize_net();
		spin_lock(&po->bind_lock);
	}
}
38462306a36Sopenharmony_ci
38562306a36Sopenharmony_cistatic void unregister_prot_hook(struct sock *sk, bool sync)
38662306a36Sopenharmony_ci{
38762306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
38862306a36Sopenharmony_ci
38962306a36Sopenharmony_ci	if (packet_sock_flag(po, PACKET_SOCK_RUNNING))
39062306a36Sopenharmony_ci		__unregister_prot_hook(sk, sync);
39162306a36Sopenharmony_ci}
39262306a36Sopenharmony_ci
39362306a36Sopenharmony_cistatic inline struct page * __pure pgv_to_page(void *addr)
39462306a36Sopenharmony_ci{
39562306a36Sopenharmony_ci	if (is_vmalloc_addr(addr))
39662306a36Sopenharmony_ci		return vmalloc_to_page(addr);
39762306a36Sopenharmony_ci	return virt_to_page(addr);
39862306a36Sopenharmony_ci}
39962306a36Sopenharmony_ci
40062306a36Sopenharmony_cistatic void __packet_set_status(struct packet_sock *po, void *frame, int status)
40162306a36Sopenharmony_ci{
40262306a36Sopenharmony_ci	union tpacket_uhdr h;
40362306a36Sopenharmony_ci
40462306a36Sopenharmony_ci	/* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
40562306a36Sopenharmony_ci
40662306a36Sopenharmony_ci	h.raw = frame;
40762306a36Sopenharmony_ci	switch (po->tp_version) {
40862306a36Sopenharmony_ci	case TPACKET_V1:
40962306a36Sopenharmony_ci		WRITE_ONCE(h.h1->tp_status, status);
41062306a36Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
41162306a36Sopenharmony_ci		break;
41262306a36Sopenharmony_ci	case TPACKET_V2:
41362306a36Sopenharmony_ci		WRITE_ONCE(h.h2->tp_status, status);
41462306a36Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
41562306a36Sopenharmony_ci		break;
41662306a36Sopenharmony_ci	case TPACKET_V3:
41762306a36Sopenharmony_ci		WRITE_ONCE(h.h3->tp_status, status);
41862306a36Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h3->tp_status));
41962306a36Sopenharmony_ci		break;
42062306a36Sopenharmony_ci	default:
42162306a36Sopenharmony_ci		WARN(1, "TPACKET version not supported.\n");
42262306a36Sopenharmony_ci		BUG();
42362306a36Sopenharmony_ci	}
42462306a36Sopenharmony_ci
42562306a36Sopenharmony_ci	smp_wmb();
42662306a36Sopenharmony_ci}
42762306a36Sopenharmony_ci
42862306a36Sopenharmony_cistatic int __packet_get_status(const struct packet_sock *po, void *frame)
42962306a36Sopenharmony_ci{
43062306a36Sopenharmony_ci	union tpacket_uhdr h;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	smp_rmb();
43362306a36Sopenharmony_ci
43462306a36Sopenharmony_ci	/* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
43562306a36Sopenharmony_ci
43662306a36Sopenharmony_ci	h.raw = frame;
43762306a36Sopenharmony_ci	switch (po->tp_version) {
43862306a36Sopenharmony_ci	case TPACKET_V1:
43962306a36Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
44062306a36Sopenharmony_ci		return READ_ONCE(h.h1->tp_status);
44162306a36Sopenharmony_ci	case TPACKET_V2:
44262306a36Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
44362306a36Sopenharmony_ci		return READ_ONCE(h.h2->tp_status);
44462306a36Sopenharmony_ci	case TPACKET_V3:
44562306a36Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h3->tp_status));
44662306a36Sopenharmony_ci		return READ_ONCE(h.h3->tp_status);
44762306a36Sopenharmony_ci	default:
44862306a36Sopenharmony_ci		WARN(1, "TPACKET version not supported.\n");
44962306a36Sopenharmony_ci		BUG();
45062306a36Sopenharmony_ci		return 0;
45162306a36Sopenharmony_ci	}
45262306a36Sopenharmony_ci}
45362306a36Sopenharmony_ci
45462306a36Sopenharmony_cistatic __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
45562306a36Sopenharmony_ci				   unsigned int flags)
45662306a36Sopenharmony_ci{
45762306a36Sopenharmony_ci	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
45862306a36Sopenharmony_ci
45962306a36Sopenharmony_ci	if (shhwtstamps &&
46062306a36Sopenharmony_ci	    (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
46162306a36Sopenharmony_ci	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
46262306a36Sopenharmony_ci		return TP_STATUS_TS_RAW_HARDWARE;
46362306a36Sopenharmony_ci
46462306a36Sopenharmony_ci	if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
46562306a36Sopenharmony_ci	    ktime_to_timespec64_cond(skb_tstamp(skb), ts))
46662306a36Sopenharmony_ci		return TP_STATUS_TS_SOFTWARE;
46762306a36Sopenharmony_ci
46862306a36Sopenharmony_ci	return 0;
46962306a36Sopenharmony_ci}
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_cistatic __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
47262306a36Sopenharmony_ci				    struct sk_buff *skb)
47362306a36Sopenharmony_ci{
47462306a36Sopenharmony_ci	union tpacket_uhdr h;
47562306a36Sopenharmony_ci	struct timespec64 ts;
47662306a36Sopenharmony_ci	__u32 ts_status;
47762306a36Sopenharmony_ci
47862306a36Sopenharmony_ci	if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp))))
47962306a36Sopenharmony_ci		return 0;
48062306a36Sopenharmony_ci
48162306a36Sopenharmony_ci	h.raw = frame;
48262306a36Sopenharmony_ci	/*
48362306a36Sopenharmony_ci	 * versions 1 through 3 overflow the timestamps in y2106, since they
48462306a36Sopenharmony_ci	 * all store the seconds in a 32-bit unsigned integer.
48562306a36Sopenharmony_ci	 * If we create a version 4, that should have a 64-bit timestamp,
48662306a36Sopenharmony_ci	 * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit
48762306a36Sopenharmony_ci	 * nanoseconds.
48862306a36Sopenharmony_ci	 */
48962306a36Sopenharmony_ci	switch (po->tp_version) {
49062306a36Sopenharmony_ci	case TPACKET_V1:
49162306a36Sopenharmony_ci		h.h1->tp_sec = ts.tv_sec;
49262306a36Sopenharmony_ci		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
49362306a36Sopenharmony_ci		break;
49462306a36Sopenharmony_ci	case TPACKET_V2:
49562306a36Sopenharmony_ci		h.h2->tp_sec = ts.tv_sec;
49662306a36Sopenharmony_ci		h.h2->tp_nsec = ts.tv_nsec;
49762306a36Sopenharmony_ci		break;
49862306a36Sopenharmony_ci	case TPACKET_V3:
49962306a36Sopenharmony_ci		h.h3->tp_sec = ts.tv_sec;
50062306a36Sopenharmony_ci		h.h3->tp_nsec = ts.tv_nsec;
50162306a36Sopenharmony_ci		break;
50262306a36Sopenharmony_ci	default:
50362306a36Sopenharmony_ci		WARN(1, "TPACKET version not supported.\n");
50462306a36Sopenharmony_ci		BUG();
50562306a36Sopenharmony_ci	}
50662306a36Sopenharmony_ci
50762306a36Sopenharmony_ci	/* one flush is safe, as both fields always lie on the same cacheline */
50862306a36Sopenharmony_ci	flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
50962306a36Sopenharmony_ci	smp_wmb();
51062306a36Sopenharmony_ci
51162306a36Sopenharmony_ci	return ts_status;
51262306a36Sopenharmony_ci}
51362306a36Sopenharmony_ci
51462306a36Sopenharmony_cistatic void *packet_lookup_frame(const struct packet_sock *po,
51562306a36Sopenharmony_ci				 const struct packet_ring_buffer *rb,
51662306a36Sopenharmony_ci				 unsigned int position,
51762306a36Sopenharmony_ci				 int status)
51862306a36Sopenharmony_ci{
51962306a36Sopenharmony_ci	unsigned int pg_vec_pos, frame_offset;
52062306a36Sopenharmony_ci	union tpacket_uhdr h;
52162306a36Sopenharmony_ci
52262306a36Sopenharmony_ci	pg_vec_pos = position / rb->frames_per_block;
52362306a36Sopenharmony_ci	frame_offset = position % rb->frames_per_block;
52462306a36Sopenharmony_ci
52562306a36Sopenharmony_ci	h.raw = rb->pg_vec[pg_vec_pos].buffer +
52662306a36Sopenharmony_ci		(frame_offset * rb->frame_size);
52762306a36Sopenharmony_ci
52862306a36Sopenharmony_ci	if (status != __packet_get_status(po, h.raw))
52962306a36Sopenharmony_ci		return NULL;
53062306a36Sopenharmony_ci
53162306a36Sopenharmony_ci	return h.raw;
53262306a36Sopenharmony_ci}
53362306a36Sopenharmony_ci
/* Look up the frame at the ring's current head position (rb->head).
 * Returns the frame if its status equals @status, NULL otherwise
 * (see packet_lookup_frame()).
 */
static void *packet_current_frame(struct packet_sock *po,
		struct packet_ring_buffer *rb,
		int status)
{
	return packet_lookup_frame(po, rb, rb->head, status);
}
54062306a36Sopenharmony_ci
/* Delete the block-retire timer of @pkc with del_timer_sync(). */
static void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
{
	del_timer_sync(&pkc->retire_blk_timer);
}
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_cistatic void prb_shutdown_retire_blk_timer(struct packet_sock *po,
54762306a36Sopenharmony_ci		struct sk_buff_head *rb_queue)
54862306a36Sopenharmony_ci{
54962306a36Sopenharmony_ci	struct tpacket_kbdq_core *pkc;
55062306a36Sopenharmony_ci
55162306a36Sopenharmony_ci	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
55262306a36Sopenharmony_ci
55362306a36Sopenharmony_ci	spin_lock_bh(&rb_queue->lock);
55462306a36Sopenharmony_ci	pkc->delete_blk_timer = 1;
55562306a36Sopenharmony_ci	spin_unlock_bh(&rb_queue->lock);
55662306a36Sopenharmony_ci
55762306a36Sopenharmony_ci	prb_del_retire_blk_timer(pkc);
55862306a36Sopenharmony_ci}
55962306a36Sopenharmony_ci
56062306a36Sopenharmony_cistatic void prb_setup_retire_blk_timer(struct packet_sock *po)
56162306a36Sopenharmony_ci{
56262306a36Sopenharmony_ci	struct tpacket_kbdq_core *pkc;
56362306a36Sopenharmony_ci
56462306a36Sopenharmony_ci	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
56562306a36Sopenharmony_ci	timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
56662306a36Sopenharmony_ci		    0);
56762306a36Sopenharmony_ci	pkc->retire_blk_timer.expires = jiffies;
56862306a36Sopenharmony_ci}
56962306a36Sopenharmony_ci
57062306a36Sopenharmony_cistatic int prb_calc_retire_blk_tmo(struct packet_sock *po,
57162306a36Sopenharmony_ci				int blk_size_in_bytes)
57262306a36Sopenharmony_ci{
57362306a36Sopenharmony_ci	struct net_device *dev;
57462306a36Sopenharmony_ci	unsigned int mbits, div;
57562306a36Sopenharmony_ci	struct ethtool_link_ksettings ecmd;
57662306a36Sopenharmony_ci	int err;
57762306a36Sopenharmony_ci
57862306a36Sopenharmony_ci	rtnl_lock();
57962306a36Sopenharmony_ci	dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
58062306a36Sopenharmony_ci	if (unlikely(!dev)) {
58162306a36Sopenharmony_ci		rtnl_unlock();
58262306a36Sopenharmony_ci		return DEFAULT_PRB_RETIRE_TOV;
58362306a36Sopenharmony_ci	}
58462306a36Sopenharmony_ci	err = __ethtool_get_link_ksettings(dev, &ecmd);
58562306a36Sopenharmony_ci	rtnl_unlock();
58662306a36Sopenharmony_ci	if (err)
58762306a36Sopenharmony_ci		return DEFAULT_PRB_RETIRE_TOV;
58862306a36Sopenharmony_ci
58962306a36Sopenharmony_ci	/* If the link speed is so slow you don't really
59062306a36Sopenharmony_ci	 * need to worry about perf anyways
59162306a36Sopenharmony_ci	 */
59262306a36Sopenharmony_ci	if (ecmd.base.speed < SPEED_1000 ||
59362306a36Sopenharmony_ci	    ecmd.base.speed == SPEED_UNKNOWN)
59462306a36Sopenharmony_ci		return DEFAULT_PRB_RETIRE_TOV;
59562306a36Sopenharmony_ci
59662306a36Sopenharmony_ci	div = ecmd.base.speed / 1000;
59762306a36Sopenharmony_ci	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
59862306a36Sopenharmony_ci
59962306a36Sopenharmony_ci	if (div)
60062306a36Sopenharmony_ci		mbits /= div;
60162306a36Sopenharmony_ci
60262306a36Sopenharmony_ci	if (div)
60362306a36Sopenharmony_ci		return mbits + 1;
60462306a36Sopenharmony_ci	return mbits;
60562306a36Sopenharmony_ci}
60662306a36Sopenharmony_ci
60762306a36Sopenharmony_cistatic void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
60862306a36Sopenharmony_ci			union tpacket_req_u *req_u)
60962306a36Sopenharmony_ci{
61062306a36Sopenharmony_ci	p1->feature_req_word = req_u->req3.tp_feature_req_word;
61162306a36Sopenharmony_ci}
61262306a36Sopenharmony_ci
61362306a36Sopenharmony_cistatic void init_prb_bdqc(struct packet_sock *po,
61462306a36Sopenharmony_ci			struct packet_ring_buffer *rb,
61562306a36Sopenharmony_ci			struct pgv *pg_vec,
61662306a36Sopenharmony_ci			union tpacket_req_u *req_u)
61762306a36Sopenharmony_ci{
61862306a36Sopenharmony_ci	struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
61962306a36Sopenharmony_ci	struct tpacket_block_desc *pbd;
62062306a36Sopenharmony_ci
62162306a36Sopenharmony_ci	memset(p1, 0x0, sizeof(*p1));
62262306a36Sopenharmony_ci
62362306a36Sopenharmony_ci	p1->knxt_seq_num = 1;
62462306a36Sopenharmony_ci	p1->pkbdq = pg_vec;
62562306a36Sopenharmony_ci	pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
62662306a36Sopenharmony_ci	p1->pkblk_start	= pg_vec[0].buffer;
62762306a36Sopenharmony_ci	p1->kblk_size = req_u->req3.tp_block_size;
62862306a36Sopenharmony_ci	p1->knum_blocks	= req_u->req3.tp_block_nr;
62962306a36Sopenharmony_ci	p1->hdrlen = po->tp_hdrlen;
63062306a36Sopenharmony_ci	p1->version = po->tp_version;
63162306a36Sopenharmony_ci	p1->last_kactive_blk_num = 0;
63262306a36Sopenharmony_ci	po->stats.stats3.tp_freeze_q_cnt = 0;
63362306a36Sopenharmony_ci	if (req_u->req3.tp_retire_blk_tov)
63462306a36Sopenharmony_ci		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
63562306a36Sopenharmony_ci	else
63662306a36Sopenharmony_ci		p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
63762306a36Sopenharmony_ci						req_u->req3.tp_block_size);
63862306a36Sopenharmony_ci	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
63962306a36Sopenharmony_ci	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
64062306a36Sopenharmony_ci	rwlock_init(&p1->blk_fill_in_prog_lock);
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci	p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
64362306a36Sopenharmony_ci	prb_init_ft_ops(p1, req_u);
64462306a36Sopenharmony_ci	prb_setup_retire_blk_timer(po);
64562306a36Sopenharmony_ci	prb_open_block(p1, pbd);
64662306a36Sopenharmony_ci}
64762306a36Sopenharmony_ci
64862306a36Sopenharmony_ci/*  Do NOT update the last_blk_num first.
64962306a36Sopenharmony_ci *  Assumes sk_buff_head lock is held.
65062306a36Sopenharmony_ci */
65162306a36Sopenharmony_cistatic void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
65262306a36Sopenharmony_ci{
65362306a36Sopenharmony_ci	mod_timer(&pkc->retire_blk_timer,
65462306a36Sopenharmony_ci			jiffies + pkc->tov_in_jiffies);
65562306a36Sopenharmony_ci	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
65662306a36Sopenharmony_ci}
65762306a36Sopenharmony_ci
65862306a36Sopenharmony_ci/*
65962306a36Sopenharmony_ci * Timer logic:
 * 1) We refresh the timer only when we open a block.
 *    By doing this we don't waste cycles refreshing the timer
 *    on a packet-by-packet basis.
66362306a36Sopenharmony_ci *
66462306a36Sopenharmony_ci * With a 1MB block-size, on a 1Gbps line, it will take
66562306a36Sopenharmony_ci * i) ~8 ms to fill a block + ii) memcpy etc.
66662306a36Sopenharmony_ci * In this cut we are not accounting for the memcpy time.
66762306a36Sopenharmony_ci *
66862306a36Sopenharmony_ci * So, if the user sets the 'tmo' to 10ms then the timer
66962306a36Sopenharmony_ci * will never fire while the block is still getting filled
67062306a36Sopenharmony_ci * (which is what we want). However, the user could choose
67162306a36Sopenharmony_ci * to close a block early and that's fine.
67262306a36Sopenharmony_ci *
 * But when the timer does fire, we check whether or not to refresh it.
 * Since the tmo granularity is in msecs, it is not too expensive
 * to refresh the timer, let's say every '8' msecs.
67662306a36Sopenharmony_ci * Either the user can set the 'tmo' or we can derive it based on
67762306a36Sopenharmony_ci * a) line-speed and b) block-size.
67862306a36Sopenharmony_ci * prb_calc_retire_blk_tmo() calculates the tmo.
67962306a36Sopenharmony_ci *
68062306a36Sopenharmony_ci */
68162306a36Sopenharmony_cistatic void prb_retire_rx_blk_timer_expired(struct timer_list *t)
68262306a36Sopenharmony_ci{
68362306a36Sopenharmony_ci	struct packet_sock *po =
68462306a36Sopenharmony_ci		from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
68562306a36Sopenharmony_ci	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
68662306a36Sopenharmony_ci	unsigned int frozen;
68762306a36Sopenharmony_ci	struct tpacket_block_desc *pbd;
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_ci	spin_lock(&po->sk.sk_receive_queue.lock);
69062306a36Sopenharmony_ci
69162306a36Sopenharmony_ci	frozen = prb_queue_frozen(pkc);
69262306a36Sopenharmony_ci	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
69362306a36Sopenharmony_ci
69462306a36Sopenharmony_ci	if (unlikely(pkc->delete_blk_timer))
69562306a36Sopenharmony_ci		goto out;
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ci	/* We only need to plug the race when the block is partially filled.
69862306a36Sopenharmony_ci	 * tpacket_rcv:
69962306a36Sopenharmony_ci	 *		lock(); increment BLOCK_NUM_PKTS; unlock()
70062306a36Sopenharmony_ci	 *		copy_bits() is in progress ...
70162306a36Sopenharmony_ci	 *		timer fires on other cpu:
70262306a36Sopenharmony_ci	 *		we can't retire the current block because copy_bits
70362306a36Sopenharmony_ci	 *		is in progress.
70462306a36Sopenharmony_ci	 *
70562306a36Sopenharmony_ci	 */
70662306a36Sopenharmony_ci	if (BLOCK_NUM_PKTS(pbd)) {
70762306a36Sopenharmony_ci		/* Waiting for skb_copy_bits to finish... */
70862306a36Sopenharmony_ci		write_lock(&pkc->blk_fill_in_prog_lock);
70962306a36Sopenharmony_ci		write_unlock(&pkc->blk_fill_in_prog_lock);
71062306a36Sopenharmony_ci	}
71162306a36Sopenharmony_ci
71262306a36Sopenharmony_ci	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
71362306a36Sopenharmony_ci		if (!frozen) {
71462306a36Sopenharmony_ci			if (!BLOCK_NUM_PKTS(pbd)) {
71562306a36Sopenharmony_ci				/* An empty block. Just refresh the timer. */
71662306a36Sopenharmony_ci				goto refresh_timer;
71762306a36Sopenharmony_ci			}
71862306a36Sopenharmony_ci			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
71962306a36Sopenharmony_ci			if (!prb_dispatch_next_block(pkc, po))
72062306a36Sopenharmony_ci				goto refresh_timer;
72162306a36Sopenharmony_ci			else
72262306a36Sopenharmony_ci				goto out;
72362306a36Sopenharmony_ci		} else {
72462306a36Sopenharmony_ci			/* Case 1. Queue was frozen because user-space was
72562306a36Sopenharmony_ci			 *	   lagging behind.
72662306a36Sopenharmony_ci			 */
72762306a36Sopenharmony_ci			if (prb_curr_blk_in_use(pbd)) {
72862306a36Sopenharmony_ci				/*
72962306a36Sopenharmony_ci				 * Ok, user-space is still behind.
73062306a36Sopenharmony_ci				 * So just refresh the timer.
73162306a36Sopenharmony_ci				 */
73262306a36Sopenharmony_ci				goto refresh_timer;
73362306a36Sopenharmony_ci			} else {
73462306a36Sopenharmony_ci			       /* Case 2. queue was frozen,user-space caught up,
73562306a36Sopenharmony_ci				* now the link went idle && the timer fired.
73662306a36Sopenharmony_ci				* We don't have a block to close.So we open this
73762306a36Sopenharmony_ci				* block and restart the timer.
73862306a36Sopenharmony_ci				* opening a block thaws the queue,restarts timer
73962306a36Sopenharmony_ci				* Thawing/timer-refresh is a side effect.
74062306a36Sopenharmony_ci				*/
74162306a36Sopenharmony_ci				prb_open_block(pkc, pbd);
74262306a36Sopenharmony_ci				goto out;
74362306a36Sopenharmony_ci			}
74462306a36Sopenharmony_ci		}
74562306a36Sopenharmony_ci	}
74662306a36Sopenharmony_ci
74762306a36Sopenharmony_cirefresh_timer:
74862306a36Sopenharmony_ci	_prb_refresh_rx_retire_blk_timer(pkc);
74962306a36Sopenharmony_ci
75062306a36Sopenharmony_ciout:
75162306a36Sopenharmony_ci	spin_unlock(&po->sk.sk_receive_queue.lock);
75262306a36Sopenharmony_ci}
75362306a36Sopenharmony_ci
75462306a36Sopenharmony_cistatic void prb_flush_block(struct tpacket_kbdq_core *pkc1,
75562306a36Sopenharmony_ci		struct tpacket_block_desc *pbd1, __u32 status)
75662306a36Sopenharmony_ci{
75762306a36Sopenharmony_ci	/* Flush everything minus the block header */
75862306a36Sopenharmony_ci
75962306a36Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
76062306a36Sopenharmony_ci	u8 *start, *end;
76162306a36Sopenharmony_ci
76262306a36Sopenharmony_ci	start = (u8 *)pbd1;
76362306a36Sopenharmony_ci
76462306a36Sopenharmony_ci	/* Skip the block header(we know header WILL fit in 4K) */
76562306a36Sopenharmony_ci	start += PAGE_SIZE;
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
76862306a36Sopenharmony_ci	for (; start < end; start += PAGE_SIZE)
76962306a36Sopenharmony_ci		flush_dcache_page(pgv_to_page(start));
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ci	smp_wmb();
77262306a36Sopenharmony_ci#endif
77362306a36Sopenharmony_ci
77462306a36Sopenharmony_ci	/* Now update the block status. */
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	BLOCK_STATUS(pbd1) = status;
77762306a36Sopenharmony_ci
77862306a36Sopenharmony_ci	/* Flush the block header */
77962306a36Sopenharmony_ci
78062306a36Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
78162306a36Sopenharmony_ci	start = (u8 *)pbd1;
78262306a36Sopenharmony_ci	flush_dcache_page(pgv_to_page(start));
78362306a36Sopenharmony_ci
78462306a36Sopenharmony_ci	smp_wmb();
78562306a36Sopenharmony_ci#endif
78662306a36Sopenharmony_ci}
78762306a36Sopenharmony_ci
78862306a36Sopenharmony_ci/*
78962306a36Sopenharmony_ci * Side effect:
79062306a36Sopenharmony_ci *
79162306a36Sopenharmony_ci * 1) flush the block
79262306a36Sopenharmony_ci * 2) Increment active_blk_num
79362306a36Sopenharmony_ci *
79462306a36Sopenharmony_ci * Note:We DONT refresh the timer on purpose.
79562306a36Sopenharmony_ci *	Because almost always the next block will be opened.
79662306a36Sopenharmony_ci */
79762306a36Sopenharmony_cistatic void prb_close_block(struct tpacket_kbdq_core *pkc1,
79862306a36Sopenharmony_ci		struct tpacket_block_desc *pbd1,
79962306a36Sopenharmony_ci		struct packet_sock *po, unsigned int stat)
80062306a36Sopenharmony_ci{
80162306a36Sopenharmony_ci	__u32 status = TP_STATUS_USER | stat;
80262306a36Sopenharmony_ci
80362306a36Sopenharmony_ci	struct tpacket3_hdr *last_pkt;
80462306a36Sopenharmony_ci	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
80562306a36Sopenharmony_ci	struct sock *sk = &po->sk;
80662306a36Sopenharmony_ci
80762306a36Sopenharmony_ci	if (atomic_read(&po->tp_drops))
80862306a36Sopenharmony_ci		status |= TP_STATUS_LOSING;
80962306a36Sopenharmony_ci
81062306a36Sopenharmony_ci	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
81162306a36Sopenharmony_ci	last_pkt->tp_next_offset = 0;
81262306a36Sopenharmony_ci
81362306a36Sopenharmony_ci	/* Get the ts of the last pkt */
81462306a36Sopenharmony_ci	if (BLOCK_NUM_PKTS(pbd1)) {
81562306a36Sopenharmony_ci		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
81662306a36Sopenharmony_ci		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
81762306a36Sopenharmony_ci	} else {
81862306a36Sopenharmony_ci		/* Ok, we tmo'd - so get the current time.
81962306a36Sopenharmony_ci		 *
82062306a36Sopenharmony_ci		 * It shouldn't really happen as we don't close empty
82162306a36Sopenharmony_ci		 * blocks. See prb_retire_rx_blk_timer_expired().
82262306a36Sopenharmony_ci		 */
82362306a36Sopenharmony_ci		struct timespec64 ts;
82462306a36Sopenharmony_ci		ktime_get_real_ts64(&ts);
82562306a36Sopenharmony_ci		h1->ts_last_pkt.ts_sec = ts.tv_sec;
82662306a36Sopenharmony_ci		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
82762306a36Sopenharmony_ci	}
82862306a36Sopenharmony_ci
82962306a36Sopenharmony_ci	smp_wmb();
83062306a36Sopenharmony_ci
83162306a36Sopenharmony_ci	/* Flush the block */
83262306a36Sopenharmony_ci	prb_flush_block(pkc1, pbd1, status);
83362306a36Sopenharmony_ci
83462306a36Sopenharmony_ci	sk->sk_data_ready(sk);
83562306a36Sopenharmony_ci
83662306a36Sopenharmony_ci	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
83762306a36Sopenharmony_ci}
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_cistatic void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
84062306a36Sopenharmony_ci{
84162306a36Sopenharmony_ci	pkc->reset_pending_on_curr_blk = 0;
84262306a36Sopenharmony_ci}
84362306a36Sopenharmony_ci
84462306a36Sopenharmony_ci/*
84562306a36Sopenharmony_ci * Side effect of opening a block:
84662306a36Sopenharmony_ci *
84762306a36Sopenharmony_ci * 1) prb_queue is thawed.
84862306a36Sopenharmony_ci * 2) retire_blk_timer is refreshed.
84962306a36Sopenharmony_ci *
85062306a36Sopenharmony_ci */
85162306a36Sopenharmony_cistatic void prb_open_block(struct tpacket_kbdq_core *pkc1,
85262306a36Sopenharmony_ci	struct tpacket_block_desc *pbd1)
85362306a36Sopenharmony_ci{
85462306a36Sopenharmony_ci	struct timespec64 ts;
85562306a36Sopenharmony_ci	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	smp_rmb();
85862306a36Sopenharmony_ci
85962306a36Sopenharmony_ci	/* We could have just memset this but we will lose the
86062306a36Sopenharmony_ci	 * flexibility of making the priv area sticky
86162306a36Sopenharmony_ci	 */
86262306a36Sopenharmony_ci
86362306a36Sopenharmony_ci	BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
86462306a36Sopenharmony_ci	BLOCK_NUM_PKTS(pbd1) = 0;
86562306a36Sopenharmony_ci	BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	ktime_get_real_ts64(&ts);
86862306a36Sopenharmony_ci
86962306a36Sopenharmony_ci	h1->ts_first_pkt.ts_sec = ts.tv_sec;
87062306a36Sopenharmony_ci	h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
87162306a36Sopenharmony_ci
87262306a36Sopenharmony_ci	pkc1->pkblk_start = (char *)pbd1;
87362306a36Sopenharmony_ci	pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
87462306a36Sopenharmony_ci
87562306a36Sopenharmony_ci	BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
87662306a36Sopenharmony_ci	BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci	pbd1->version = pkc1->version;
87962306a36Sopenharmony_ci	pkc1->prev = pkc1->nxt_offset;
88062306a36Sopenharmony_ci	pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
88162306a36Sopenharmony_ci
88262306a36Sopenharmony_ci	prb_thaw_queue(pkc1);
88362306a36Sopenharmony_ci	_prb_refresh_rx_retire_blk_timer(pkc1);
88462306a36Sopenharmony_ci
88562306a36Sopenharmony_ci	smp_wmb();
88662306a36Sopenharmony_ci}
88762306a36Sopenharmony_ci
88862306a36Sopenharmony_ci/*
88962306a36Sopenharmony_ci * Queue freeze logic:
89062306a36Sopenharmony_ci * 1) Assume tp_block_nr = 8 blocks.
89162306a36Sopenharmony_ci * 2) At time 't0', user opens Rx ring.
89262306a36Sopenharmony_ci * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
89362306a36Sopenharmony_ci * 4) user-space is either sleeping or processing block '0'.
89462306a36Sopenharmony_ci * 5) tpacket_rcv is currently filling block '7', since there is no space left,
89562306a36Sopenharmony_ci *    it will close block-7,loop around and try to fill block '0'.
89662306a36Sopenharmony_ci *    call-flow:
89762306a36Sopenharmony_ci *    __packet_lookup_frame_in_block
89862306a36Sopenharmony_ci *      prb_retire_current_block()
89962306a36Sopenharmony_ci *      prb_dispatch_next_block()
90062306a36Sopenharmony_ci *        |->(BLOCK_STATUS == USER) evaluates to true
90162306a36Sopenharmony_ci *    5.1) Since block-0 is currently in-use, we just freeze the queue.
90262306a36Sopenharmony_ci * 6) Now there are two cases:
90362306a36Sopenharmony_ci *    6.1) Link goes idle right after the queue is frozen.
90462306a36Sopenharmony_ci *         But remember, the last open_block() refreshed the timer.
90562306a36Sopenharmony_ci *         When this timer expires,it will refresh itself so that we can
90662306a36Sopenharmony_ci *         re-open block-0 in near future.
90762306a36Sopenharmony_ci *    6.2) Link is busy and keeps on receiving packets. This is a simple
90862306a36Sopenharmony_ci *         case and __packet_lookup_frame_in_block will check if block-0
90962306a36Sopenharmony_ci *         is free and can now be re-used.
91062306a36Sopenharmony_ci */
91162306a36Sopenharmony_cistatic void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
91262306a36Sopenharmony_ci				  struct packet_sock *po)
91362306a36Sopenharmony_ci{
91462306a36Sopenharmony_ci	pkc->reset_pending_on_curr_blk = 1;
91562306a36Sopenharmony_ci	po->stats.stats3.tp_freeze_q_cnt++;
91662306a36Sopenharmony_ci}
91762306a36Sopenharmony_ci
/* Space a packet of @length occupies in a block once ALIGN()ed to V3_ALIGNMENT. */
#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT))
91962306a36Sopenharmony_ci
92062306a36Sopenharmony_ci/*
92162306a36Sopenharmony_ci * If the next block is free then we will dispatch it
92262306a36Sopenharmony_ci * and return a good offset.
92362306a36Sopenharmony_ci * Else, we will freeze the queue.
92462306a36Sopenharmony_ci * So, caller must check the return value.
92562306a36Sopenharmony_ci */
92662306a36Sopenharmony_cistatic void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
92762306a36Sopenharmony_ci		struct packet_sock *po)
92862306a36Sopenharmony_ci{
92962306a36Sopenharmony_ci	struct tpacket_block_desc *pbd;
93062306a36Sopenharmony_ci
93162306a36Sopenharmony_ci	smp_rmb();
93262306a36Sopenharmony_ci
93362306a36Sopenharmony_ci	/* 1. Get current block num */
93462306a36Sopenharmony_ci	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
93562306a36Sopenharmony_ci
93662306a36Sopenharmony_ci	/* 2. If this block is currently in_use then freeze the queue */
93762306a36Sopenharmony_ci	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
93862306a36Sopenharmony_ci		prb_freeze_queue(pkc, po);
93962306a36Sopenharmony_ci		return NULL;
94062306a36Sopenharmony_ci	}
94162306a36Sopenharmony_ci
94262306a36Sopenharmony_ci	/*
94362306a36Sopenharmony_ci	 * 3.
94462306a36Sopenharmony_ci	 * open this block and return the offset where the first packet
94562306a36Sopenharmony_ci	 * needs to get stored.
94662306a36Sopenharmony_ci	 */
94762306a36Sopenharmony_ci	prb_open_block(pkc, pbd);
94862306a36Sopenharmony_ci	return (void *)pkc->nxt_offset;
94962306a36Sopenharmony_ci}
95062306a36Sopenharmony_ci
95162306a36Sopenharmony_cistatic void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
95262306a36Sopenharmony_ci		struct packet_sock *po, unsigned int status)
95362306a36Sopenharmony_ci{
95462306a36Sopenharmony_ci	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
95562306a36Sopenharmony_ci
95662306a36Sopenharmony_ci	/* retire/close the current block */
95762306a36Sopenharmony_ci	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
95862306a36Sopenharmony_ci		/*
95962306a36Sopenharmony_ci		 * Plug the case where copy_bits() is in progress on
96062306a36Sopenharmony_ci		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
96162306a36Sopenharmony_ci		 * have space to copy the pkt in the current block and
96262306a36Sopenharmony_ci		 * called prb_retire_current_block()
96362306a36Sopenharmony_ci		 *
96462306a36Sopenharmony_ci		 * We don't need to worry about the TMO case because
96562306a36Sopenharmony_ci		 * the timer-handler already handled this case.
96662306a36Sopenharmony_ci		 */
96762306a36Sopenharmony_ci		if (!(status & TP_STATUS_BLK_TMO)) {
96862306a36Sopenharmony_ci			/* Waiting for skb_copy_bits to finish... */
96962306a36Sopenharmony_ci			write_lock(&pkc->blk_fill_in_prog_lock);
97062306a36Sopenharmony_ci			write_unlock(&pkc->blk_fill_in_prog_lock);
97162306a36Sopenharmony_ci		}
97262306a36Sopenharmony_ci		prb_close_block(pkc, pbd, po, status);
97362306a36Sopenharmony_ci		return;
97462306a36Sopenharmony_ci	}
97562306a36Sopenharmony_ci}
97662306a36Sopenharmony_ci
97762306a36Sopenharmony_cistatic int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
97862306a36Sopenharmony_ci{
97962306a36Sopenharmony_ci	return TP_STATUS_USER & BLOCK_STATUS(pbd);
98062306a36Sopenharmony_ci}
98162306a36Sopenharmony_ci
98262306a36Sopenharmony_cistatic int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
98362306a36Sopenharmony_ci{
98462306a36Sopenharmony_ci	return pkc->reset_pending_on_curr_blk;
98562306a36Sopenharmony_ci}
98662306a36Sopenharmony_ci
98762306a36Sopenharmony_cistatic void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
98862306a36Sopenharmony_ci	__releases(&pkc->blk_fill_in_prog_lock)
98962306a36Sopenharmony_ci{
99062306a36Sopenharmony_ci	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
99162306a36Sopenharmony_ci
99262306a36Sopenharmony_ci	read_unlock(&pkc->blk_fill_in_prog_lock);
99362306a36Sopenharmony_ci}
99462306a36Sopenharmony_ci
99562306a36Sopenharmony_cistatic void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
99662306a36Sopenharmony_ci			struct tpacket3_hdr *ppd)
99762306a36Sopenharmony_ci{
99862306a36Sopenharmony_ci	ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
99962306a36Sopenharmony_ci}
100062306a36Sopenharmony_ci
100162306a36Sopenharmony_cistatic void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
100262306a36Sopenharmony_ci			struct tpacket3_hdr *ppd)
100362306a36Sopenharmony_ci{
100462306a36Sopenharmony_ci	ppd->hv1.tp_rxhash = 0;
100562306a36Sopenharmony_ci}
100662306a36Sopenharmony_ci
100762306a36Sopenharmony_cistatic void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
100862306a36Sopenharmony_ci			struct tpacket3_hdr *ppd)
100962306a36Sopenharmony_ci{
101062306a36Sopenharmony_ci	if (skb_vlan_tag_present(pkc->skb)) {
101162306a36Sopenharmony_ci		ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
101262306a36Sopenharmony_ci		ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
101362306a36Sopenharmony_ci		ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
101462306a36Sopenharmony_ci	} else {
101562306a36Sopenharmony_ci		ppd->hv1.tp_vlan_tci = 0;
101662306a36Sopenharmony_ci		ppd->hv1.tp_vlan_tpid = 0;
101762306a36Sopenharmony_ci		ppd->tp_status = TP_STATUS_AVAILABLE;
101862306a36Sopenharmony_ci	}
101962306a36Sopenharmony_ci}
102062306a36Sopenharmony_ci
102162306a36Sopenharmony_cistatic void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
102262306a36Sopenharmony_ci			struct tpacket3_hdr *ppd)
102362306a36Sopenharmony_ci{
102462306a36Sopenharmony_ci	ppd->hv1.tp_padding = 0;
102562306a36Sopenharmony_ci	prb_fill_vlan_info(pkc, ppd);
102662306a36Sopenharmony_ci
102762306a36Sopenharmony_ci	if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
102862306a36Sopenharmony_ci		prb_fill_rxhash(pkc, ppd);
102962306a36Sopenharmony_ci	else
103062306a36Sopenharmony_ci		prb_clear_rxhash(pkc, ppd);
103162306a36Sopenharmony_ci}
103262306a36Sopenharmony_ci
103362306a36Sopenharmony_cistatic void prb_fill_curr_block(char *curr,
103462306a36Sopenharmony_ci				struct tpacket_kbdq_core *pkc,
103562306a36Sopenharmony_ci				struct tpacket_block_desc *pbd,
103662306a36Sopenharmony_ci				unsigned int len)
103762306a36Sopenharmony_ci	__acquires(&pkc->blk_fill_in_prog_lock)
103862306a36Sopenharmony_ci{
103962306a36Sopenharmony_ci	struct tpacket3_hdr *ppd;
104062306a36Sopenharmony_ci
104162306a36Sopenharmony_ci	ppd  = (struct tpacket3_hdr *)curr;
104262306a36Sopenharmony_ci	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
104362306a36Sopenharmony_ci	pkc->prev = curr;
104462306a36Sopenharmony_ci	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
104562306a36Sopenharmony_ci	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
104662306a36Sopenharmony_ci	BLOCK_NUM_PKTS(pbd) += 1;
104762306a36Sopenharmony_ci	read_lock(&pkc->blk_fill_in_prog_lock);
104862306a36Sopenharmony_ci	prb_run_all_ft_ops(pkc, ppd);
104962306a36Sopenharmony_ci}
105062306a36Sopenharmony_ci
105162306a36Sopenharmony_ci/* Assumes caller has the sk->rx_queue.lock */
105262306a36Sopenharmony_cistatic void *__packet_lookup_frame_in_block(struct packet_sock *po,
105362306a36Sopenharmony_ci					    struct sk_buff *skb,
105462306a36Sopenharmony_ci					    unsigned int len
105562306a36Sopenharmony_ci					    )
105662306a36Sopenharmony_ci{
105762306a36Sopenharmony_ci	struct tpacket_kbdq_core *pkc;
105862306a36Sopenharmony_ci	struct tpacket_block_desc *pbd;
105962306a36Sopenharmony_ci	char *curr, *end;
106062306a36Sopenharmony_ci
106162306a36Sopenharmony_ci	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
106262306a36Sopenharmony_ci	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
106362306a36Sopenharmony_ci
106462306a36Sopenharmony_ci	/* Queue is frozen when user space is lagging behind */
106562306a36Sopenharmony_ci	if (prb_queue_frozen(pkc)) {
106662306a36Sopenharmony_ci		/*
106762306a36Sopenharmony_ci		 * Check if that last block which caused the queue to freeze,
106862306a36Sopenharmony_ci		 * is still in_use by user-space.
106962306a36Sopenharmony_ci		 */
107062306a36Sopenharmony_ci		if (prb_curr_blk_in_use(pbd)) {
107162306a36Sopenharmony_ci			/* Can't record this packet */
107262306a36Sopenharmony_ci			return NULL;
107362306a36Sopenharmony_ci		} else {
107462306a36Sopenharmony_ci			/*
107562306a36Sopenharmony_ci			 * Ok, the block was released by user-space.
107662306a36Sopenharmony_ci			 * Now let's open that block.
107762306a36Sopenharmony_ci			 * opening a block also thaws the queue.
107862306a36Sopenharmony_ci			 * Thawing is a side effect.
107962306a36Sopenharmony_ci			 */
108062306a36Sopenharmony_ci			prb_open_block(pkc, pbd);
108162306a36Sopenharmony_ci		}
108262306a36Sopenharmony_ci	}
108362306a36Sopenharmony_ci
108462306a36Sopenharmony_ci	smp_mb();
108562306a36Sopenharmony_ci	curr = pkc->nxt_offset;
108662306a36Sopenharmony_ci	pkc->skb = skb;
108762306a36Sopenharmony_ci	end = (char *)pbd + pkc->kblk_size;
108862306a36Sopenharmony_ci
108962306a36Sopenharmony_ci	/* first try the current block */
109062306a36Sopenharmony_ci	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
109162306a36Sopenharmony_ci		prb_fill_curr_block(curr, pkc, pbd, len);
109262306a36Sopenharmony_ci		return (void *)curr;
109362306a36Sopenharmony_ci	}
109462306a36Sopenharmony_ci
109562306a36Sopenharmony_ci	/* Ok, close the current block */
109662306a36Sopenharmony_ci	prb_retire_current_block(pkc, po, 0);
109762306a36Sopenharmony_ci
109862306a36Sopenharmony_ci	/* Now, try to dispatch the next block */
109962306a36Sopenharmony_ci	curr = (char *)prb_dispatch_next_block(pkc, po);
110062306a36Sopenharmony_ci	if (curr) {
110162306a36Sopenharmony_ci		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
110262306a36Sopenharmony_ci		prb_fill_curr_block(curr, pkc, pbd, len);
110362306a36Sopenharmony_ci		return (void *)curr;
110462306a36Sopenharmony_ci	}
110562306a36Sopenharmony_ci
110662306a36Sopenharmony_ci	/*
110762306a36Sopenharmony_ci	 * No free blocks are available.user_space hasn't caught up yet.
110862306a36Sopenharmony_ci	 * Queue was just frozen and now this packet will get dropped.
110962306a36Sopenharmony_ci	 */
111062306a36Sopenharmony_ci	return NULL;
111162306a36Sopenharmony_ci}
111262306a36Sopenharmony_ci
111362306a36Sopenharmony_cistatic void *packet_current_rx_frame(struct packet_sock *po,
111462306a36Sopenharmony_ci					    struct sk_buff *skb,
111562306a36Sopenharmony_ci					    int status, unsigned int len)
111662306a36Sopenharmony_ci{
111762306a36Sopenharmony_ci	char *curr = NULL;
111862306a36Sopenharmony_ci	switch (po->tp_version) {
111962306a36Sopenharmony_ci	case TPACKET_V1:
112062306a36Sopenharmony_ci	case TPACKET_V2:
112162306a36Sopenharmony_ci		curr = packet_lookup_frame(po, &po->rx_ring,
112262306a36Sopenharmony_ci					po->rx_ring.head, status);
112362306a36Sopenharmony_ci		return curr;
112462306a36Sopenharmony_ci	case TPACKET_V3:
112562306a36Sopenharmony_ci		return __packet_lookup_frame_in_block(po, skb, len);
112662306a36Sopenharmony_ci	default:
112762306a36Sopenharmony_ci		WARN(1, "TPACKET version not supported\n");
112862306a36Sopenharmony_ci		BUG();
112962306a36Sopenharmony_ci		return NULL;
113062306a36Sopenharmony_ci	}
113162306a36Sopenharmony_ci}
113262306a36Sopenharmony_ci
113362306a36Sopenharmony_cistatic void *prb_lookup_block(const struct packet_sock *po,
113462306a36Sopenharmony_ci			      const struct packet_ring_buffer *rb,
113562306a36Sopenharmony_ci			      unsigned int idx,
113662306a36Sopenharmony_ci			      int status)
113762306a36Sopenharmony_ci{
113862306a36Sopenharmony_ci	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
113962306a36Sopenharmony_ci	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
114062306a36Sopenharmony_ci
114162306a36Sopenharmony_ci	if (status != BLOCK_STATUS(pbd))
114262306a36Sopenharmony_ci		return NULL;
114362306a36Sopenharmony_ci	return pbd;
114462306a36Sopenharmony_ci}
114562306a36Sopenharmony_ci
114662306a36Sopenharmony_cistatic int prb_previous_blk_num(struct packet_ring_buffer *rb)
114762306a36Sopenharmony_ci{
114862306a36Sopenharmony_ci	unsigned int prev;
114962306a36Sopenharmony_ci	if (rb->prb_bdqc.kactive_blk_num)
115062306a36Sopenharmony_ci		prev = rb->prb_bdqc.kactive_blk_num-1;
115162306a36Sopenharmony_ci	else
115262306a36Sopenharmony_ci		prev = rb->prb_bdqc.knum_blocks-1;
115362306a36Sopenharmony_ci	return prev;
115462306a36Sopenharmony_ci}
115562306a36Sopenharmony_ci
/*
 * Look up the block preceding the active one; returns it only if its
 * status equals @status, otherwise NULL.
 *
 * Assumes the caller holds the receive-queue lock.
 */
static void *__prb_previous_block(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	return prb_lookup_block(po, rb, prb_previous_blk_num(rb), status);
}
116462306a36Sopenharmony_ci
116562306a36Sopenharmony_cistatic void *packet_previous_rx_frame(struct packet_sock *po,
116662306a36Sopenharmony_ci					     struct packet_ring_buffer *rb,
116762306a36Sopenharmony_ci					     int status)
116862306a36Sopenharmony_ci{
116962306a36Sopenharmony_ci	if (po->tp_version <= TPACKET_V2)
117062306a36Sopenharmony_ci		return packet_previous_frame(po, rb, status);
117162306a36Sopenharmony_ci
117262306a36Sopenharmony_ci	return __prb_previous_block(po, rb, status);
117362306a36Sopenharmony_ci}
117462306a36Sopenharmony_ci
117562306a36Sopenharmony_cistatic void packet_increment_rx_head(struct packet_sock *po,
117662306a36Sopenharmony_ci					    struct packet_ring_buffer *rb)
117762306a36Sopenharmony_ci{
117862306a36Sopenharmony_ci	switch (po->tp_version) {
117962306a36Sopenharmony_ci	case TPACKET_V1:
118062306a36Sopenharmony_ci	case TPACKET_V2:
118162306a36Sopenharmony_ci		return packet_increment_head(rb);
118262306a36Sopenharmony_ci	case TPACKET_V3:
118362306a36Sopenharmony_ci	default:
118462306a36Sopenharmony_ci		WARN(1, "TPACKET version not supported.\n");
118562306a36Sopenharmony_ci		BUG();
118662306a36Sopenharmony_ci		return;
118762306a36Sopenharmony_ci	}
118862306a36Sopenharmony_ci}
118962306a36Sopenharmony_ci
119062306a36Sopenharmony_cistatic void *packet_previous_frame(struct packet_sock *po,
119162306a36Sopenharmony_ci		struct packet_ring_buffer *rb,
119262306a36Sopenharmony_ci		int status)
119362306a36Sopenharmony_ci{
119462306a36Sopenharmony_ci	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
119562306a36Sopenharmony_ci	return packet_lookup_frame(po, rb, previous, status);
119662306a36Sopenharmony_ci}
119762306a36Sopenharmony_ci
119862306a36Sopenharmony_cistatic void packet_increment_head(struct packet_ring_buffer *buff)
119962306a36Sopenharmony_ci{
120062306a36Sopenharmony_ci	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
120162306a36Sopenharmony_ci}
120262306a36Sopenharmony_ci
120362306a36Sopenharmony_cistatic void packet_inc_pending(struct packet_ring_buffer *rb)
120462306a36Sopenharmony_ci{
120562306a36Sopenharmony_ci	this_cpu_inc(*rb->pending_refcnt);
120662306a36Sopenharmony_ci}
120762306a36Sopenharmony_ci
120862306a36Sopenharmony_cistatic void packet_dec_pending(struct packet_ring_buffer *rb)
120962306a36Sopenharmony_ci{
121062306a36Sopenharmony_ci	this_cpu_dec(*rb->pending_refcnt);
121162306a36Sopenharmony_ci}
121262306a36Sopenharmony_ci
121362306a36Sopenharmony_cistatic unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
121462306a36Sopenharmony_ci{
121562306a36Sopenharmony_ci	unsigned int refcnt = 0;
121662306a36Sopenharmony_ci	int cpu;
121762306a36Sopenharmony_ci
121862306a36Sopenharmony_ci	/* We don't use pending refcount in rx_ring. */
121962306a36Sopenharmony_ci	if (rb->pending_refcnt == NULL)
122062306a36Sopenharmony_ci		return 0;
122162306a36Sopenharmony_ci
122262306a36Sopenharmony_ci	for_each_possible_cpu(cpu)
122362306a36Sopenharmony_ci		refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);
122462306a36Sopenharmony_ci
122562306a36Sopenharmony_ci	return refcnt;
122662306a36Sopenharmony_ci}
122762306a36Sopenharmony_ci
122862306a36Sopenharmony_cistatic int packet_alloc_pending(struct packet_sock *po)
122962306a36Sopenharmony_ci{
123062306a36Sopenharmony_ci	po->rx_ring.pending_refcnt = NULL;
123162306a36Sopenharmony_ci
123262306a36Sopenharmony_ci	po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
123362306a36Sopenharmony_ci	if (unlikely(po->tx_ring.pending_refcnt == NULL))
123462306a36Sopenharmony_ci		return -ENOBUFS;
123562306a36Sopenharmony_ci
123662306a36Sopenharmony_ci	return 0;
123762306a36Sopenharmony_ci}
123862306a36Sopenharmony_ci
123962306a36Sopenharmony_cistatic void packet_free_pending(struct packet_sock *po)
124062306a36Sopenharmony_ci{
124162306a36Sopenharmony_ci	free_percpu(po->tx_ring.pending_refcnt);
124262306a36Sopenharmony_ci}
124362306a36Sopenharmony_ci
/*
 * Receive-room levels reported by __packet_rcv_has_room().
 *
 * ROOM_POW_OFF is a right-shift count: capacity >> ROOM_POW_OFF is the
 * margin that separates ROOM_NORMAL from ROOM_LOW.
 */
#define ROOM_POW_OFF	2
#define ROOM_NONE	0x0	/* nothing left */
#define ROOM_LOW	0x1	/* some room, but within the margin */
#define ROOM_NORMAL	0x2	/* more room than the margin */
124862306a36Sopenharmony_ci
124962306a36Sopenharmony_cistatic bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
125062306a36Sopenharmony_ci{
125162306a36Sopenharmony_ci	int idx, len;
125262306a36Sopenharmony_ci
125362306a36Sopenharmony_ci	len = READ_ONCE(po->rx_ring.frame_max) + 1;
125462306a36Sopenharmony_ci	idx = READ_ONCE(po->rx_ring.head);
125562306a36Sopenharmony_ci	if (pow_off)
125662306a36Sopenharmony_ci		idx += len >> pow_off;
125762306a36Sopenharmony_ci	if (idx >= len)
125862306a36Sopenharmony_ci		idx -= len;
125962306a36Sopenharmony_ci	return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
126062306a36Sopenharmony_ci}
126162306a36Sopenharmony_ci
126262306a36Sopenharmony_cistatic bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
126362306a36Sopenharmony_ci{
126462306a36Sopenharmony_ci	int idx, len;
126562306a36Sopenharmony_ci
126662306a36Sopenharmony_ci	len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
126762306a36Sopenharmony_ci	idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
126862306a36Sopenharmony_ci	if (pow_off)
126962306a36Sopenharmony_ci		idx += len >> pow_off;
127062306a36Sopenharmony_ci	if (idx >= len)
127162306a36Sopenharmony_ci		idx -= len;
127262306a36Sopenharmony_ci	return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
127362306a36Sopenharmony_ci}
127462306a36Sopenharmony_ci
127562306a36Sopenharmony_cistatic int __packet_rcv_has_room(const struct packet_sock *po,
127662306a36Sopenharmony_ci				 const struct sk_buff *skb)
127762306a36Sopenharmony_ci{
127862306a36Sopenharmony_ci	const struct sock *sk = &po->sk;
127962306a36Sopenharmony_ci	int ret = ROOM_NONE;
128062306a36Sopenharmony_ci
128162306a36Sopenharmony_ci	if (po->prot_hook.func != tpacket_rcv) {
128262306a36Sopenharmony_ci		int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
128362306a36Sopenharmony_ci		int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
128462306a36Sopenharmony_ci				   - (skb ? skb->truesize : 0);
128562306a36Sopenharmony_ci
128662306a36Sopenharmony_ci		if (avail > (rcvbuf >> ROOM_POW_OFF))
128762306a36Sopenharmony_ci			return ROOM_NORMAL;
128862306a36Sopenharmony_ci		else if (avail > 0)
128962306a36Sopenharmony_ci			return ROOM_LOW;
129062306a36Sopenharmony_ci		else
129162306a36Sopenharmony_ci			return ROOM_NONE;
129262306a36Sopenharmony_ci	}
129362306a36Sopenharmony_ci
129462306a36Sopenharmony_ci	if (po->tp_version == TPACKET_V3) {
129562306a36Sopenharmony_ci		if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
129662306a36Sopenharmony_ci			ret = ROOM_NORMAL;
129762306a36Sopenharmony_ci		else if (__tpacket_v3_has_room(po, 0))
129862306a36Sopenharmony_ci			ret = ROOM_LOW;
129962306a36Sopenharmony_ci	} else {
130062306a36Sopenharmony_ci		if (__tpacket_has_room(po, ROOM_POW_OFF))
130162306a36Sopenharmony_ci			ret = ROOM_NORMAL;
130262306a36Sopenharmony_ci		else if (__tpacket_has_room(po, 0))
130362306a36Sopenharmony_ci			ret = ROOM_LOW;
130462306a36Sopenharmony_ci	}
130562306a36Sopenharmony_ci
130662306a36Sopenharmony_ci	return ret;
130762306a36Sopenharmony_ci}
130862306a36Sopenharmony_ci
130962306a36Sopenharmony_cistatic int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
131062306a36Sopenharmony_ci{
131162306a36Sopenharmony_ci	bool pressure;
131262306a36Sopenharmony_ci	int ret;
131362306a36Sopenharmony_ci
131462306a36Sopenharmony_ci	ret = __packet_rcv_has_room(po, skb);
131562306a36Sopenharmony_ci	pressure = ret != ROOM_NORMAL;
131662306a36Sopenharmony_ci
131762306a36Sopenharmony_ci	if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure)
131862306a36Sopenharmony_ci		packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure);
131962306a36Sopenharmony_ci
132062306a36Sopenharmony_ci	return ret;
132162306a36Sopenharmony_ci}
132262306a36Sopenharmony_ci
132362306a36Sopenharmony_cistatic void packet_rcv_try_clear_pressure(struct packet_sock *po)
132462306a36Sopenharmony_ci{
132562306a36Sopenharmony_ci	if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) &&
132662306a36Sopenharmony_ci	    __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
132762306a36Sopenharmony_ci		packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false);
132862306a36Sopenharmony_ci}
132962306a36Sopenharmony_ci
133062306a36Sopenharmony_cistatic void packet_sock_destruct(struct sock *sk)
133162306a36Sopenharmony_ci{
133262306a36Sopenharmony_ci	skb_queue_purge(&sk->sk_error_queue);
133362306a36Sopenharmony_ci
133462306a36Sopenharmony_ci	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
133562306a36Sopenharmony_ci	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
133662306a36Sopenharmony_ci
133762306a36Sopenharmony_ci	if (!sock_flag(sk, SOCK_DEAD)) {
133862306a36Sopenharmony_ci		pr_err("Attempt to release alive packet socket: %p\n", sk);
133962306a36Sopenharmony_ci		return;
134062306a36Sopenharmony_ci	}
134162306a36Sopenharmony_ci}
134262306a36Sopenharmony_ci
134362306a36Sopenharmony_cistatic bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
134462306a36Sopenharmony_ci{
134562306a36Sopenharmony_ci	u32 *history = po->rollover->history;
134662306a36Sopenharmony_ci	u32 victim, rxhash;
134762306a36Sopenharmony_ci	int i, count = 0;
134862306a36Sopenharmony_ci
134962306a36Sopenharmony_ci	rxhash = skb_get_hash(skb);
135062306a36Sopenharmony_ci	for (i = 0; i < ROLLOVER_HLEN; i++)
135162306a36Sopenharmony_ci		if (READ_ONCE(history[i]) == rxhash)
135262306a36Sopenharmony_ci			count++;
135362306a36Sopenharmony_ci
135462306a36Sopenharmony_ci	victim = get_random_u32_below(ROLLOVER_HLEN);
135562306a36Sopenharmony_ci
135662306a36Sopenharmony_ci	/* Avoid dirtying the cache line if possible */
135762306a36Sopenharmony_ci	if (READ_ONCE(history[victim]) != rxhash)
135862306a36Sopenharmony_ci		WRITE_ONCE(history[victim], rxhash);
135962306a36Sopenharmony_ci
136062306a36Sopenharmony_ci	return count > (ROLLOVER_HLEN >> 1);
136162306a36Sopenharmony_ci}
136262306a36Sopenharmony_ci
136362306a36Sopenharmony_cistatic unsigned int fanout_demux_hash(struct packet_fanout *f,
136462306a36Sopenharmony_ci				      struct sk_buff *skb,
136562306a36Sopenharmony_ci				      unsigned int num)
136662306a36Sopenharmony_ci{
136762306a36Sopenharmony_ci	return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
136862306a36Sopenharmony_ci}
136962306a36Sopenharmony_ci
137062306a36Sopenharmony_cistatic unsigned int fanout_demux_lb(struct packet_fanout *f,
137162306a36Sopenharmony_ci				    struct sk_buff *skb,
137262306a36Sopenharmony_ci				    unsigned int num)
137362306a36Sopenharmony_ci{
137462306a36Sopenharmony_ci	unsigned int val = atomic_inc_return(&f->rr_cur);
137562306a36Sopenharmony_ci
137662306a36Sopenharmony_ci	return val % num;
137762306a36Sopenharmony_ci}
137862306a36Sopenharmony_ci
/* PACKET_FANOUT_CPU: return the id of the executing CPU modulo @num. */
static unsigned int fanout_demux_cpu(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	unsigned int cpu = smp_processor_id();

	return cpu % num;
}
138562306a36Sopenharmony_ci
/* PACKET_FANOUT_RND: return a random index below @num. */
static unsigned int fanout_demux_rnd(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	unsigned int pick = get_random_u32_below(num);

	return pick;
}
139262306a36Sopenharmony_ci
139362306a36Sopenharmony_cistatic unsigned int fanout_demux_rollover(struct packet_fanout *f,
139462306a36Sopenharmony_ci					  struct sk_buff *skb,
139562306a36Sopenharmony_ci					  unsigned int idx, bool try_self,
139662306a36Sopenharmony_ci					  unsigned int num)
139762306a36Sopenharmony_ci{
139862306a36Sopenharmony_ci	struct packet_sock *po, *po_next, *po_skip = NULL;
139962306a36Sopenharmony_ci	unsigned int i, j, room = ROOM_NONE;
140062306a36Sopenharmony_ci
140162306a36Sopenharmony_ci	po = pkt_sk(rcu_dereference(f->arr[idx]));
140262306a36Sopenharmony_ci
140362306a36Sopenharmony_ci	if (try_self) {
140462306a36Sopenharmony_ci		room = packet_rcv_has_room(po, skb);
140562306a36Sopenharmony_ci		if (room == ROOM_NORMAL ||
140662306a36Sopenharmony_ci		    (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
140762306a36Sopenharmony_ci			return idx;
140862306a36Sopenharmony_ci		po_skip = po;
140962306a36Sopenharmony_ci	}
141062306a36Sopenharmony_ci
141162306a36Sopenharmony_ci	i = j = min_t(int, po->rollover->sock, num - 1);
141262306a36Sopenharmony_ci	do {
141362306a36Sopenharmony_ci		po_next = pkt_sk(rcu_dereference(f->arr[i]));
141462306a36Sopenharmony_ci		if (po_next != po_skip &&
141562306a36Sopenharmony_ci		    !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) &&
141662306a36Sopenharmony_ci		    packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
141762306a36Sopenharmony_ci			if (i != j)
141862306a36Sopenharmony_ci				po->rollover->sock = i;
141962306a36Sopenharmony_ci			atomic_long_inc(&po->rollover->num);
142062306a36Sopenharmony_ci			if (room == ROOM_LOW)
142162306a36Sopenharmony_ci				atomic_long_inc(&po->rollover->num_huge);
142262306a36Sopenharmony_ci			return i;
142362306a36Sopenharmony_ci		}
142462306a36Sopenharmony_ci
142562306a36Sopenharmony_ci		if (++i == num)
142662306a36Sopenharmony_ci			i = 0;
142762306a36Sopenharmony_ci	} while (i != j);
142862306a36Sopenharmony_ci
142962306a36Sopenharmony_ci	atomic_long_inc(&po->rollover->num_failed);
143062306a36Sopenharmony_ci	return idx;
143162306a36Sopenharmony_ci}
143262306a36Sopenharmony_ci
/* PACKET_FANOUT_QM: return the queue mapping recorded in @skb modulo @num. */
static unsigned int fanout_demux_qm(struct packet_fanout *f,
				    struct sk_buff *skb,
				    unsigned int num)
{
	unsigned int queue = skb_get_queue_mapping(skb);

	return queue % num;
}
143962306a36Sopenharmony_ci
144062306a36Sopenharmony_cistatic unsigned int fanout_demux_bpf(struct packet_fanout *f,
144162306a36Sopenharmony_ci				     struct sk_buff *skb,
144262306a36Sopenharmony_ci				     unsigned int num)
144362306a36Sopenharmony_ci{
144462306a36Sopenharmony_ci	struct bpf_prog *prog;
144562306a36Sopenharmony_ci	unsigned int ret = 0;
144662306a36Sopenharmony_ci
144762306a36Sopenharmony_ci	rcu_read_lock();
144862306a36Sopenharmony_ci	prog = rcu_dereference(f->bpf_prog);
144962306a36Sopenharmony_ci	if (prog)
145062306a36Sopenharmony_ci		ret = bpf_prog_run_clear_cb(prog, skb) % num;
145162306a36Sopenharmony_ci	rcu_read_unlock();
145262306a36Sopenharmony_ci
145362306a36Sopenharmony_ci	return ret;
145462306a36Sopenharmony_ci}
145562306a36Sopenharmony_ci
145662306a36Sopenharmony_cistatic bool fanout_has_flag(struct packet_fanout *f, u16 flag)
145762306a36Sopenharmony_ci{
145862306a36Sopenharmony_ci	return f->flags & (flag >> 8);
145962306a36Sopenharmony_ci}
146062306a36Sopenharmony_ci
146162306a36Sopenharmony_cistatic int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
146262306a36Sopenharmony_ci			     struct packet_type *pt, struct net_device *orig_dev)
146362306a36Sopenharmony_ci{
146462306a36Sopenharmony_ci	struct packet_fanout *f = pt->af_packet_priv;
146562306a36Sopenharmony_ci	unsigned int num = READ_ONCE(f->num_members);
146662306a36Sopenharmony_ci	struct net *net = read_pnet(&f->net);
146762306a36Sopenharmony_ci	struct packet_sock *po;
146862306a36Sopenharmony_ci	unsigned int idx;
146962306a36Sopenharmony_ci
147062306a36Sopenharmony_ci	if (!net_eq(dev_net(dev), net) || !num) {
147162306a36Sopenharmony_ci		kfree_skb(skb);
147262306a36Sopenharmony_ci		return 0;
147362306a36Sopenharmony_ci	}
147462306a36Sopenharmony_ci
147562306a36Sopenharmony_ci	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
147662306a36Sopenharmony_ci		skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
147762306a36Sopenharmony_ci		if (!skb)
147862306a36Sopenharmony_ci			return 0;
147962306a36Sopenharmony_ci	}
148062306a36Sopenharmony_ci	switch (f->type) {
148162306a36Sopenharmony_ci	case PACKET_FANOUT_HASH:
148262306a36Sopenharmony_ci	default:
148362306a36Sopenharmony_ci		idx = fanout_demux_hash(f, skb, num);
148462306a36Sopenharmony_ci		break;
148562306a36Sopenharmony_ci	case PACKET_FANOUT_LB:
148662306a36Sopenharmony_ci		idx = fanout_demux_lb(f, skb, num);
148762306a36Sopenharmony_ci		break;
148862306a36Sopenharmony_ci	case PACKET_FANOUT_CPU:
148962306a36Sopenharmony_ci		idx = fanout_demux_cpu(f, skb, num);
149062306a36Sopenharmony_ci		break;
149162306a36Sopenharmony_ci	case PACKET_FANOUT_RND:
149262306a36Sopenharmony_ci		idx = fanout_demux_rnd(f, skb, num);
149362306a36Sopenharmony_ci		break;
149462306a36Sopenharmony_ci	case PACKET_FANOUT_QM:
149562306a36Sopenharmony_ci		idx = fanout_demux_qm(f, skb, num);
149662306a36Sopenharmony_ci		break;
149762306a36Sopenharmony_ci	case PACKET_FANOUT_ROLLOVER:
149862306a36Sopenharmony_ci		idx = fanout_demux_rollover(f, skb, 0, false, num);
149962306a36Sopenharmony_ci		break;
150062306a36Sopenharmony_ci	case PACKET_FANOUT_CBPF:
150162306a36Sopenharmony_ci	case PACKET_FANOUT_EBPF:
150262306a36Sopenharmony_ci		idx = fanout_demux_bpf(f, skb, num);
150362306a36Sopenharmony_ci		break;
150462306a36Sopenharmony_ci	}
150562306a36Sopenharmony_ci
150662306a36Sopenharmony_ci	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
150762306a36Sopenharmony_ci		idx = fanout_demux_rollover(f, skb, idx, true, num);
150862306a36Sopenharmony_ci
150962306a36Sopenharmony_ci	po = pkt_sk(rcu_dereference(f->arr[idx]));
151062306a36Sopenharmony_ci	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
151162306a36Sopenharmony_ci}
151262306a36Sopenharmony_ci
151362306a36Sopenharmony_ciDEFINE_MUTEX(fanout_mutex);
151462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(fanout_mutex);
151562306a36Sopenharmony_cistatic LIST_HEAD(fanout_list);
151662306a36Sopenharmony_cistatic u16 fanout_next_id;
151762306a36Sopenharmony_ci
151862306a36Sopenharmony_cistatic void __fanout_link(struct sock *sk, struct packet_sock *po)
151962306a36Sopenharmony_ci{
152062306a36Sopenharmony_ci	struct packet_fanout *f = po->fanout;
152162306a36Sopenharmony_ci
152262306a36Sopenharmony_ci	spin_lock(&f->lock);
152362306a36Sopenharmony_ci	rcu_assign_pointer(f->arr[f->num_members], sk);
152462306a36Sopenharmony_ci	smp_wmb();
152562306a36Sopenharmony_ci	f->num_members++;
152662306a36Sopenharmony_ci	if (f->num_members == 1)
152762306a36Sopenharmony_ci		dev_add_pack(&f->prot_hook);
152862306a36Sopenharmony_ci	spin_unlock(&f->lock);
152962306a36Sopenharmony_ci}
153062306a36Sopenharmony_ci
153162306a36Sopenharmony_cistatic void __fanout_unlink(struct sock *sk, struct packet_sock *po)
153262306a36Sopenharmony_ci{
153362306a36Sopenharmony_ci	struct packet_fanout *f = po->fanout;
153462306a36Sopenharmony_ci	int i;
153562306a36Sopenharmony_ci
153662306a36Sopenharmony_ci	spin_lock(&f->lock);
153762306a36Sopenharmony_ci	for (i = 0; i < f->num_members; i++) {
153862306a36Sopenharmony_ci		if (rcu_dereference_protected(f->arr[i],
153962306a36Sopenharmony_ci					      lockdep_is_held(&f->lock)) == sk)
154062306a36Sopenharmony_ci			break;
154162306a36Sopenharmony_ci	}
154262306a36Sopenharmony_ci	BUG_ON(i >= f->num_members);
154362306a36Sopenharmony_ci	rcu_assign_pointer(f->arr[i],
154462306a36Sopenharmony_ci			   rcu_dereference_protected(f->arr[f->num_members - 1],
154562306a36Sopenharmony_ci						     lockdep_is_held(&f->lock)));
154662306a36Sopenharmony_ci	f->num_members--;
154762306a36Sopenharmony_ci	if (f->num_members == 0)
154862306a36Sopenharmony_ci		__dev_remove_pack(&f->prot_hook);
154962306a36Sopenharmony_ci	spin_unlock(&f->lock);
155062306a36Sopenharmony_ci}
155162306a36Sopenharmony_ci
155262306a36Sopenharmony_cistatic bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
155362306a36Sopenharmony_ci{
155462306a36Sopenharmony_ci	if (sk->sk_family != PF_PACKET)
155562306a36Sopenharmony_ci		return false;
155662306a36Sopenharmony_ci
155762306a36Sopenharmony_ci	return ptype->af_packet_priv == pkt_sk(sk)->fanout;
155862306a36Sopenharmony_ci}
155962306a36Sopenharmony_ci
156062306a36Sopenharmony_cistatic void fanout_init_data(struct packet_fanout *f)
156162306a36Sopenharmony_ci{
156262306a36Sopenharmony_ci	switch (f->type) {
156362306a36Sopenharmony_ci	case PACKET_FANOUT_LB:
156462306a36Sopenharmony_ci		atomic_set(&f->rr_cur, 0);
156562306a36Sopenharmony_ci		break;
156662306a36Sopenharmony_ci	case PACKET_FANOUT_CBPF:
156762306a36Sopenharmony_ci	case PACKET_FANOUT_EBPF:
156862306a36Sopenharmony_ci		RCU_INIT_POINTER(f->bpf_prog, NULL);
156962306a36Sopenharmony_ci		break;
157062306a36Sopenharmony_ci	}
157162306a36Sopenharmony_ci}
157262306a36Sopenharmony_ci
157362306a36Sopenharmony_cistatic void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
157462306a36Sopenharmony_ci{
157562306a36Sopenharmony_ci	struct bpf_prog *old;
157662306a36Sopenharmony_ci
157762306a36Sopenharmony_ci	spin_lock(&f->lock);
157862306a36Sopenharmony_ci	old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
157962306a36Sopenharmony_ci	rcu_assign_pointer(f->bpf_prog, new);
158062306a36Sopenharmony_ci	spin_unlock(&f->lock);
158162306a36Sopenharmony_ci
158262306a36Sopenharmony_ci	if (old) {
158362306a36Sopenharmony_ci		synchronize_net();
158462306a36Sopenharmony_ci		bpf_prog_destroy(old);
158562306a36Sopenharmony_ci	}
158662306a36Sopenharmony_ci}
158762306a36Sopenharmony_ci
158862306a36Sopenharmony_cistatic int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
158962306a36Sopenharmony_ci				unsigned int len)
159062306a36Sopenharmony_ci{
159162306a36Sopenharmony_ci	struct bpf_prog *new;
159262306a36Sopenharmony_ci	struct sock_fprog fprog;
159362306a36Sopenharmony_ci	int ret;
159462306a36Sopenharmony_ci
159562306a36Sopenharmony_ci	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
159662306a36Sopenharmony_ci		return -EPERM;
159762306a36Sopenharmony_ci
159862306a36Sopenharmony_ci	ret = copy_bpf_fprog_from_user(&fprog, data, len);
159962306a36Sopenharmony_ci	if (ret)
160062306a36Sopenharmony_ci		return ret;
160162306a36Sopenharmony_ci
160262306a36Sopenharmony_ci	ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
160362306a36Sopenharmony_ci	if (ret)
160462306a36Sopenharmony_ci		return ret;
160562306a36Sopenharmony_ci
160662306a36Sopenharmony_ci	__fanout_set_data_bpf(po->fanout, new);
160762306a36Sopenharmony_ci	return 0;
160862306a36Sopenharmony_ci}
160962306a36Sopenharmony_ci
161062306a36Sopenharmony_cistatic int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
161162306a36Sopenharmony_ci				unsigned int len)
161262306a36Sopenharmony_ci{
161362306a36Sopenharmony_ci	struct bpf_prog *new;
161462306a36Sopenharmony_ci	u32 fd;
161562306a36Sopenharmony_ci
161662306a36Sopenharmony_ci	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
161762306a36Sopenharmony_ci		return -EPERM;
161862306a36Sopenharmony_ci	if (len != sizeof(fd))
161962306a36Sopenharmony_ci		return -EINVAL;
162062306a36Sopenharmony_ci	if (copy_from_sockptr(&fd, data, len))
162162306a36Sopenharmony_ci		return -EFAULT;
162262306a36Sopenharmony_ci
162362306a36Sopenharmony_ci	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
162462306a36Sopenharmony_ci	if (IS_ERR(new))
162562306a36Sopenharmony_ci		return PTR_ERR(new);
162662306a36Sopenharmony_ci
162762306a36Sopenharmony_ci	__fanout_set_data_bpf(po->fanout, new);
162862306a36Sopenharmony_ci	return 0;
162962306a36Sopenharmony_ci}
163062306a36Sopenharmony_ci
163162306a36Sopenharmony_cistatic int fanout_set_data(struct packet_sock *po, sockptr_t data,
163262306a36Sopenharmony_ci			   unsigned int len)
163362306a36Sopenharmony_ci{
163462306a36Sopenharmony_ci	switch (po->fanout->type) {
163562306a36Sopenharmony_ci	case PACKET_FANOUT_CBPF:
163662306a36Sopenharmony_ci		return fanout_set_data_cbpf(po, data, len);
163762306a36Sopenharmony_ci	case PACKET_FANOUT_EBPF:
163862306a36Sopenharmony_ci		return fanout_set_data_ebpf(po, data, len);
163962306a36Sopenharmony_ci	default:
164062306a36Sopenharmony_ci		return -EINVAL;
164162306a36Sopenharmony_ci	}
164262306a36Sopenharmony_ci}
164362306a36Sopenharmony_ci
164462306a36Sopenharmony_cistatic void fanout_release_data(struct packet_fanout *f)
164562306a36Sopenharmony_ci{
164662306a36Sopenharmony_ci	switch (f->type) {
164762306a36Sopenharmony_ci	case PACKET_FANOUT_CBPF:
164862306a36Sopenharmony_ci	case PACKET_FANOUT_EBPF:
164962306a36Sopenharmony_ci		__fanout_set_data_bpf(f, NULL);
165062306a36Sopenharmony_ci	}
165162306a36Sopenharmony_ci}
165262306a36Sopenharmony_ci
165362306a36Sopenharmony_cistatic bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
165462306a36Sopenharmony_ci{
165562306a36Sopenharmony_ci	struct packet_fanout *f;
165662306a36Sopenharmony_ci
165762306a36Sopenharmony_ci	list_for_each_entry(f, &fanout_list, list) {
165862306a36Sopenharmony_ci		if (f->id == candidate_id &&
165962306a36Sopenharmony_ci		    read_pnet(&f->net) == sock_net(sk)) {
166062306a36Sopenharmony_ci			return false;
166162306a36Sopenharmony_ci		}
166262306a36Sopenharmony_ci	}
166362306a36Sopenharmony_ci	return true;
166462306a36Sopenharmony_ci}
166562306a36Sopenharmony_ci
166662306a36Sopenharmony_cistatic bool fanout_find_new_id(struct sock *sk, u16 *new_id)
166762306a36Sopenharmony_ci{
166862306a36Sopenharmony_ci	u16 id = fanout_next_id;
166962306a36Sopenharmony_ci
167062306a36Sopenharmony_ci	do {
167162306a36Sopenharmony_ci		if (__fanout_id_is_free(sk, id)) {
167262306a36Sopenharmony_ci			*new_id = id;
167362306a36Sopenharmony_ci			fanout_next_id = id + 1;
167462306a36Sopenharmony_ci			return true;
167562306a36Sopenharmony_ci		}
167662306a36Sopenharmony_ci
167762306a36Sopenharmony_ci		id++;
167862306a36Sopenharmony_ci	} while (id != fanout_next_id);
167962306a36Sopenharmony_ci
168062306a36Sopenharmony_ci	return false;
168162306a36Sopenharmony_ci}
168262306a36Sopenharmony_ci
168362306a36Sopenharmony_cistatic int fanout_add(struct sock *sk, struct fanout_args *args)
168462306a36Sopenharmony_ci{
168562306a36Sopenharmony_ci	struct packet_rollover *rollover = NULL;
168662306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
168762306a36Sopenharmony_ci	u16 type_flags = args->type_flags;
168862306a36Sopenharmony_ci	struct packet_fanout *f, *match;
168962306a36Sopenharmony_ci	u8 type = type_flags & 0xff;
169062306a36Sopenharmony_ci	u8 flags = type_flags >> 8;
169162306a36Sopenharmony_ci	u16 id = args->id;
169262306a36Sopenharmony_ci	int err;
169362306a36Sopenharmony_ci
169462306a36Sopenharmony_ci	switch (type) {
169562306a36Sopenharmony_ci	case PACKET_FANOUT_ROLLOVER:
169662306a36Sopenharmony_ci		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
169762306a36Sopenharmony_ci			return -EINVAL;
169862306a36Sopenharmony_ci		break;
169962306a36Sopenharmony_ci	case PACKET_FANOUT_HASH:
170062306a36Sopenharmony_ci	case PACKET_FANOUT_LB:
170162306a36Sopenharmony_ci	case PACKET_FANOUT_CPU:
170262306a36Sopenharmony_ci	case PACKET_FANOUT_RND:
170362306a36Sopenharmony_ci	case PACKET_FANOUT_QM:
170462306a36Sopenharmony_ci	case PACKET_FANOUT_CBPF:
170562306a36Sopenharmony_ci	case PACKET_FANOUT_EBPF:
170662306a36Sopenharmony_ci		break;
170762306a36Sopenharmony_ci	default:
170862306a36Sopenharmony_ci		return -EINVAL;
170962306a36Sopenharmony_ci	}
171062306a36Sopenharmony_ci
171162306a36Sopenharmony_ci	mutex_lock(&fanout_mutex);
171262306a36Sopenharmony_ci
171362306a36Sopenharmony_ci	err = -EALREADY;
171462306a36Sopenharmony_ci	if (po->fanout)
171562306a36Sopenharmony_ci		goto out;
171662306a36Sopenharmony_ci
171762306a36Sopenharmony_ci	if (type == PACKET_FANOUT_ROLLOVER ||
171862306a36Sopenharmony_ci	    (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
171962306a36Sopenharmony_ci		err = -ENOMEM;
172062306a36Sopenharmony_ci		rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
172162306a36Sopenharmony_ci		if (!rollover)
172262306a36Sopenharmony_ci			goto out;
172362306a36Sopenharmony_ci		atomic_long_set(&rollover->num, 0);
172462306a36Sopenharmony_ci		atomic_long_set(&rollover->num_huge, 0);
172562306a36Sopenharmony_ci		atomic_long_set(&rollover->num_failed, 0);
172662306a36Sopenharmony_ci	}
172762306a36Sopenharmony_ci
172862306a36Sopenharmony_ci	if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
172962306a36Sopenharmony_ci		if (id != 0) {
173062306a36Sopenharmony_ci			err = -EINVAL;
173162306a36Sopenharmony_ci			goto out;
173262306a36Sopenharmony_ci		}
173362306a36Sopenharmony_ci		if (!fanout_find_new_id(sk, &id)) {
173462306a36Sopenharmony_ci			err = -ENOMEM;
173562306a36Sopenharmony_ci			goto out;
173662306a36Sopenharmony_ci		}
173762306a36Sopenharmony_ci		/* ephemeral flag for the first socket in the group: drop it */
173862306a36Sopenharmony_ci		flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
173962306a36Sopenharmony_ci	}
174062306a36Sopenharmony_ci
174162306a36Sopenharmony_ci	match = NULL;
174262306a36Sopenharmony_ci	list_for_each_entry(f, &fanout_list, list) {
174362306a36Sopenharmony_ci		if (f->id == id &&
174462306a36Sopenharmony_ci		    read_pnet(&f->net) == sock_net(sk)) {
174562306a36Sopenharmony_ci			match = f;
174662306a36Sopenharmony_ci			break;
174762306a36Sopenharmony_ci		}
174862306a36Sopenharmony_ci	}
174962306a36Sopenharmony_ci	err = -EINVAL;
175062306a36Sopenharmony_ci	if (match) {
175162306a36Sopenharmony_ci		if (match->flags != flags)
175262306a36Sopenharmony_ci			goto out;
175362306a36Sopenharmony_ci		if (args->max_num_members &&
175462306a36Sopenharmony_ci		    args->max_num_members != match->max_num_members)
175562306a36Sopenharmony_ci			goto out;
175662306a36Sopenharmony_ci	} else {
175762306a36Sopenharmony_ci		if (args->max_num_members > PACKET_FANOUT_MAX)
175862306a36Sopenharmony_ci			goto out;
175962306a36Sopenharmony_ci		if (!args->max_num_members)
176062306a36Sopenharmony_ci			/* legacy PACKET_FANOUT_MAX */
176162306a36Sopenharmony_ci			args->max_num_members = 256;
176262306a36Sopenharmony_ci		err = -ENOMEM;
176362306a36Sopenharmony_ci		match = kvzalloc(struct_size(match, arr, args->max_num_members),
176462306a36Sopenharmony_ci				 GFP_KERNEL);
176562306a36Sopenharmony_ci		if (!match)
176662306a36Sopenharmony_ci			goto out;
176762306a36Sopenharmony_ci		write_pnet(&match->net, sock_net(sk));
176862306a36Sopenharmony_ci		match->id = id;
176962306a36Sopenharmony_ci		match->type = type;
177062306a36Sopenharmony_ci		match->flags = flags;
177162306a36Sopenharmony_ci		INIT_LIST_HEAD(&match->list);
177262306a36Sopenharmony_ci		spin_lock_init(&match->lock);
177362306a36Sopenharmony_ci		refcount_set(&match->sk_ref, 0);
177462306a36Sopenharmony_ci		fanout_init_data(match);
177562306a36Sopenharmony_ci		match->prot_hook.type = po->prot_hook.type;
177662306a36Sopenharmony_ci		match->prot_hook.dev = po->prot_hook.dev;
177762306a36Sopenharmony_ci		match->prot_hook.func = packet_rcv_fanout;
177862306a36Sopenharmony_ci		match->prot_hook.af_packet_priv = match;
177962306a36Sopenharmony_ci		match->prot_hook.af_packet_net = read_pnet(&match->net);
178062306a36Sopenharmony_ci		match->prot_hook.id_match = match_fanout_group;
178162306a36Sopenharmony_ci		match->max_num_members = args->max_num_members;
178262306a36Sopenharmony_ci		match->prot_hook.ignore_outgoing = type_flags & PACKET_FANOUT_FLAG_IGNORE_OUTGOING;
178362306a36Sopenharmony_ci		list_add(&match->list, &fanout_list);
178462306a36Sopenharmony_ci	}
178562306a36Sopenharmony_ci	err = -EINVAL;
178662306a36Sopenharmony_ci
178762306a36Sopenharmony_ci	spin_lock(&po->bind_lock);
178862306a36Sopenharmony_ci	if (packet_sock_flag(po, PACKET_SOCK_RUNNING) &&
178962306a36Sopenharmony_ci	    match->type == type &&
179062306a36Sopenharmony_ci	    match->prot_hook.type == po->prot_hook.type &&
179162306a36Sopenharmony_ci	    match->prot_hook.dev == po->prot_hook.dev) {
179262306a36Sopenharmony_ci		err = -ENOSPC;
179362306a36Sopenharmony_ci		if (refcount_read(&match->sk_ref) < match->max_num_members) {
179462306a36Sopenharmony_ci			__dev_remove_pack(&po->prot_hook);
179562306a36Sopenharmony_ci
179662306a36Sopenharmony_ci			/* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
179762306a36Sopenharmony_ci			WRITE_ONCE(po->fanout, match);
179862306a36Sopenharmony_ci
179962306a36Sopenharmony_ci			po->rollover = rollover;
180062306a36Sopenharmony_ci			rollover = NULL;
180162306a36Sopenharmony_ci			refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
180262306a36Sopenharmony_ci			__fanout_link(sk, po);
180362306a36Sopenharmony_ci			err = 0;
180462306a36Sopenharmony_ci		}
180562306a36Sopenharmony_ci	}
180662306a36Sopenharmony_ci	spin_unlock(&po->bind_lock);
180762306a36Sopenharmony_ci
180862306a36Sopenharmony_ci	if (err && !refcount_read(&match->sk_ref)) {
180962306a36Sopenharmony_ci		list_del(&match->list);
181062306a36Sopenharmony_ci		kvfree(match);
181162306a36Sopenharmony_ci	}
181262306a36Sopenharmony_ci
181362306a36Sopenharmony_ciout:
181462306a36Sopenharmony_ci	kfree(rollover);
181562306a36Sopenharmony_ci	mutex_unlock(&fanout_mutex);
181662306a36Sopenharmony_ci	return err;
181762306a36Sopenharmony_ci}
181862306a36Sopenharmony_ci
181962306a36Sopenharmony_ci/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes
182062306a36Sopenharmony_ci * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout.
182162306a36Sopenharmony_ci * It is the responsibility of the caller to call fanout_release_data() and
182262306a36Sopenharmony_ci * free the returned packet_fanout (after synchronize_net())
182362306a36Sopenharmony_ci */
182462306a36Sopenharmony_cistatic struct packet_fanout *fanout_release(struct sock *sk)
182562306a36Sopenharmony_ci{
182662306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
182762306a36Sopenharmony_ci	struct packet_fanout *f;
182862306a36Sopenharmony_ci
182962306a36Sopenharmony_ci	mutex_lock(&fanout_mutex);
183062306a36Sopenharmony_ci	f = po->fanout;
183162306a36Sopenharmony_ci	if (f) {
183262306a36Sopenharmony_ci		po->fanout = NULL;
183362306a36Sopenharmony_ci
183462306a36Sopenharmony_ci		if (refcount_dec_and_test(&f->sk_ref))
183562306a36Sopenharmony_ci			list_del(&f->list);
183662306a36Sopenharmony_ci		else
183762306a36Sopenharmony_ci			f = NULL;
183862306a36Sopenharmony_ci	}
183962306a36Sopenharmony_ci	mutex_unlock(&fanout_mutex);
184062306a36Sopenharmony_ci
184162306a36Sopenharmony_ci	return f;
184262306a36Sopenharmony_ci}
184362306a36Sopenharmony_ci
184462306a36Sopenharmony_cistatic bool packet_extra_vlan_len_allowed(const struct net_device *dev,
184562306a36Sopenharmony_ci					  struct sk_buff *skb)
184662306a36Sopenharmony_ci{
184762306a36Sopenharmony_ci	/* Earlier code assumed this would be a VLAN pkt, double-check
184862306a36Sopenharmony_ci	 * this now that we have the actual packet in hand. We can only
184962306a36Sopenharmony_ci	 * do this check on Ethernet devices.
185062306a36Sopenharmony_ci	 */
185162306a36Sopenharmony_ci	if (unlikely(dev->type != ARPHRD_ETHER))
185262306a36Sopenharmony_ci		return false;
185362306a36Sopenharmony_ci
185462306a36Sopenharmony_ci	skb_reset_mac_header(skb);
185562306a36Sopenharmony_ci	return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
185662306a36Sopenharmony_ci}
185762306a36Sopenharmony_ci
185862306a36Sopenharmony_cistatic const struct proto_ops packet_ops;
185962306a36Sopenharmony_ci
186062306a36Sopenharmony_cistatic const struct proto_ops packet_ops_spkt;
186162306a36Sopenharmony_ci
186262306a36Sopenharmony_cistatic int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
186362306a36Sopenharmony_ci			   struct packet_type *pt, struct net_device *orig_dev)
186462306a36Sopenharmony_ci{
186562306a36Sopenharmony_ci	struct sock *sk;
186662306a36Sopenharmony_ci	struct sockaddr_pkt *spkt;
186762306a36Sopenharmony_ci
186862306a36Sopenharmony_ci	/*
186962306a36Sopenharmony_ci	 *	When we registered the protocol we saved the socket in the data
187062306a36Sopenharmony_ci	 *	field for just this event.
187162306a36Sopenharmony_ci	 */
187262306a36Sopenharmony_ci
187362306a36Sopenharmony_ci	sk = pt->af_packet_priv;
187462306a36Sopenharmony_ci
187562306a36Sopenharmony_ci	/*
187662306a36Sopenharmony_ci	 *	Yank back the headers [hope the device set this
187762306a36Sopenharmony_ci	 *	right or kerboom...]
187862306a36Sopenharmony_ci	 *
187962306a36Sopenharmony_ci	 *	Incoming packets have ll header pulled,
188062306a36Sopenharmony_ci	 *	push it back.
188162306a36Sopenharmony_ci	 *
188262306a36Sopenharmony_ci	 *	For outgoing ones skb->data == skb_mac_header(skb)
188362306a36Sopenharmony_ci	 *	so that this procedure is noop.
188462306a36Sopenharmony_ci	 */
188562306a36Sopenharmony_ci
188662306a36Sopenharmony_ci	if (skb->pkt_type == PACKET_LOOPBACK)
188762306a36Sopenharmony_ci		goto out;
188862306a36Sopenharmony_ci
188962306a36Sopenharmony_ci	if (!net_eq(dev_net(dev), sock_net(sk)))
189062306a36Sopenharmony_ci		goto out;
189162306a36Sopenharmony_ci
189262306a36Sopenharmony_ci	skb = skb_share_check(skb, GFP_ATOMIC);
189362306a36Sopenharmony_ci	if (skb == NULL)
189462306a36Sopenharmony_ci		goto oom;
189562306a36Sopenharmony_ci
189662306a36Sopenharmony_ci	/* drop any routing info */
189762306a36Sopenharmony_ci	skb_dst_drop(skb);
189862306a36Sopenharmony_ci
189962306a36Sopenharmony_ci	/* drop conntrack reference */
190062306a36Sopenharmony_ci	nf_reset_ct(skb);
190162306a36Sopenharmony_ci
190262306a36Sopenharmony_ci	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
190362306a36Sopenharmony_ci
190462306a36Sopenharmony_ci	skb_push(skb, skb->data - skb_mac_header(skb));
190562306a36Sopenharmony_ci
190662306a36Sopenharmony_ci	/*
190762306a36Sopenharmony_ci	 *	The SOCK_PACKET socket receives _all_ frames.
190862306a36Sopenharmony_ci	 */
190962306a36Sopenharmony_ci
191062306a36Sopenharmony_ci	spkt->spkt_family = dev->type;
191162306a36Sopenharmony_ci	strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
191262306a36Sopenharmony_ci	spkt->spkt_protocol = skb->protocol;
191362306a36Sopenharmony_ci
191462306a36Sopenharmony_ci	/*
191562306a36Sopenharmony_ci	 *	Charge the memory to the socket. This is done specifically
191662306a36Sopenharmony_ci	 *	to prevent sockets using all the memory up.
191762306a36Sopenharmony_ci	 */
191862306a36Sopenharmony_ci
191962306a36Sopenharmony_ci	if (sock_queue_rcv_skb(sk, skb) == 0)
192062306a36Sopenharmony_ci		return 0;
192162306a36Sopenharmony_ci
192262306a36Sopenharmony_ciout:
192362306a36Sopenharmony_ci	kfree_skb(skb);
192462306a36Sopenharmony_cioom:
192562306a36Sopenharmony_ci	return 0;
192662306a36Sopenharmony_ci}
192762306a36Sopenharmony_ci
192862306a36Sopenharmony_cistatic void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
192962306a36Sopenharmony_ci{
193062306a36Sopenharmony_ci	int depth;
193162306a36Sopenharmony_ci
193262306a36Sopenharmony_ci	if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
193362306a36Sopenharmony_ci	    sock->type == SOCK_RAW) {
193462306a36Sopenharmony_ci		skb_reset_mac_header(skb);
193562306a36Sopenharmony_ci		skb->protocol = dev_parse_header_protocol(skb);
193662306a36Sopenharmony_ci	}
193762306a36Sopenharmony_ci
193862306a36Sopenharmony_ci	/* Move network header to the right position for VLAN tagged packets */
193962306a36Sopenharmony_ci	if (likely(skb->dev->type == ARPHRD_ETHER) &&
194062306a36Sopenharmony_ci	    eth_type_vlan(skb->protocol) &&
194162306a36Sopenharmony_ci	    vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0)
194262306a36Sopenharmony_ci		skb_set_network_header(skb, depth);
194362306a36Sopenharmony_ci
194462306a36Sopenharmony_ci	skb_probe_transport_header(skb);
194562306a36Sopenharmony_ci}
194662306a36Sopenharmony_ci
194762306a36Sopenharmony_ci/*
194862306a36Sopenharmony_ci *	Output a raw packet to a device layer. This bypasses all the other
194962306a36Sopenharmony_ci *	protocol layers and you must therefore supply it with a complete frame
195062306a36Sopenharmony_ci */
195162306a36Sopenharmony_ci
195262306a36Sopenharmony_cistatic int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
195362306a36Sopenharmony_ci			       size_t len)
195462306a36Sopenharmony_ci{
195562306a36Sopenharmony_ci	struct sock *sk = sock->sk;
195662306a36Sopenharmony_ci	DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
195762306a36Sopenharmony_ci	struct sk_buff *skb = NULL;
195862306a36Sopenharmony_ci	struct net_device *dev;
195962306a36Sopenharmony_ci	struct sockcm_cookie sockc;
196062306a36Sopenharmony_ci	__be16 proto = 0;
196162306a36Sopenharmony_ci	int err;
196262306a36Sopenharmony_ci	int extra_len = 0;
196362306a36Sopenharmony_ci
196462306a36Sopenharmony_ci	/*
196562306a36Sopenharmony_ci	 *	Get and verify the address.
196662306a36Sopenharmony_ci	 */
196762306a36Sopenharmony_ci
196862306a36Sopenharmony_ci	if (saddr) {
196962306a36Sopenharmony_ci		if (msg->msg_namelen < sizeof(struct sockaddr))
197062306a36Sopenharmony_ci			return -EINVAL;
197162306a36Sopenharmony_ci		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
197262306a36Sopenharmony_ci			proto = saddr->spkt_protocol;
197362306a36Sopenharmony_ci	} else
197462306a36Sopenharmony_ci		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */
197562306a36Sopenharmony_ci
197662306a36Sopenharmony_ci	/*
197762306a36Sopenharmony_ci	 *	Find the device first to size check it
197862306a36Sopenharmony_ci	 */
197962306a36Sopenharmony_ci
198062306a36Sopenharmony_ci	saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
198162306a36Sopenharmony_ciretry:
198262306a36Sopenharmony_ci	rcu_read_lock();
198362306a36Sopenharmony_ci	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
198462306a36Sopenharmony_ci	err = -ENODEV;
198562306a36Sopenharmony_ci	if (dev == NULL)
198662306a36Sopenharmony_ci		goto out_unlock;
198762306a36Sopenharmony_ci
198862306a36Sopenharmony_ci	err = -ENETDOWN;
198962306a36Sopenharmony_ci	if (!(dev->flags & IFF_UP))
199062306a36Sopenharmony_ci		goto out_unlock;
199162306a36Sopenharmony_ci
199262306a36Sopenharmony_ci	/*
199362306a36Sopenharmony_ci	 * You may not queue a frame bigger than the mtu. This is the lowest level
199462306a36Sopenharmony_ci	 * raw protocol and you must do your own fragmentation at this level.
199562306a36Sopenharmony_ci	 */
199662306a36Sopenharmony_ci
199762306a36Sopenharmony_ci	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
199862306a36Sopenharmony_ci		if (!netif_supports_nofcs(dev)) {
199962306a36Sopenharmony_ci			err = -EPROTONOSUPPORT;
200062306a36Sopenharmony_ci			goto out_unlock;
200162306a36Sopenharmony_ci		}
200262306a36Sopenharmony_ci		extra_len = 4; /* We're doing our own CRC */
200362306a36Sopenharmony_ci	}
200462306a36Sopenharmony_ci
200562306a36Sopenharmony_ci	err = -EMSGSIZE;
200662306a36Sopenharmony_ci	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
200762306a36Sopenharmony_ci		goto out_unlock;
200862306a36Sopenharmony_ci
200962306a36Sopenharmony_ci	if (!skb) {
201062306a36Sopenharmony_ci		size_t reserved = LL_RESERVED_SPACE(dev);
201162306a36Sopenharmony_ci		int tlen = dev->needed_tailroom;
201262306a36Sopenharmony_ci		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
201362306a36Sopenharmony_ci
201462306a36Sopenharmony_ci		rcu_read_unlock();
201562306a36Sopenharmony_ci		skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
201662306a36Sopenharmony_ci		if (skb == NULL)
201762306a36Sopenharmony_ci			return -ENOBUFS;
201862306a36Sopenharmony_ci		/* FIXME: Save some space for broken drivers that write a hard
201962306a36Sopenharmony_ci		 * header at transmission time by themselves. PPP is the notable
202062306a36Sopenharmony_ci		 * one here. This should really be fixed at the driver level.
202162306a36Sopenharmony_ci		 */
202262306a36Sopenharmony_ci		skb_reserve(skb, reserved);
202362306a36Sopenharmony_ci		skb_reset_network_header(skb);
202462306a36Sopenharmony_ci
202562306a36Sopenharmony_ci		/* Try to align data part correctly */
202662306a36Sopenharmony_ci		if (hhlen) {
202762306a36Sopenharmony_ci			skb->data -= hhlen;
202862306a36Sopenharmony_ci			skb->tail -= hhlen;
202962306a36Sopenharmony_ci			if (len < hhlen)
203062306a36Sopenharmony_ci				skb_reset_network_header(skb);
203162306a36Sopenharmony_ci		}
203262306a36Sopenharmony_ci		err = memcpy_from_msg(skb_put(skb, len), msg, len);
203362306a36Sopenharmony_ci		if (err)
203462306a36Sopenharmony_ci			goto out_free;
203562306a36Sopenharmony_ci		goto retry;
203662306a36Sopenharmony_ci	}
203762306a36Sopenharmony_ci
203862306a36Sopenharmony_ci	if (!dev_validate_header(dev, skb->data, len) || !skb->len) {
203962306a36Sopenharmony_ci		err = -EINVAL;
204062306a36Sopenharmony_ci		goto out_unlock;
204162306a36Sopenharmony_ci	}
204262306a36Sopenharmony_ci	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
204362306a36Sopenharmony_ci	    !packet_extra_vlan_len_allowed(dev, skb)) {
204462306a36Sopenharmony_ci		err = -EMSGSIZE;
204562306a36Sopenharmony_ci		goto out_unlock;
204662306a36Sopenharmony_ci	}
204762306a36Sopenharmony_ci
204862306a36Sopenharmony_ci	sockcm_init(&sockc, sk);
204962306a36Sopenharmony_ci	if (msg->msg_controllen) {
205062306a36Sopenharmony_ci		err = sock_cmsg_send(sk, msg, &sockc);
205162306a36Sopenharmony_ci		if (unlikely(err))
205262306a36Sopenharmony_ci			goto out_unlock;
205362306a36Sopenharmony_ci	}
205462306a36Sopenharmony_ci
205562306a36Sopenharmony_ci	skb->protocol = proto;
205662306a36Sopenharmony_ci	skb->dev = dev;
205762306a36Sopenharmony_ci	skb->priority = READ_ONCE(sk->sk_priority);
205862306a36Sopenharmony_ci	skb->mark = READ_ONCE(sk->sk_mark);
205962306a36Sopenharmony_ci	skb->tstamp = sockc.transmit_time;
206062306a36Sopenharmony_ci
206162306a36Sopenharmony_ci	skb_setup_tx_timestamp(skb, sockc.tsflags);
206262306a36Sopenharmony_ci
206362306a36Sopenharmony_ci	if (unlikely(extra_len == 4))
206462306a36Sopenharmony_ci		skb->no_fcs = 1;
206562306a36Sopenharmony_ci
206662306a36Sopenharmony_ci	packet_parse_headers(skb, sock);
206762306a36Sopenharmony_ci
206862306a36Sopenharmony_ci	dev_queue_xmit(skb);
206962306a36Sopenharmony_ci	rcu_read_unlock();
207062306a36Sopenharmony_ci	return len;
207162306a36Sopenharmony_ci
207262306a36Sopenharmony_ciout_unlock:
207362306a36Sopenharmony_ci	rcu_read_unlock();
207462306a36Sopenharmony_ciout_free:
207562306a36Sopenharmony_ci	kfree_skb(skb);
207662306a36Sopenharmony_ci	return err;
207762306a36Sopenharmony_ci}
207862306a36Sopenharmony_ci
207962306a36Sopenharmony_cistatic unsigned int run_filter(struct sk_buff *skb,
208062306a36Sopenharmony_ci			       const struct sock *sk,
208162306a36Sopenharmony_ci			       unsigned int res)
208262306a36Sopenharmony_ci{
208362306a36Sopenharmony_ci	struct sk_filter *filter;
208462306a36Sopenharmony_ci
208562306a36Sopenharmony_ci	rcu_read_lock();
208662306a36Sopenharmony_ci	filter = rcu_dereference(sk->sk_filter);
208762306a36Sopenharmony_ci	if (filter != NULL)
208862306a36Sopenharmony_ci		res = bpf_prog_run_clear_cb(filter->prog, skb);
208962306a36Sopenharmony_ci	rcu_read_unlock();
209062306a36Sopenharmony_ci
209162306a36Sopenharmony_ci	return res;
209262306a36Sopenharmony_ci}
209362306a36Sopenharmony_ci
209462306a36Sopenharmony_cistatic int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
209562306a36Sopenharmony_ci			   size_t *len, int vnet_hdr_sz)
209662306a36Sopenharmony_ci{
209762306a36Sopenharmony_ci	struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { .num_buffers = 0 };
209862306a36Sopenharmony_ci
209962306a36Sopenharmony_ci	if (*len < vnet_hdr_sz)
210062306a36Sopenharmony_ci		return -EINVAL;
210162306a36Sopenharmony_ci	*len -= vnet_hdr_sz;
210262306a36Sopenharmony_ci
210362306a36Sopenharmony_ci	if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)&vnet_hdr, vio_le(), true, 0))
210462306a36Sopenharmony_ci		return -EINVAL;
210562306a36Sopenharmony_ci
210662306a36Sopenharmony_ci	return memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz);
210762306a36Sopenharmony_ci}
210862306a36Sopenharmony_ci
210962306a36Sopenharmony_ci/*
211062306a36Sopenharmony_ci * This function makes lazy skb cloning in hope that most of packets
211162306a36Sopenharmony_ci * are discarded by BPF.
211262306a36Sopenharmony_ci *
211362306a36Sopenharmony_ci * Note tricky part: we DO mangle shared skb! skb->data, skb->len
211462306a36Sopenharmony_ci * and skb->cb are mangled. It works because (and until) packets
211562306a36Sopenharmony_ci * falling here are owned by current CPU. Output packets are cloned
211662306a36Sopenharmony_ci * by dev_queue_xmit_nit(), input packets are processed by net_bh
211762306a36Sopenharmony_ci * sequentially, so that if we return skb to original state on exit,
211862306a36Sopenharmony_ci * we will not harm anyone.
211962306a36Sopenharmony_ci */
212062306a36Sopenharmony_ci
212162306a36Sopenharmony_cistatic int packet_rcv(struct sk_buff *skb, struct net_device *dev,
212262306a36Sopenharmony_ci		      struct packet_type *pt, struct net_device *orig_dev)
212362306a36Sopenharmony_ci{
212462306a36Sopenharmony_ci	struct sock *sk;
212562306a36Sopenharmony_ci	struct sockaddr_ll *sll;
212662306a36Sopenharmony_ci	struct packet_sock *po;
212762306a36Sopenharmony_ci	u8 *skb_head = skb->data;
212862306a36Sopenharmony_ci	int skb_len = skb->len;
212962306a36Sopenharmony_ci	unsigned int snaplen, res;
213062306a36Sopenharmony_ci	bool is_drop_n_account = false;
213162306a36Sopenharmony_ci
213262306a36Sopenharmony_ci	if (skb->pkt_type == PACKET_LOOPBACK)
213362306a36Sopenharmony_ci		goto drop;
213462306a36Sopenharmony_ci
213562306a36Sopenharmony_ci	sk = pt->af_packet_priv;
213662306a36Sopenharmony_ci	po = pkt_sk(sk);
213762306a36Sopenharmony_ci
213862306a36Sopenharmony_ci	if (!net_eq(dev_net(dev), sock_net(sk)))
213962306a36Sopenharmony_ci		goto drop;
214062306a36Sopenharmony_ci
214162306a36Sopenharmony_ci	skb->dev = dev;
214262306a36Sopenharmony_ci
214362306a36Sopenharmony_ci	if (dev_has_header(dev)) {
214462306a36Sopenharmony_ci		/* The device has an explicit notion of ll header,
214562306a36Sopenharmony_ci		 * exported to higher levels.
214662306a36Sopenharmony_ci		 *
214762306a36Sopenharmony_ci		 * Otherwise, the device hides details of its frame
214862306a36Sopenharmony_ci		 * structure, so that corresponding packet head is
214962306a36Sopenharmony_ci		 * never delivered to user.
215062306a36Sopenharmony_ci		 */
215162306a36Sopenharmony_ci		if (sk->sk_type != SOCK_DGRAM)
215262306a36Sopenharmony_ci			skb_push(skb, skb->data - skb_mac_header(skb));
215362306a36Sopenharmony_ci		else if (skb->pkt_type == PACKET_OUTGOING) {
215462306a36Sopenharmony_ci			/* Special case: outgoing packets have ll header at head */
215562306a36Sopenharmony_ci			skb_pull(skb, skb_network_offset(skb));
215662306a36Sopenharmony_ci		}
215762306a36Sopenharmony_ci	}
215862306a36Sopenharmony_ci
215962306a36Sopenharmony_ci	snaplen = skb->len;
216062306a36Sopenharmony_ci
216162306a36Sopenharmony_ci	res = run_filter(skb, sk, snaplen);
216262306a36Sopenharmony_ci	if (!res)
216362306a36Sopenharmony_ci		goto drop_n_restore;
216462306a36Sopenharmony_ci	if (snaplen > res)
216562306a36Sopenharmony_ci		snaplen = res;
216662306a36Sopenharmony_ci
216762306a36Sopenharmony_ci	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
216862306a36Sopenharmony_ci		goto drop_n_acct;
216962306a36Sopenharmony_ci
217062306a36Sopenharmony_ci	if (skb_shared(skb)) {
217162306a36Sopenharmony_ci		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
217262306a36Sopenharmony_ci		if (nskb == NULL)
217362306a36Sopenharmony_ci			goto drop_n_acct;
217462306a36Sopenharmony_ci
217562306a36Sopenharmony_ci		if (skb_head != skb->data) {
217662306a36Sopenharmony_ci			skb->data = skb_head;
217762306a36Sopenharmony_ci			skb->len = skb_len;
217862306a36Sopenharmony_ci		}
217962306a36Sopenharmony_ci		consume_skb(skb);
218062306a36Sopenharmony_ci		skb = nskb;
218162306a36Sopenharmony_ci	}
218262306a36Sopenharmony_ci
218362306a36Sopenharmony_ci	sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
218462306a36Sopenharmony_ci
218562306a36Sopenharmony_ci	sll = &PACKET_SKB_CB(skb)->sa.ll;
218662306a36Sopenharmony_ci	sll->sll_hatype = dev->type;
218762306a36Sopenharmony_ci	sll->sll_pkttype = skb->pkt_type;
218862306a36Sopenharmony_ci	if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
218962306a36Sopenharmony_ci		sll->sll_ifindex = orig_dev->ifindex;
219062306a36Sopenharmony_ci	else
219162306a36Sopenharmony_ci		sll->sll_ifindex = dev->ifindex;
219262306a36Sopenharmony_ci
219362306a36Sopenharmony_ci	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
219462306a36Sopenharmony_ci
219562306a36Sopenharmony_ci	/* sll->sll_family and sll->sll_protocol are set in packet_recvmsg().
219662306a36Sopenharmony_ci	 * Use their space for storing the original skb length.
219762306a36Sopenharmony_ci	 */
219862306a36Sopenharmony_ci	PACKET_SKB_CB(skb)->sa.origlen = skb->len;
219962306a36Sopenharmony_ci
220062306a36Sopenharmony_ci	if (pskb_trim(skb, snaplen))
220162306a36Sopenharmony_ci		goto drop_n_acct;
220262306a36Sopenharmony_ci
220362306a36Sopenharmony_ci	skb_set_owner_r(skb, sk);
220462306a36Sopenharmony_ci	skb->dev = NULL;
220562306a36Sopenharmony_ci	skb_dst_drop(skb);
220662306a36Sopenharmony_ci
220762306a36Sopenharmony_ci	/* drop conntrack reference */
220862306a36Sopenharmony_ci	nf_reset_ct(skb);
220962306a36Sopenharmony_ci
221062306a36Sopenharmony_ci	spin_lock(&sk->sk_receive_queue.lock);
221162306a36Sopenharmony_ci	po->stats.stats1.tp_packets++;
221262306a36Sopenharmony_ci	sock_skb_set_dropcount(sk, skb);
221362306a36Sopenharmony_ci	skb_clear_delivery_time(skb);
221462306a36Sopenharmony_ci	__skb_queue_tail(&sk->sk_receive_queue, skb);
221562306a36Sopenharmony_ci	spin_unlock(&sk->sk_receive_queue.lock);
221662306a36Sopenharmony_ci	sk->sk_data_ready(sk);
221762306a36Sopenharmony_ci	return 0;
221862306a36Sopenharmony_ci
221962306a36Sopenharmony_cidrop_n_acct:
222062306a36Sopenharmony_ci	is_drop_n_account = true;
222162306a36Sopenharmony_ci	atomic_inc(&po->tp_drops);
222262306a36Sopenharmony_ci	atomic_inc(&sk->sk_drops);
222362306a36Sopenharmony_ci
222462306a36Sopenharmony_cidrop_n_restore:
222562306a36Sopenharmony_ci	if (skb_head != skb->data && skb_shared(skb)) {
222662306a36Sopenharmony_ci		skb->data = skb_head;
222762306a36Sopenharmony_ci		skb->len = skb_len;
222862306a36Sopenharmony_ci	}
222962306a36Sopenharmony_cidrop:
223062306a36Sopenharmony_ci	if (!is_drop_n_account)
223162306a36Sopenharmony_ci		consume_skb(skb);
223262306a36Sopenharmony_ci	else
223362306a36Sopenharmony_ci		kfree_skb(skb);
223462306a36Sopenharmony_ci	return 0;
223562306a36Sopenharmony_ci}
223662306a36Sopenharmony_ci
223762306a36Sopenharmony_cistatic int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
223862306a36Sopenharmony_ci		       struct packet_type *pt, struct net_device *orig_dev)
223962306a36Sopenharmony_ci{
224062306a36Sopenharmony_ci	struct sock *sk;
224162306a36Sopenharmony_ci	struct packet_sock *po;
224262306a36Sopenharmony_ci	struct sockaddr_ll *sll;
224362306a36Sopenharmony_ci	union tpacket_uhdr h;
224462306a36Sopenharmony_ci	u8 *skb_head = skb->data;
224562306a36Sopenharmony_ci	int skb_len = skb->len;
224662306a36Sopenharmony_ci	unsigned int snaplen, res;
224762306a36Sopenharmony_ci	unsigned long status = TP_STATUS_USER;
224862306a36Sopenharmony_ci	unsigned short macoff, hdrlen;
224962306a36Sopenharmony_ci	unsigned int netoff;
225062306a36Sopenharmony_ci	struct sk_buff *copy_skb = NULL;
225162306a36Sopenharmony_ci	struct timespec64 ts;
225262306a36Sopenharmony_ci	__u32 ts_status;
225362306a36Sopenharmony_ci	bool is_drop_n_account = false;
225462306a36Sopenharmony_ci	unsigned int slot_id = 0;
225562306a36Sopenharmony_ci	int vnet_hdr_sz = 0;
225662306a36Sopenharmony_ci
225762306a36Sopenharmony_ci	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
225862306a36Sopenharmony_ci	 * We may add members to them until current aligned size without forcing
225962306a36Sopenharmony_ci	 * userspace to call getsockopt(..., PACKET_HDRLEN, ...).
226062306a36Sopenharmony_ci	 */
226162306a36Sopenharmony_ci	BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32);
226262306a36Sopenharmony_ci	BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48);
226362306a36Sopenharmony_ci
226462306a36Sopenharmony_ci	if (skb->pkt_type == PACKET_LOOPBACK)
226562306a36Sopenharmony_ci		goto drop;
226662306a36Sopenharmony_ci
226762306a36Sopenharmony_ci	sk = pt->af_packet_priv;
226862306a36Sopenharmony_ci	po = pkt_sk(sk);
226962306a36Sopenharmony_ci
227062306a36Sopenharmony_ci	if (!net_eq(dev_net(dev), sock_net(sk)))
227162306a36Sopenharmony_ci		goto drop;
227262306a36Sopenharmony_ci
227362306a36Sopenharmony_ci	if (dev_has_header(dev)) {
227462306a36Sopenharmony_ci		if (sk->sk_type != SOCK_DGRAM)
227562306a36Sopenharmony_ci			skb_push(skb, skb->data - skb_mac_header(skb));
227662306a36Sopenharmony_ci		else if (skb->pkt_type == PACKET_OUTGOING) {
227762306a36Sopenharmony_ci			/* Special case: outgoing packets have ll header at head */
227862306a36Sopenharmony_ci			skb_pull(skb, skb_network_offset(skb));
227962306a36Sopenharmony_ci		}
228062306a36Sopenharmony_ci	}
228162306a36Sopenharmony_ci
228262306a36Sopenharmony_ci	snaplen = skb->len;
228362306a36Sopenharmony_ci
228462306a36Sopenharmony_ci	res = run_filter(skb, sk, snaplen);
228562306a36Sopenharmony_ci	if (!res)
228662306a36Sopenharmony_ci		goto drop_n_restore;
228762306a36Sopenharmony_ci
228862306a36Sopenharmony_ci	/* If we are flooded, just give up */
228962306a36Sopenharmony_ci	if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
229062306a36Sopenharmony_ci		atomic_inc(&po->tp_drops);
229162306a36Sopenharmony_ci		goto drop_n_restore;
229262306a36Sopenharmony_ci	}
229362306a36Sopenharmony_ci
229462306a36Sopenharmony_ci	if (skb->ip_summed == CHECKSUM_PARTIAL)
229562306a36Sopenharmony_ci		status |= TP_STATUS_CSUMNOTREADY;
229662306a36Sopenharmony_ci	else if (skb->pkt_type != PACKET_OUTGOING &&
229762306a36Sopenharmony_ci		 skb_csum_unnecessary(skb))
229862306a36Sopenharmony_ci		status |= TP_STATUS_CSUM_VALID;
229962306a36Sopenharmony_ci	if (skb_is_gso(skb) && skb_is_gso_tcp(skb))
230062306a36Sopenharmony_ci		status |= TP_STATUS_GSO_TCP;
230162306a36Sopenharmony_ci
230262306a36Sopenharmony_ci	if (snaplen > res)
230362306a36Sopenharmony_ci		snaplen = res;
230462306a36Sopenharmony_ci
230562306a36Sopenharmony_ci	if (sk->sk_type == SOCK_DGRAM) {
230662306a36Sopenharmony_ci		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
230762306a36Sopenharmony_ci				  po->tp_reserve;
230862306a36Sopenharmony_ci	} else {
230962306a36Sopenharmony_ci		unsigned int maclen = skb_network_offset(skb);
231062306a36Sopenharmony_ci		netoff = TPACKET_ALIGN(po->tp_hdrlen +
231162306a36Sopenharmony_ci				       (maclen < 16 ? 16 : maclen)) +
231262306a36Sopenharmony_ci				       po->tp_reserve;
231362306a36Sopenharmony_ci		vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
231462306a36Sopenharmony_ci		if (vnet_hdr_sz)
231562306a36Sopenharmony_ci			netoff += vnet_hdr_sz;
231662306a36Sopenharmony_ci		macoff = netoff - maclen;
231762306a36Sopenharmony_ci	}
231862306a36Sopenharmony_ci	if (netoff > USHRT_MAX) {
231962306a36Sopenharmony_ci		atomic_inc(&po->tp_drops);
232062306a36Sopenharmony_ci		goto drop_n_restore;
232162306a36Sopenharmony_ci	}
232262306a36Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
232362306a36Sopenharmony_ci		if (macoff + snaplen > po->rx_ring.frame_size) {
232462306a36Sopenharmony_ci			if (po->copy_thresh &&
232562306a36Sopenharmony_ci			    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
232662306a36Sopenharmony_ci				if (skb_shared(skb)) {
232762306a36Sopenharmony_ci					copy_skb = skb_clone(skb, GFP_ATOMIC);
232862306a36Sopenharmony_ci				} else {
232962306a36Sopenharmony_ci					copy_skb = skb_get(skb);
233062306a36Sopenharmony_ci					skb_head = skb->data;
233162306a36Sopenharmony_ci				}
233262306a36Sopenharmony_ci				if (copy_skb) {
233362306a36Sopenharmony_ci					memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
233462306a36Sopenharmony_ci					       sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
233562306a36Sopenharmony_ci					skb_set_owner_r(copy_skb, sk);
233662306a36Sopenharmony_ci				}
233762306a36Sopenharmony_ci			}
233862306a36Sopenharmony_ci			snaplen = po->rx_ring.frame_size - macoff;
233962306a36Sopenharmony_ci			if ((int)snaplen < 0) {
234062306a36Sopenharmony_ci				snaplen = 0;
234162306a36Sopenharmony_ci				vnet_hdr_sz = 0;
234262306a36Sopenharmony_ci			}
234362306a36Sopenharmony_ci		}
234462306a36Sopenharmony_ci	} else if (unlikely(macoff + snaplen >
234562306a36Sopenharmony_ci			    GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
234662306a36Sopenharmony_ci		u32 nval;
234762306a36Sopenharmony_ci
234862306a36Sopenharmony_ci		nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
234962306a36Sopenharmony_ci		pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
235062306a36Sopenharmony_ci			    snaplen, nval, macoff);
235162306a36Sopenharmony_ci		snaplen = nval;
235262306a36Sopenharmony_ci		if (unlikely((int)snaplen < 0)) {
235362306a36Sopenharmony_ci			snaplen = 0;
235462306a36Sopenharmony_ci			macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
235562306a36Sopenharmony_ci			vnet_hdr_sz = 0;
235662306a36Sopenharmony_ci		}
235762306a36Sopenharmony_ci	}
235862306a36Sopenharmony_ci	spin_lock(&sk->sk_receive_queue.lock);
235962306a36Sopenharmony_ci	h.raw = packet_current_rx_frame(po, skb,
236062306a36Sopenharmony_ci					TP_STATUS_KERNEL, (macoff+snaplen));
236162306a36Sopenharmony_ci	if (!h.raw)
236262306a36Sopenharmony_ci		goto drop_n_account;
236362306a36Sopenharmony_ci
236462306a36Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
236562306a36Sopenharmony_ci		slot_id = po->rx_ring.head;
236662306a36Sopenharmony_ci		if (test_bit(slot_id, po->rx_ring.rx_owner_map))
236762306a36Sopenharmony_ci			goto drop_n_account;
236862306a36Sopenharmony_ci		__set_bit(slot_id, po->rx_ring.rx_owner_map);
236962306a36Sopenharmony_ci	}
237062306a36Sopenharmony_ci
237162306a36Sopenharmony_ci	if (vnet_hdr_sz &&
237262306a36Sopenharmony_ci	    virtio_net_hdr_from_skb(skb, h.raw + macoff -
237362306a36Sopenharmony_ci				    sizeof(struct virtio_net_hdr),
237462306a36Sopenharmony_ci				    vio_le(), true, 0)) {
237562306a36Sopenharmony_ci		if (po->tp_version == TPACKET_V3)
237662306a36Sopenharmony_ci			prb_clear_blk_fill_status(&po->rx_ring);
237762306a36Sopenharmony_ci		goto drop_n_account;
237862306a36Sopenharmony_ci	}
237962306a36Sopenharmony_ci
238062306a36Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
238162306a36Sopenharmony_ci		packet_increment_rx_head(po, &po->rx_ring);
238262306a36Sopenharmony_ci	/*
238362306a36Sopenharmony_ci	 * LOSING will be reported till you read the stats,
238462306a36Sopenharmony_ci	 * because it's COR - Clear On Read.
238562306a36Sopenharmony_ci	 * Anyways, moving it for V1/V2 only as V3 doesn't need this
238662306a36Sopenharmony_ci	 * at packet level.
238762306a36Sopenharmony_ci	 */
238862306a36Sopenharmony_ci		if (atomic_read(&po->tp_drops))
238962306a36Sopenharmony_ci			status |= TP_STATUS_LOSING;
239062306a36Sopenharmony_ci	}
239162306a36Sopenharmony_ci
239262306a36Sopenharmony_ci	po->stats.stats1.tp_packets++;
239362306a36Sopenharmony_ci	if (copy_skb) {
239462306a36Sopenharmony_ci		status |= TP_STATUS_COPY;
239562306a36Sopenharmony_ci		skb_clear_delivery_time(copy_skb);
239662306a36Sopenharmony_ci		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
239762306a36Sopenharmony_ci	}
239862306a36Sopenharmony_ci	spin_unlock(&sk->sk_receive_queue.lock);
239962306a36Sopenharmony_ci
240062306a36Sopenharmony_ci	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
240162306a36Sopenharmony_ci
240262306a36Sopenharmony_ci	/* Always timestamp; prefer an existing software timestamp taken
240362306a36Sopenharmony_ci	 * closer to the time of capture.
240462306a36Sopenharmony_ci	 */
240562306a36Sopenharmony_ci	ts_status = tpacket_get_timestamp(skb, &ts,
240662306a36Sopenharmony_ci					  READ_ONCE(po->tp_tstamp) |
240762306a36Sopenharmony_ci					  SOF_TIMESTAMPING_SOFTWARE);
240862306a36Sopenharmony_ci	if (!ts_status)
240962306a36Sopenharmony_ci		ktime_get_real_ts64(&ts);
241062306a36Sopenharmony_ci
241162306a36Sopenharmony_ci	status |= ts_status;
241262306a36Sopenharmony_ci
241362306a36Sopenharmony_ci	switch (po->tp_version) {
241462306a36Sopenharmony_ci	case TPACKET_V1:
241562306a36Sopenharmony_ci		h.h1->tp_len = skb->len;
241662306a36Sopenharmony_ci		h.h1->tp_snaplen = snaplen;
241762306a36Sopenharmony_ci		h.h1->tp_mac = macoff;
241862306a36Sopenharmony_ci		h.h1->tp_net = netoff;
241962306a36Sopenharmony_ci		h.h1->tp_sec = ts.tv_sec;
242062306a36Sopenharmony_ci		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
242162306a36Sopenharmony_ci		hdrlen = sizeof(*h.h1);
242262306a36Sopenharmony_ci		break;
242362306a36Sopenharmony_ci	case TPACKET_V2:
242462306a36Sopenharmony_ci		h.h2->tp_len = skb->len;
242562306a36Sopenharmony_ci		h.h2->tp_snaplen = snaplen;
242662306a36Sopenharmony_ci		h.h2->tp_mac = macoff;
242762306a36Sopenharmony_ci		h.h2->tp_net = netoff;
242862306a36Sopenharmony_ci		h.h2->tp_sec = ts.tv_sec;
242962306a36Sopenharmony_ci		h.h2->tp_nsec = ts.tv_nsec;
243062306a36Sopenharmony_ci		if (skb_vlan_tag_present(skb)) {
243162306a36Sopenharmony_ci			h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
243262306a36Sopenharmony_ci			h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
243362306a36Sopenharmony_ci			status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
243462306a36Sopenharmony_ci		} else {
243562306a36Sopenharmony_ci			h.h2->tp_vlan_tci = 0;
243662306a36Sopenharmony_ci			h.h2->tp_vlan_tpid = 0;
243762306a36Sopenharmony_ci		}
243862306a36Sopenharmony_ci		memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding));
243962306a36Sopenharmony_ci		hdrlen = sizeof(*h.h2);
244062306a36Sopenharmony_ci		break;
244162306a36Sopenharmony_ci	case TPACKET_V3:
244262306a36Sopenharmony_ci		/* tp_nxt_offset,vlan are already populated above.
244362306a36Sopenharmony_ci		 * So DONT clear those fields here
244462306a36Sopenharmony_ci		 */
244562306a36Sopenharmony_ci		h.h3->tp_status |= status;
244662306a36Sopenharmony_ci		h.h3->tp_len = skb->len;
244762306a36Sopenharmony_ci		h.h3->tp_snaplen = snaplen;
244862306a36Sopenharmony_ci		h.h3->tp_mac = macoff;
244962306a36Sopenharmony_ci		h.h3->tp_net = netoff;
245062306a36Sopenharmony_ci		h.h3->tp_sec  = ts.tv_sec;
245162306a36Sopenharmony_ci		h.h3->tp_nsec = ts.tv_nsec;
245262306a36Sopenharmony_ci		memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
245362306a36Sopenharmony_ci		hdrlen = sizeof(*h.h3);
245462306a36Sopenharmony_ci		break;
245562306a36Sopenharmony_ci	default:
245662306a36Sopenharmony_ci		BUG();
245762306a36Sopenharmony_ci	}
245862306a36Sopenharmony_ci
245962306a36Sopenharmony_ci	sll = h.raw + TPACKET_ALIGN(hdrlen);
246062306a36Sopenharmony_ci	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
246162306a36Sopenharmony_ci	sll->sll_family = AF_PACKET;
246262306a36Sopenharmony_ci	sll->sll_hatype = dev->type;
246362306a36Sopenharmony_ci	sll->sll_protocol = skb->protocol;
246462306a36Sopenharmony_ci	sll->sll_pkttype = skb->pkt_type;
246562306a36Sopenharmony_ci	if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
246662306a36Sopenharmony_ci		sll->sll_ifindex = orig_dev->ifindex;
246762306a36Sopenharmony_ci	else
246862306a36Sopenharmony_ci		sll->sll_ifindex = dev->ifindex;
246962306a36Sopenharmony_ci
247062306a36Sopenharmony_ci	smp_mb();
247162306a36Sopenharmony_ci
247262306a36Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
247362306a36Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
247462306a36Sopenharmony_ci		u8 *start, *end;
247562306a36Sopenharmony_ci
247662306a36Sopenharmony_ci		end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
247762306a36Sopenharmony_ci					macoff + snaplen);
247862306a36Sopenharmony_ci
247962306a36Sopenharmony_ci		for (start = h.raw; start < end; start += PAGE_SIZE)
248062306a36Sopenharmony_ci			flush_dcache_page(pgv_to_page(start));
248162306a36Sopenharmony_ci	}
248262306a36Sopenharmony_ci	smp_wmb();
248362306a36Sopenharmony_ci#endif
248462306a36Sopenharmony_ci
248562306a36Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
248662306a36Sopenharmony_ci		spin_lock(&sk->sk_receive_queue.lock);
248762306a36Sopenharmony_ci		__packet_set_status(po, h.raw, status);
248862306a36Sopenharmony_ci		__clear_bit(slot_id, po->rx_ring.rx_owner_map);
248962306a36Sopenharmony_ci		spin_unlock(&sk->sk_receive_queue.lock);
249062306a36Sopenharmony_ci		sk->sk_data_ready(sk);
249162306a36Sopenharmony_ci	} else if (po->tp_version == TPACKET_V3) {
249262306a36Sopenharmony_ci		prb_clear_blk_fill_status(&po->rx_ring);
249362306a36Sopenharmony_ci	}
249462306a36Sopenharmony_ci
249562306a36Sopenharmony_cidrop_n_restore:
249662306a36Sopenharmony_ci	if (skb_head != skb->data && skb_shared(skb)) {
249762306a36Sopenharmony_ci		skb->data = skb_head;
249862306a36Sopenharmony_ci		skb->len = skb_len;
249962306a36Sopenharmony_ci	}
250062306a36Sopenharmony_cidrop:
250162306a36Sopenharmony_ci	if (!is_drop_n_account)
250262306a36Sopenharmony_ci		consume_skb(skb);
250362306a36Sopenharmony_ci	else
250462306a36Sopenharmony_ci		kfree_skb(skb);
250562306a36Sopenharmony_ci	return 0;
250662306a36Sopenharmony_ci
250762306a36Sopenharmony_cidrop_n_account:
250862306a36Sopenharmony_ci	spin_unlock(&sk->sk_receive_queue.lock);
250962306a36Sopenharmony_ci	atomic_inc(&po->tp_drops);
251062306a36Sopenharmony_ci	is_drop_n_account = true;
251162306a36Sopenharmony_ci
251262306a36Sopenharmony_ci	sk->sk_data_ready(sk);
251362306a36Sopenharmony_ci	kfree_skb(copy_skb);
251462306a36Sopenharmony_ci	goto drop_n_restore;
251562306a36Sopenharmony_ci}
251662306a36Sopenharmony_ci
251762306a36Sopenharmony_cistatic void tpacket_destruct_skb(struct sk_buff *skb)
251862306a36Sopenharmony_ci{
251962306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(skb->sk);
252062306a36Sopenharmony_ci
252162306a36Sopenharmony_ci	if (likely(po->tx_ring.pg_vec)) {
252262306a36Sopenharmony_ci		void *ph;
252362306a36Sopenharmony_ci		__u32 ts;
252462306a36Sopenharmony_ci
252562306a36Sopenharmony_ci		ph = skb_zcopy_get_nouarg(skb);
252662306a36Sopenharmony_ci		packet_dec_pending(&po->tx_ring);
252762306a36Sopenharmony_ci
252862306a36Sopenharmony_ci		ts = __packet_set_timestamp(po, ph, skb);
252962306a36Sopenharmony_ci		__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
253062306a36Sopenharmony_ci
253162306a36Sopenharmony_ci		if (!packet_read_pending(&po->tx_ring))
253262306a36Sopenharmony_ci			complete(&po->skb_completion);
253362306a36Sopenharmony_ci	}
253462306a36Sopenharmony_ci
253562306a36Sopenharmony_ci	sock_wfree(skb);
253662306a36Sopenharmony_ci}
253762306a36Sopenharmony_ci
253862306a36Sopenharmony_cistatic int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
253962306a36Sopenharmony_ci{
254062306a36Sopenharmony_ci	if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
254162306a36Sopenharmony_ci	    (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
254262306a36Sopenharmony_ci	     __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
254362306a36Sopenharmony_ci	      __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
254462306a36Sopenharmony_ci		vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
254562306a36Sopenharmony_ci			 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
254662306a36Sopenharmony_ci			__virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
254762306a36Sopenharmony_ci
254862306a36Sopenharmony_ci	if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
254962306a36Sopenharmony_ci		return -EINVAL;
255062306a36Sopenharmony_ci
255162306a36Sopenharmony_ci	return 0;
255262306a36Sopenharmony_ci}
255362306a36Sopenharmony_ci
255462306a36Sopenharmony_cistatic int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
255562306a36Sopenharmony_ci				 struct virtio_net_hdr *vnet_hdr, int vnet_hdr_sz)
255662306a36Sopenharmony_ci{
255762306a36Sopenharmony_ci	int ret;
255862306a36Sopenharmony_ci
255962306a36Sopenharmony_ci	if (*len < vnet_hdr_sz)
256062306a36Sopenharmony_ci		return -EINVAL;
256162306a36Sopenharmony_ci	*len -= vnet_hdr_sz;
256262306a36Sopenharmony_ci
256362306a36Sopenharmony_ci	if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
256462306a36Sopenharmony_ci		return -EFAULT;
256562306a36Sopenharmony_ci
256662306a36Sopenharmony_ci	ret = __packet_snd_vnet_parse(vnet_hdr, *len);
256762306a36Sopenharmony_ci	if (ret)
256862306a36Sopenharmony_ci		return ret;
256962306a36Sopenharmony_ci
257062306a36Sopenharmony_ci	/* move iter to point to the start of mac header */
257162306a36Sopenharmony_ci	if (vnet_hdr_sz != sizeof(struct virtio_net_hdr))
257262306a36Sopenharmony_ci		iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(struct virtio_net_hdr));
257362306a36Sopenharmony_ci
257462306a36Sopenharmony_ci	return 0;
257562306a36Sopenharmony_ci}
257662306a36Sopenharmony_ci
257762306a36Sopenharmony_cistatic int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
257862306a36Sopenharmony_ci		void *frame, struct net_device *dev, void *data, int tp_len,
257962306a36Sopenharmony_ci		__be16 proto, unsigned char *addr, int hlen, int copylen,
258062306a36Sopenharmony_ci		const struct sockcm_cookie *sockc)
258162306a36Sopenharmony_ci{
258262306a36Sopenharmony_ci	union tpacket_uhdr ph;
258362306a36Sopenharmony_ci	int to_write, offset, len, nr_frags, len_max;
258462306a36Sopenharmony_ci	struct socket *sock = po->sk.sk_socket;
258562306a36Sopenharmony_ci	struct page *page;
258662306a36Sopenharmony_ci	int err;
258762306a36Sopenharmony_ci
258862306a36Sopenharmony_ci	ph.raw = frame;
258962306a36Sopenharmony_ci
259062306a36Sopenharmony_ci	skb->protocol = proto;
259162306a36Sopenharmony_ci	skb->dev = dev;
259262306a36Sopenharmony_ci	skb->priority = READ_ONCE(po->sk.sk_priority);
259362306a36Sopenharmony_ci	skb->mark = READ_ONCE(po->sk.sk_mark);
259462306a36Sopenharmony_ci	skb->tstamp = sockc->transmit_time;
259562306a36Sopenharmony_ci	skb_setup_tx_timestamp(skb, sockc->tsflags);
259662306a36Sopenharmony_ci	skb_zcopy_set_nouarg(skb, ph.raw);
259762306a36Sopenharmony_ci
259862306a36Sopenharmony_ci	skb_reserve(skb, hlen);
259962306a36Sopenharmony_ci	skb_reset_network_header(skb);
260062306a36Sopenharmony_ci
260162306a36Sopenharmony_ci	to_write = tp_len;
260262306a36Sopenharmony_ci
260362306a36Sopenharmony_ci	if (sock->type == SOCK_DGRAM) {
260462306a36Sopenharmony_ci		err = dev_hard_header(skb, dev, ntohs(proto), addr,
260562306a36Sopenharmony_ci				NULL, tp_len);
260662306a36Sopenharmony_ci		if (unlikely(err < 0))
260762306a36Sopenharmony_ci			return -EINVAL;
260862306a36Sopenharmony_ci	} else if (copylen) {
260962306a36Sopenharmony_ci		int hdrlen = min_t(int, copylen, tp_len);
261062306a36Sopenharmony_ci
261162306a36Sopenharmony_ci		skb_push(skb, dev->hard_header_len);
261262306a36Sopenharmony_ci		skb_put(skb, copylen - dev->hard_header_len);
261362306a36Sopenharmony_ci		err = skb_store_bits(skb, 0, data, hdrlen);
261462306a36Sopenharmony_ci		if (unlikely(err))
261562306a36Sopenharmony_ci			return err;
261662306a36Sopenharmony_ci		if (!dev_validate_header(dev, skb->data, hdrlen))
261762306a36Sopenharmony_ci			return -EINVAL;
261862306a36Sopenharmony_ci
261962306a36Sopenharmony_ci		data += hdrlen;
262062306a36Sopenharmony_ci		to_write -= hdrlen;
262162306a36Sopenharmony_ci	}
262262306a36Sopenharmony_ci
262362306a36Sopenharmony_ci	offset = offset_in_page(data);
262462306a36Sopenharmony_ci	len_max = PAGE_SIZE - offset;
262562306a36Sopenharmony_ci	len = ((to_write > len_max) ? len_max : to_write);
262662306a36Sopenharmony_ci
262762306a36Sopenharmony_ci	skb->data_len = to_write;
262862306a36Sopenharmony_ci	skb->len += to_write;
262962306a36Sopenharmony_ci	skb->truesize += to_write;
263062306a36Sopenharmony_ci	refcount_add(to_write, &po->sk.sk_wmem_alloc);
263162306a36Sopenharmony_ci
263262306a36Sopenharmony_ci	while (likely(to_write)) {
263362306a36Sopenharmony_ci		nr_frags = skb_shinfo(skb)->nr_frags;
263462306a36Sopenharmony_ci
263562306a36Sopenharmony_ci		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
263662306a36Sopenharmony_ci			pr_err("Packet exceed the number of skb frags(%u)\n",
263762306a36Sopenharmony_ci			       (unsigned int)MAX_SKB_FRAGS);
263862306a36Sopenharmony_ci			return -EFAULT;
263962306a36Sopenharmony_ci		}
264062306a36Sopenharmony_ci
264162306a36Sopenharmony_ci		page = pgv_to_page(data);
264262306a36Sopenharmony_ci		data += len;
264362306a36Sopenharmony_ci		flush_dcache_page(page);
264462306a36Sopenharmony_ci		get_page(page);
264562306a36Sopenharmony_ci		skb_fill_page_desc(skb, nr_frags, page, offset, len);
264662306a36Sopenharmony_ci		to_write -= len;
264762306a36Sopenharmony_ci		offset = 0;
264862306a36Sopenharmony_ci		len_max = PAGE_SIZE;
264962306a36Sopenharmony_ci		len = ((to_write > len_max) ? len_max : to_write);
265062306a36Sopenharmony_ci	}
265162306a36Sopenharmony_ci
265262306a36Sopenharmony_ci	packet_parse_headers(skb, sock);
265362306a36Sopenharmony_ci
265462306a36Sopenharmony_ci	return tp_len;
265562306a36Sopenharmony_ci}
265662306a36Sopenharmony_ci
265762306a36Sopenharmony_cistatic int tpacket_parse_header(struct packet_sock *po, void *frame,
265862306a36Sopenharmony_ci				int size_max, void **data)
265962306a36Sopenharmony_ci{
266062306a36Sopenharmony_ci	union tpacket_uhdr ph;
266162306a36Sopenharmony_ci	int tp_len, off;
266262306a36Sopenharmony_ci
266362306a36Sopenharmony_ci	ph.raw = frame;
266462306a36Sopenharmony_ci
266562306a36Sopenharmony_ci	switch (po->tp_version) {
266662306a36Sopenharmony_ci	case TPACKET_V3:
266762306a36Sopenharmony_ci		if (ph.h3->tp_next_offset != 0) {
266862306a36Sopenharmony_ci			pr_warn_once("variable sized slot not supported");
266962306a36Sopenharmony_ci			return -EINVAL;
267062306a36Sopenharmony_ci		}
267162306a36Sopenharmony_ci		tp_len = ph.h3->tp_len;
267262306a36Sopenharmony_ci		break;
267362306a36Sopenharmony_ci	case TPACKET_V2:
267462306a36Sopenharmony_ci		tp_len = ph.h2->tp_len;
267562306a36Sopenharmony_ci		break;
267662306a36Sopenharmony_ci	default:
267762306a36Sopenharmony_ci		tp_len = ph.h1->tp_len;
267862306a36Sopenharmony_ci		break;
267962306a36Sopenharmony_ci	}
268062306a36Sopenharmony_ci	if (unlikely(tp_len > size_max)) {
268162306a36Sopenharmony_ci		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
268262306a36Sopenharmony_ci		return -EMSGSIZE;
268362306a36Sopenharmony_ci	}
268462306a36Sopenharmony_ci
268562306a36Sopenharmony_ci	if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) {
268662306a36Sopenharmony_ci		int off_min, off_max;
268762306a36Sopenharmony_ci
268862306a36Sopenharmony_ci		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
268962306a36Sopenharmony_ci		off_max = po->tx_ring.frame_size - tp_len;
269062306a36Sopenharmony_ci		if (po->sk.sk_type == SOCK_DGRAM) {
269162306a36Sopenharmony_ci			switch (po->tp_version) {
269262306a36Sopenharmony_ci			case TPACKET_V3:
269362306a36Sopenharmony_ci				off = ph.h3->tp_net;
269462306a36Sopenharmony_ci				break;
269562306a36Sopenharmony_ci			case TPACKET_V2:
269662306a36Sopenharmony_ci				off = ph.h2->tp_net;
269762306a36Sopenharmony_ci				break;
269862306a36Sopenharmony_ci			default:
269962306a36Sopenharmony_ci				off = ph.h1->tp_net;
270062306a36Sopenharmony_ci				break;
270162306a36Sopenharmony_ci			}
270262306a36Sopenharmony_ci		} else {
270362306a36Sopenharmony_ci			switch (po->tp_version) {
270462306a36Sopenharmony_ci			case TPACKET_V3:
270562306a36Sopenharmony_ci				off = ph.h3->tp_mac;
270662306a36Sopenharmony_ci				break;
270762306a36Sopenharmony_ci			case TPACKET_V2:
270862306a36Sopenharmony_ci				off = ph.h2->tp_mac;
270962306a36Sopenharmony_ci				break;
271062306a36Sopenharmony_ci			default:
271162306a36Sopenharmony_ci				off = ph.h1->tp_mac;
271262306a36Sopenharmony_ci				break;
271362306a36Sopenharmony_ci			}
271462306a36Sopenharmony_ci		}
271562306a36Sopenharmony_ci		if (unlikely((off < off_min) || (off_max < off)))
271662306a36Sopenharmony_ci			return -EINVAL;
271762306a36Sopenharmony_ci	} else {
271862306a36Sopenharmony_ci		off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
271962306a36Sopenharmony_ci	}
272062306a36Sopenharmony_ci
272162306a36Sopenharmony_ci	*data = frame + off;
272262306a36Sopenharmony_ci	return tp_len;
272362306a36Sopenharmony_ci}
272462306a36Sopenharmony_ci
272562306a36Sopenharmony_cistatic int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
272662306a36Sopenharmony_ci{
272762306a36Sopenharmony_ci	struct sk_buff *skb = NULL;
272862306a36Sopenharmony_ci	struct net_device *dev;
272962306a36Sopenharmony_ci	struct virtio_net_hdr *vnet_hdr = NULL;
273062306a36Sopenharmony_ci	struct sockcm_cookie sockc;
273162306a36Sopenharmony_ci	__be16 proto;
273262306a36Sopenharmony_ci	int err, reserve = 0;
273362306a36Sopenharmony_ci	void *ph;
273462306a36Sopenharmony_ci	DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
273562306a36Sopenharmony_ci	bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
273662306a36Sopenharmony_ci	int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
273762306a36Sopenharmony_ci	unsigned char *addr = NULL;
273862306a36Sopenharmony_ci	int tp_len, size_max;
273962306a36Sopenharmony_ci	void *data;
274062306a36Sopenharmony_ci	int len_sum = 0;
274162306a36Sopenharmony_ci	int status = TP_STATUS_AVAILABLE;
274262306a36Sopenharmony_ci	int hlen, tlen, copylen = 0;
274362306a36Sopenharmony_ci	long timeo = 0;
274462306a36Sopenharmony_ci
274562306a36Sopenharmony_ci	mutex_lock(&po->pg_vec_lock);
274662306a36Sopenharmony_ci
274762306a36Sopenharmony_ci	/* packet_sendmsg() check on tx_ring.pg_vec was lockless,
274862306a36Sopenharmony_ci	 * we need to confirm it under protection of pg_vec_lock.
274962306a36Sopenharmony_ci	 */
275062306a36Sopenharmony_ci	if (unlikely(!po->tx_ring.pg_vec)) {
275162306a36Sopenharmony_ci		err = -EBUSY;
275262306a36Sopenharmony_ci		goto out;
275362306a36Sopenharmony_ci	}
275462306a36Sopenharmony_ci	if (likely(saddr == NULL)) {
275562306a36Sopenharmony_ci		dev	= packet_cached_dev_get(po);
275662306a36Sopenharmony_ci		proto	= READ_ONCE(po->num);
275762306a36Sopenharmony_ci	} else {
275862306a36Sopenharmony_ci		err = -EINVAL;
275962306a36Sopenharmony_ci		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
276062306a36Sopenharmony_ci			goto out;
276162306a36Sopenharmony_ci		if (msg->msg_namelen < (saddr->sll_halen
276262306a36Sopenharmony_ci					+ offsetof(struct sockaddr_ll,
276362306a36Sopenharmony_ci						sll_addr)))
276462306a36Sopenharmony_ci			goto out;
276562306a36Sopenharmony_ci		proto	= saddr->sll_protocol;
276662306a36Sopenharmony_ci		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
276762306a36Sopenharmony_ci		if (po->sk.sk_socket->type == SOCK_DGRAM) {
276862306a36Sopenharmony_ci			if (dev && msg->msg_namelen < dev->addr_len +
276962306a36Sopenharmony_ci				   offsetof(struct sockaddr_ll, sll_addr))
277062306a36Sopenharmony_ci				goto out_put;
277162306a36Sopenharmony_ci			addr = saddr->sll_addr;
277262306a36Sopenharmony_ci		}
277362306a36Sopenharmony_ci	}
277462306a36Sopenharmony_ci
277562306a36Sopenharmony_ci	err = -ENXIO;
277662306a36Sopenharmony_ci	if (unlikely(dev == NULL))
277762306a36Sopenharmony_ci		goto out;
277862306a36Sopenharmony_ci	err = -ENETDOWN;
277962306a36Sopenharmony_ci	if (unlikely(!(dev->flags & IFF_UP)))
278062306a36Sopenharmony_ci		goto out_put;
278162306a36Sopenharmony_ci
278262306a36Sopenharmony_ci	sockcm_init(&sockc, &po->sk);
278362306a36Sopenharmony_ci	if (msg->msg_controllen) {
278462306a36Sopenharmony_ci		err = sock_cmsg_send(&po->sk, msg, &sockc);
278562306a36Sopenharmony_ci		if (unlikely(err))
278662306a36Sopenharmony_ci			goto out_put;
278762306a36Sopenharmony_ci	}
278862306a36Sopenharmony_ci
278962306a36Sopenharmony_ci	if (po->sk.sk_socket->type == SOCK_RAW)
279062306a36Sopenharmony_ci		reserve = dev->hard_header_len;
279162306a36Sopenharmony_ci	size_max = po->tx_ring.frame_size
279262306a36Sopenharmony_ci		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
279362306a36Sopenharmony_ci
279462306a36Sopenharmony_ci	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz)
279562306a36Sopenharmony_ci		size_max = dev->mtu + reserve + VLAN_HLEN;
279662306a36Sopenharmony_ci
279762306a36Sopenharmony_ci	reinit_completion(&po->skb_completion);
279862306a36Sopenharmony_ci
279962306a36Sopenharmony_ci	do {
280062306a36Sopenharmony_ci		ph = packet_current_frame(po, &po->tx_ring,
280162306a36Sopenharmony_ci					  TP_STATUS_SEND_REQUEST);
280262306a36Sopenharmony_ci		if (unlikely(ph == NULL)) {
280362306a36Sopenharmony_ci			if (need_wait && skb) {
280462306a36Sopenharmony_ci				timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
280562306a36Sopenharmony_ci				timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
280662306a36Sopenharmony_ci				if (timeo <= 0) {
280762306a36Sopenharmony_ci					err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
280862306a36Sopenharmony_ci					goto out_put;
280962306a36Sopenharmony_ci				}
281062306a36Sopenharmony_ci			}
281162306a36Sopenharmony_ci			/* check for additional frames */
281262306a36Sopenharmony_ci			continue;
281362306a36Sopenharmony_ci		}
281462306a36Sopenharmony_ci
281562306a36Sopenharmony_ci		skb = NULL;
281662306a36Sopenharmony_ci		tp_len = tpacket_parse_header(po, ph, size_max, &data);
281762306a36Sopenharmony_ci		if (tp_len < 0)
281862306a36Sopenharmony_ci			goto tpacket_error;
281962306a36Sopenharmony_ci
282062306a36Sopenharmony_ci		status = TP_STATUS_SEND_REQUEST;
282162306a36Sopenharmony_ci		hlen = LL_RESERVED_SPACE(dev);
282262306a36Sopenharmony_ci		tlen = dev->needed_tailroom;
282362306a36Sopenharmony_ci		if (vnet_hdr_sz) {
282462306a36Sopenharmony_ci			vnet_hdr = data;
282562306a36Sopenharmony_ci			data += vnet_hdr_sz;
282662306a36Sopenharmony_ci			tp_len -= vnet_hdr_sz;
282762306a36Sopenharmony_ci			if (tp_len < 0 ||
282862306a36Sopenharmony_ci			    __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
282962306a36Sopenharmony_ci				tp_len = -EINVAL;
283062306a36Sopenharmony_ci				goto tpacket_error;
283162306a36Sopenharmony_ci			}
283262306a36Sopenharmony_ci			copylen = __virtio16_to_cpu(vio_le(),
283362306a36Sopenharmony_ci						    vnet_hdr->hdr_len);
283462306a36Sopenharmony_ci		}
283562306a36Sopenharmony_ci		copylen = max_t(int, copylen, dev->hard_header_len);
283662306a36Sopenharmony_ci		skb = sock_alloc_send_skb(&po->sk,
283762306a36Sopenharmony_ci				hlen + tlen + sizeof(struct sockaddr_ll) +
283862306a36Sopenharmony_ci				(copylen - dev->hard_header_len),
283962306a36Sopenharmony_ci				!need_wait, &err);
284062306a36Sopenharmony_ci
284162306a36Sopenharmony_ci		if (unlikely(skb == NULL)) {
284262306a36Sopenharmony_ci			/* we assume the socket was initially writeable ... */
284362306a36Sopenharmony_ci			if (likely(len_sum > 0))
284462306a36Sopenharmony_ci				err = len_sum;
284562306a36Sopenharmony_ci			goto out_status;
284662306a36Sopenharmony_ci		}
284762306a36Sopenharmony_ci		tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
284862306a36Sopenharmony_ci					  addr, hlen, copylen, &sockc);
284962306a36Sopenharmony_ci		if (likely(tp_len >= 0) &&
285062306a36Sopenharmony_ci		    tp_len > dev->mtu + reserve &&
285162306a36Sopenharmony_ci		    !vnet_hdr_sz &&
285262306a36Sopenharmony_ci		    !packet_extra_vlan_len_allowed(dev, skb))
285362306a36Sopenharmony_ci			tp_len = -EMSGSIZE;
285462306a36Sopenharmony_ci
285562306a36Sopenharmony_ci		if (unlikely(tp_len < 0)) {
285662306a36Sopenharmony_citpacket_error:
285762306a36Sopenharmony_ci			if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) {
285862306a36Sopenharmony_ci				__packet_set_status(po, ph,
285962306a36Sopenharmony_ci						TP_STATUS_AVAILABLE);
286062306a36Sopenharmony_ci				packet_increment_head(&po->tx_ring);
286162306a36Sopenharmony_ci				kfree_skb(skb);
286262306a36Sopenharmony_ci				continue;
286362306a36Sopenharmony_ci			} else {
286462306a36Sopenharmony_ci				status = TP_STATUS_WRONG_FORMAT;
286562306a36Sopenharmony_ci				err = tp_len;
286662306a36Sopenharmony_ci				goto out_status;
286762306a36Sopenharmony_ci			}
286862306a36Sopenharmony_ci		}
286962306a36Sopenharmony_ci
287062306a36Sopenharmony_ci		if (vnet_hdr_sz) {
287162306a36Sopenharmony_ci			if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
287262306a36Sopenharmony_ci				tp_len = -EINVAL;
287362306a36Sopenharmony_ci				goto tpacket_error;
287462306a36Sopenharmony_ci			}
287562306a36Sopenharmony_ci			virtio_net_hdr_set_proto(skb, vnet_hdr);
287662306a36Sopenharmony_ci		}
287762306a36Sopenharmony_ci
287862306a36Sopenharmony_ci		skb->destructor = tpacket_destruct_skb;
287962306a36Sopenharmony_ci		__packet_set_status(po, ph, TP_STATUS_SENDING);
288062306a36Sopenharmony_ci		packet_inc_pending(&po->tx_ring);
288162306a36Sopenharmony_ci
288262306a36Sopenharmony_ci		status = TP_STATUS_SEND_REQUEST;
288362306a36Sopenharmony_ci		err = packet_xmit(po, skb);
288462306a36Sopenharmony_ci		if (unlikely(err != 0)) {
288562306a36Sopenharmony_ci			if (err > 0)
288662306a36Sopenharmony_ci				err = net_xmit_errno(err);
288762306a36Sopenharmony_ci			if (err && __packet_get_status(po, ph) ==
288862306a36Sopenharmony_ci				   TP_STATUS_AVAILABLE) {
288962306a36Sopenharmony_ci				/* skb was destructed already */
289062306a36Sopenharmony_ci				skb = NULL;
289162306a36Sopenharmony_ci				goto out_status;
289262306a36Sopenharmony_ci			}
289362306a36Sopenharmony_ci			/*
289462306a36Sopenharmony_ci			 * skb was dropped but not destructed yet;
289562306a36Sopenharmony_ci			 * let's treat it like congestion or err < 0
289662306a36Sopenharmony_ci			 */
289762306a36Sopenharmony_ci			err = 0;
289862306a36Sopenharmony_ci		}
289962306a36Sopenharmony_ci		packet_increment_head(&po->tx_ring);
290062306a36Sopenharmony_ci		len_sum += tp_len;
290162306a36Sopenharmony_ci	} while (likely((ph != NULL) ||
290262306a36Sopenharmony_ci		/* Note: packet_read_pending() might be slow if we have
290362306a36Sopenharmony_ci		 * to call it as it's per_cpu variable, but in fast-path
290462306a36Sopenharmony_ci		 * we already short-circuit the loop with the first
290562306a36Sopenharmony_ci		 * condition, and luckily don't have to go that path
290662306a36Sopenharmony_ci		 * anyway.
290762306a36Sopenharmony_ci		 */
290862306a36Sopenharmony_ci		 (need_wait && packet_read_pending(&po->tx_ring))));
290962306a36Sopenharmony_ci
291062306a36Sopenharmony_ci	err = len_sum;
291162306a36Sopenharmony_ci	goto out_put;
291262306a36Sopenharmony_ci
291362306a36Sopenharmony_ciout_status:
291462306a36Sopenharmony_ci	__packet_set_status(po, ph, status);
291562306a36Sopenharmony_ci	kfree_skb(skb);
291662306a36Sopenharmony_ciout_put:
291762306a36Sopenharmony_ci	dev_put(dev);
291862306a36Sopenharmony_ciout:
291962306a36Sopenharmony_ci	mutex_unlock(&po->pg_vec_lock);
292062306a36Sopenharmony_ci	return err;
292162306a36Sopenharmony_ci}
292262306a36Sopenharmony_ci
292362306a36Sopenharmony_cistatic struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
292462306a36Sopenharmony_ci				        size_t reserve, size_t len,
292562306a36Sopenharmony_ci				        size_t linear, int noblock,
292662306a36Sopenharmony_ci				        int *err)
292762306a36Sopenharmony_ci{
292862306a36Sopenharmony_ci	struct sk_buff *skb;
292962306a36Sopenharmony_ci
293062306a36Sopenharmony_ci	/* Under a page?  Don't bother with paged skb. */
293162306a36Sopenharmony_ci	if (prepad + len < PAGE_SIZE || !linear)
293262306a36Sopenharmony_ci		linear = len;
293362306a36Sopenharmony_ci
293462306a36Sopenharmony_ci	if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER))
293562306a36Sopenharmony_ci		linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER);
293662306a36Sopenharmony_ci	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
293762306a36Sopenharmony_ci				   err, PAGE_ALLOC_COSTLY_ORDER);
293862306a36Sopenharmony_ci	if (!skb)
293962306a36Sopenharmony_ci		return NULL;
294062306a36Sopenharmony_ci
294162306a36Sopenharmony_ci	skb_reserve(skb, reserve);
294262306a36Sopenharmony_ci	skb_put(skb, linear);
294362306a36Sopenharmony_ci	skb->data_len = len - linear;
294462306a36Sopenharmony_ci	skb->len += len - linear;
294562306a36Sopenharmony_ci
294662306a36Sopenharmony_ci	return skb;
294762306a36Sopenharmony_ci}
294862306a36Sopenharmony_ci
294962306a36Sopenharmony_cistatic int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
295062306a36Sopenharmony_ci{
295162306a36Sopenharmony_ci	struct sock *sk = sock->sk;
295262306a36Sopenharmony_ci	DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
295362306a36Sopenharmony_ci	struct sk_buff *skb;
295462306a36Sopenharmony_ci	struct net_device *dev;
295562306a36Sopenharmony_ci	__be16 proto;
295662306a36Sopenharmony_ci	unsigned char *addr = NULL;
295762306a36Sopenharmony_ci	int err, reserve = 0;
295862306a36Sopenharmony_ci	struct sockcm_cookie sockc;
295962306a36Sopenharmony_ci	struct virtio_net_hdr vnet_hdr = { 0 };
296062306a36Sopenharmony_ci	int offset = 0;
296162306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
296262306a36Sopenharmony_ci	int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz);
296362306a36Sopenharmony_ci	int hlen, tlen, linear;
296462306a36Sopenharmony_ci	int extra_len = 0;
296562306a36Sopenharmony_ci
296662306a36Sopenharmony_ci	/*
296762306a36Sopenharmony_ci	 *	Get and verify the address.
296862306a36Sopenharmony_ci	 */
296962306a36Sopenharmony_ci
297062306a36Sopenharmony_ci	if (likely(saddr == NULL)) {
297162306a36Sopenharmony_ci		dev	= packet_cached_dev_get(po);
297262306a36Sopenharmony_ci		proto	= READ_ONCE(po->num);
297362306a36Sopenharmony_ci	} else {
297462306a36Sopenharmony_ci		err = -EINVAL;
297562306a36Sopenharmony_ci		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
297662306a36Sopenharmony_ci			goto out;
297762306a36Sopenharmony_ci		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
297862306a36Sopenharmony_ci			goto out;
297962306a36Sopenharmony_ci		proto	= saddr->sll_protocol;
298062306a36Sopenharmony_ci		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
298162306a36Sopenharmony_ci		if (sock->type == SOCK_DGRAM) {
298262306a36Sopenharmony_ci			if (dev && msg->msg_namelen < dev->addr_len +
298362306a36Sopenharmony_ci				   offsetof(struct sockaddr_ll, sll_addr))
298462306a36Sopenharmony_ci				goto out_unlock;
298562306a36Sopenharmony_ci			addr = saddr->sll_addr;
298662306a36Sopenharmony_ci		}
298762306a36Sopenharmony_ci	}
298862306a36Sopenharmony_ci
298962306a36Sopenharmony_ci	err = -ENXIO;
299062306a36Sopenharmony_ci	if (unlikely(dev == NULL))
299162306a36Sopenharmony_ci		goto out_unlock;
299262306a36Sopenharmony_ci	err = -ENETDOWN;
299362306a36Sopenharmony_ci	if (unlikely(!(dev->flags & IFF_UP)))
299462306a36Sopenharmony_ci		goto out_unlock;
299562306a36Sopenharmony_ci
299662306a36Sopenharmony_ci	sockcm_init(&sockc, sk);
299762306a36Sopenharmony_ci	sockc.mark = READ_ONCE(sk->sk_mark);
299862306a36Sopenharmony_ci	if (msg->msg_controllen) {
299962306a36Sopenharmony_ci		err = sock_cmsg_send(sk, msg, &sockc);
300062306a36Sopenharmony_ci		if (unlikely(err))
300162306a36Sopenharmony_ci			goto out_unlock;
300262306a36Sopenharmony_ci	}
300362306a36Sopenharmony_ci
300462306a36Sopenharmony_ci	if (sock->type == SOCK_RAW)
300562306a36Sopenharmony_ci		reserve = dev->hard_header_len;
300662306a36Sopenharmony_ci	if (vnet_hdr_sz) {
300762306a36Sopenharmony_ci		err = packet_snd_vnet_parse(msg, &len, &vnet_hdr, vnet_hdr_sz);
300862306a36Sopenharmony_ci		if (err)
300962306a36Sopenharmony_ci			goto out_unlock;
301062306a36Sopenharmony_ci	}
301162306a36Sopenharmony_ci
301262306a36Sopenharmony_ci	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
301362306a36Sopenharmony_ci		if (!netif_supports_nofcs(dev)) {
301462306a36Sopenharmony_ci			err = -EPROTONOSUPPORT;
301562306a36Sopenharmony_ci			goto out_unlock;
301662306a36Sopenharmony_ci		}
301762306a36Sopenharmony_ci		extra_len = 4; /* We're doing our own CRC */
301862306a36Sopenharmony_ci	}
301962306a36Sopenharmony_ci
302062306a36Sopenharmony_ci	err = -EMSGSIZE;
302162306a36Sopenharmony_ci	if (!vnet_hdr.gso_type &&
302262306a36Sopenharmony_ci	    (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
302362306a36Sopenharmony_ci		goto out_unlock;
302462306a36Sopenharmony_ci
302562306a36Sopenharmony_ci	err = -ENOBUFS;
302662306a36Sopenharmony_ci	hlen = LL_RESERVED_SPACE(dev);
302762306a36Sopenharmony_ci	tlen = dev->needed_tailroom;
302862306a36Sopenharmony_ci	linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
302962306a36Sopenharmony_ci	linear = max(linear, min_t(int, len, dev->hard_header_len));
303062306a36Sopenharmony_ci	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
303162306a36Sopenharmony_ci			       msg->msg_flags & MSG_DONTWAIT, &err);
303262306a36Sopenharmony_ci	if (skb == NULL)
303362306a36Sopenharmony_ci		goto out_unlock;
303462306a36Sopenharmony_ci
303562306a36Sopenharmony_ci	skb_reset_network_header(skb);
303662306a36Sopenharmony_ci
303762306a36Sopenharmony_ci	err = -EINVAL;
303862306a36Sopenharmony_ci	if (sock->type == SOCK_DGRAM) {
303962306a36Sopenharmony_ci		offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
304062306a36Sopenharmony_ci		if (unlikely(offset < 0))
304162306a36Sopenharmony_ci			goto out_free;
304262306a36Sopenharmony_ci	} else if (reserve) {
304362306a36Sopenharmony_ci		skb_reserve(skb, -reserve);
304462306a36Sopenharmony_ci		if (len < reserve + sizeof(struct ipv6hdr) &&
304562306a36Sopenharmony_ci		    dev->min_header_len != dev->hard_header_len)
304662306a36Sopenharmony_ci			skb_reset_network_header(skb);
304762306a36Sopenharmony_ci	}
304862306a36Sopenharmony_ci
304962306a36Sopenharmony_ci	/* Returns -EFAULT on error */
305062306a36Sopenharmony_ci	err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
305162306a36Sopenharmony_ci	if (err)
305262306a36Sopenharmony_ci		goto out_free;
305362306a36Sopenharmony_ci
305462306a36Sopenharmony_ci	if ((sock->type == SOCK_RAW &&
305562306a36Sopenharmony_ci	     !dev_validate_header(dev, skb->data, len)) || !skb->len) {
305662306a36Sopenharmony_ci		err = -EINVAL;
305762306a36Sopenharmony_ci		goto out_free;
305862306a36Sopenharmony_ci	}
305962306a36Sopenharmony_ci
306062306a36Sopenharmony_ci	skb_setup_tx_timestamp(skb, sockc.tsflags);
306162306a36Sopenharmony_ci
306262306a36Sopenharmony_ci	if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
306362306a36Sopenharmony_ci	    !packet_extra_vlan_len_allowed(dev, skb)) {
306462306a36Sopenharmony_ci		err = -EMSGSIZE;
306562306a36Sopenharmony_ci		goto out_free;
306662306a36Sopenharmony_ci	}
306762306a36Sopenharmony_ci
306862306a36Sopenharmony_ci	skb->protocol = proto;
306962306a36Sopenharmony_ci	skb->dev = dev;
307062306a36Sopenharmony_ci	skb->priority = READ_ONCE(sk->sk_priority);
307162306a36Sopenharmony_ci	skb->mark = sockc.mark;
307262306a36Sopenharmony_ci	skb->tstamp = sockc.transmit_time;
307362306a36Sopenharmony_ci
307462306a36Sopenharmony_ci	if (unlikely(extra_len == 4))
307562306a36Sopenharmony_ci		skb->no_fcs = 1;
307662306a36Sopenharmony_ci
307762306a36Sopenharmony_ci	packet_parse_headers(skb, sock);
307862306a36Sopenharmony_ci
307962306a36Sopenharmony_ci	if (vnet_hdr_sz) {
308062306a36Sopenharmony_ci		err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
308162306a36Sopenharmony_ci		if (err)
308262306a36Sopenharmony_ci			goto out_free;
308362306a36Sopenharmony_ci		len += vnet_hdr_sz;
308462306a36Sopenharmony_ci		virtio_net_hdr_set_proto(skb, &vnet_hdr);
308562306a36Sopenharmony_ci	}
308662306a36Sopenharmony_ci
308762306a36Sopenharmony_ci	err = packet_xmit(po, skb);
308862306a36Sopenharmony_ci
308962306a36Sopenharmony_ci	if (unlikely(err != 0)) {
309062306a36Sopenharmony_ci		if (err > 0)
309162306a36Sopenharmony_ci			err = net_xmit_errno(err);
309262306a36Sopenharmony_ci		if (err)
309362306a36Sopenharmony_ci			goto out_unlock;
309462306a36Sopenharmony_ci	}
309562306a36Sopenharmony_ci
309662306a36Sopenharmony_ci	dev_put(dev);
309762306a36Sopenharmony_ci
309862306a36Sopenharmony_ci	return len;
309962306a36Sopenharmony_ci
310062306a36Sopenharmony_ciout_free:
310162306a36Sopenharmony_ci	kfree_skb(skb);
310262306a36Sopenharmony_ciout_unlock:
310362306a36Sopenharmony_ci	dev_put(dev);
310462306a36Sopenharmony_ciout:
310562306a36Sopenharmony_ci	return err;
310662306a36Sopenharmony_ci}
310762306a36Sopenharmony_ci
310862306a36Sopenharmony_cistatic int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
310962306a36Sopenharmony_ci{
311062306a36Sopenharmony_ci	struct sock *sk = sock->sk;
311162306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
311262306a36Sopenharmony_ci
311362306a36Sopenharmony_ci	/* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
311462306a36Sopenharmony_ci	 * tpacket_snd() will redo the check safely.
311562306a36Sopenharmony_ci	 */
311662306a36Sopenharmony_ci	if (data_race(po->tx_ring.pg_vec))
311762306a36Sopenharmony_ci		return tpacket_snd(po, msg);
311862306a36Sopenharmony_ci
311962306a36Sopenharmony_ci	return packet_snd(sock, msg, len);
312062306a36Sopenharmony_ci}
312162306a36Sopenharmony_ci
312262306a36Sopenharmony_ci/*
312362306a36Sopenharmony_ci *	Close a PACKET socket. This is fairly simple. We immediately go
312462306a36Sopenharmony_ci *	to 'closed' state and remove our protocol entry in the device list.
312562306a36Sopenharmony_ci */
312662306a36Sopenharmony_ci
312762306a36Sopenharmony_cistatic int packet_release(struct socket *sock)
312862306a36Sopenharmony_ci{
312962306a36Sopenharmony_ci	struct sock *sk = sock->sk;
313062306a36Sopenharmony_ci	struct packet_sock *po;
313162306a36Sopenharmony_ci	struct packet_fanout *f;
313262306a36Sopenharmony_ci	struct net *net;
313362306a36Sopenharmony_ci	union tpacket_req_u req_u;
313462306a36Sopenharmony_ci
313562306a36Sopenharmony_ci	if (!sk)
313662306a36Sopenharmony_ci		return 0;
313762306a36Sopenharmony_ci
313862306a36Sopenharmony_ci	net = sock_net(sk);
313962306a36Sopenharmony_ci	po = pkt_sk(sk);
314062306a36Sopenharmony_ci
314162306a36Sopenharmony_ci	mutex_lock(&net->packet.sklist_lock);
314262306a36Sopenharmony_ci	sk_del_node_init_rcu(sk);
314362306a36Sopenharmony_ci	mutex_unlock(&net->packet.sklist_lock);
314462306a36Sopenharmony_ci
314562306a36Sopenharmony_ci	sock_prot_inuse_add(net, sk->sk_prot, -1);
314662306a36Sopenharmony_ci
314762306a36Sopenharmony_ci	spin_lock(&po->bind_lock);
314862306a36Sopenharmony_ci	unregister_prot_hook(sk, false);
314962306a36Sopenharmony_ci	packet_cached_dev_reset(po);
315062306a36Sopenharmony_ci
315162306a36Sopenharmony_ci	if (po->prot_hook.dev) {
315262306a36Sopenharmony_ci		netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
315362306a36Sopenharmony_ci		po->prot_hook.dev = NULL;
315462306a36Sopenharmony_ci	}
315562306a36Sopenharmony_ci	spin_unlock(&po->bind_lock);
315662306a36Sopenharmony_ci
315762306a36Sopenharmony_ci	packet_flush_mclist(sk);
315862306a36Sopenharmony_ci
315962306a36Sopenharmony_ci	lock_sock(sk);
316062306a36Sopenharmony_ci	if (po->rx_ring.pg_vec) {
316162306a36Sopenharmony_ci		memset(&req_u, 0, sizeof(req_u));
316262306a36Sopenharmony_ci		packet_set_ring(sk, &req_u, 1, 0);
316362306a36Sopenharmony_ci	}
316462306a36Sopenharmony_ci
316562306a36Sopenharmony_ci	if (po->tx_ring.pg_vec) {
316662306a36Sopenharmony_ci		memset(&req_u, 0, sizeof(req_u));
316762306a36Sopenharmony_ci		packet_set_ring(sk, &req_u, 1, 1);
316862306a36Sopenharmony_ci	}
316962306a36Sopenharmony_ci	release_sock(sk);
317062306a36Sopenharmony_ci
317162306a36Sopenharmony_ci	f = fanout_release(sk);
317262306a36Sopenharmony_ci
317362306a36Sopenharmony_ci	synchronize_net();
317462306a36Sopenharmony_ci
317562306a36Sopenharmony_ci	kfree(po->rollover);
317662306a36Sopenharmony_ci	if (f) {
317762306a36Sopenharmony_ci		fanout_release_data(f);
317862306a36Sopenharmony_ci		kvfree(f);
317962306a36Sopenharmony_ci	}
318062306a36Sopenharmony_ci	/*
318162306a36Sopenharmony_ci	 *	Now the socket is dead. No more input will appear.
318262306a36Sopenharmony_ci	 */
318362306a36Sopenharmony_ci	sock_orphan(sk);
318462306a36Sopenharmony_ci	sock->sk = NULL;
318562306a36Sopenharmony_ci
318662306a36Sopenharmony_ci	/* Purge queues */
318762306a36Sopenharmony_ci
318862306a36Sopenharmony_ci	skb_queue_purge(&sk->sk_receive_queue);
318962306a36Sopenharmony_ci	packet_free_pending(po);
319062306a36Sopenharmony_ci
319162306a36Sopenharmony_ci	sock_put(sk);
319262306a36Sopenharmony_ci	return 0;
319362306a36Sopenharmony_ci}
319462306a36Sopenharmony_ci
319562306a36Sopenharmony_ci/*
319662306a36Sopenharmony_ci *	Attach a packet hook.
319762306a36Sopenharmony_ci */
319862306a36Sopenharmony_ci
319962306a36Sopenharmony_cistatic int packet_do_bind(struct sock *sk, const char *name, int ifindex,
320062306a36Sopenharmony_ci			  __be16 proto)
320162306a36Sopenharmony_ci{
320262306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
320362306a36Sopenharmony_ci	struct net_device *dev = NULL;
320462306a36Sopenharmony_ci	bool unlisted = false;
320562306a36Sopenharmony_ci	bool need_rehook;
320662306a36Sopenharmony_ci	int ret = 0;
320762306a36Sopenharmony_ci
320862306a36Sopenharmony_ci	lock_sock(sk);
320962306a36Sopenharmony_ci	spin_lock(&po->bind_lock);
321062306a36Sopenharmony_ci	if (!proto)
321162306a36Sopenharmony_ci		proto = po->num;
321262306a36Sopenharmony_ci
321362306a36Sopenharmony_ci	rcu_read_lock();
321462306a36Sopenharmony_ci
321562306a36Sopenharmony_ci	if (po->fanout) {
321662306a36Sopenharmony_ci		ret = -EINVAL;
321762306a36Sopenharmony_ci		goto out_unlock;
321862306a36Sopenharmony_ci	}
321962306a36Sopenharmony_ci
322062306a36Sopenharmony_ci	if (name) {
322162306a36Sopenharmony_ci		dev = dev_get_by_name_rcu(sock_net(sk), name);
322262306a36Sopenharmony_ci		if (!dev) {
322362306a36Sopenharmony_ci			ret = -ENODEV;
322462306a36Sopenharmony_ci			goto out_unlock;
322562306a36Sopenharmony_ci		}
322662306a36Sopenharmony_ci	} else if (ifindex) {
322762306a36Sopenharmony_ci		dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
322862306a36Sopenharmony_ci		if (!dev) {
322962306a36Sopenharmony_ci			ret = -ENODEV;
323062306a36Sopenharmony_ci			goto out_unlock;
323162306a36Sopenharmony_ci		}
323262306a36Sopenharmony_ci	}
323362306a36Sopenharmony_ci
323462306a36Sopenharmony_ci	need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev;
323562306a36Sopenharmony_ci
323662306a36Sopenharmony_ci	if (need_rehook) {
323762306a36Sopenharmony_ci		dev_hold(dev);
323862306a36Sopenharmony_ci		if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
323962306a36Sopenharmony_ci			rcu_read_unlock();
324062306a36Sopenharmony_ci			/* prevents packet_notifier() from calling
324162306a36Sopenharmony_ci			 * register_prot_hook()
324262306a36Sopenharmony_ci			 */
324362306a36Sopenharmony_ci			WRITE_ONCE(po->num, 0);
324462306a36Sopenharmony_ci			__unregister_prot_hook(sk, true);
324562306a36Sopenharmony_ci			rcu_read_lock();
324662306a36Sopenharmony_ci			if (dev)
324762306a36Sopenharmony_ci				unlisted = !dev_get_by_index_rcu(sock_net(sk),
324862306a36Sopenharmony_ci								 dev->ifindex);
324962306a36Sopenharmony_ci		}
325062306a36Sopenharmony_ci
325162306a36Sopenharmony_ci		BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING));
325262306a36Sopenharmony_ci		WRITE_ONCE(po->num, proto);
325362306a36Sopenharmony_ci		po->prot_hook.type = proto;
325462306a36Sopenharmony_ci
325562306a36Sopenharmony_ci		netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker);
325662306a36Sopenharmony_ci
325762306a36Sopenharmony_ci		if (unlikely(unlisted)) {
325862306a36Sopenharmony_ci			po->prot_hook.dev = NULL;
325962306a36Sopenharmony_ci			WRITE_ONCE(po->ifindex, -1);
326062306a36Sopenharmony_ci			packet_cached_dev_reset(po);
326162306a36Sopenharmony_ci		} else {
326262306a36Sopenharmony_ci			netdev_hold(dev, &po->prot_hook.dev_tracker,
326362306a36Sopenharmony_ci				    GFP_ATOMIC);
326462306a36Sopenharmony_ci			po->prot_hook.dev = dev;
326562306a36Sopenharmony_ci			WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
326662306a36Sopenharmony_ci			packet_cached_dev_assign(po, dev);
326762306a36Sopenharmony_ci		}
326862306a36Sopenharmony_ci		dev_put(dev);
326962306a36Sopenharmony_ci	}
327062306a36Sopenharmony_ci
327162306a36Sopenharmony_ci	if (proto == 0 || !need_rehook)
327262306a36Sopenharmony_ci		goto out_unlock;
327362306a36Sopenharmony_ci
327462306a36Sopenharmony_ci	if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
327562306a36Sopenharmony_ci		register_prot_hook(sk);
327662306a36Sopenharmony_ci	} else {
327762306a36Sopenharmony_ci		sk->sk_err = ENETDOWN;
327862306a36Sopenharmony_ci		if (!sock_flag(sk, SOCK_DEAD))
327962306a36Sopenharmony_ci			sk_error_report(sk);
328062306a36Sopenharmony_ci	}
328162306a36Sopenharmony_ci
328262306a36Sopenharmony_ciout_unlock:
328362306a36Sopenharmony_ci	rcu_read_unlock();
328462306a36Sopenharmony_ci	spin_unlock(&po->bind_lock);
328562306a36Sopenharmony_ci	release_sock(sk);
328662306a36Sopenharmony_ci	return ret;
328762306a36Sopenharmony_ci}
328862306a36Sopenharmony_ci
328962306a36Sopenharmony_ci/*
329062306a36Sopenharmony_ci *	Bind a packet socket to a device
329162306a36Sopenharmony_ci */
329262306a36Sopenharmony_ci
329362306a36Sopenharmony_cistatic int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
329462306a36Sopenharmony_ci			    int addr_len)
329562306a36Sopenharmony_ci{
329662306a36Sopenharmony_ci	struct sock *sk = sock->sk;
329762306a36Sopenharmony_ci	char name[sizeof(uaddr->sa_data_min) + 1];
329862306a36Sopenharmony_ci
329962306a36Sopenharmony_ci	/*
330062306a36Sopenharmony_ci	 *	Check legality
330162306a36Sopenharmony_ci	 */
330262306a36Sopenharmony_ci
330362306a36Sopenharmony_ci	if (addr_len != sizeof(struct sockaddr))
330462306a36Sopenharmony_ci		return -EINVAL;
330562306a36Sopenharmony_ci	/* uaddr->sa_data comes from the userspace, it's not guaranteed to be
330662306a36Sopenharmony_ci	 * zero-terminated.
330762306a36Sopenharmony_ci	 */
330862306a36Sopenharmony_ci	memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min));
330962306a36Sopenharmony_ci	name[sizeof(uaddr->sa_data_min)] = 0;
331062306a36Sopenharmony_ci
331162306a36Sopenharmony_ci	return packet_do_bind(sk, name, 0, 0);
331262306a36Sopenharmony_ci}
331362306a36Sopenharmony_ci
331462306a36Sopenharmony_cistatic int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
331562306a36Sopenharmony_ci{
331662306a36Sopenharmony_ci	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
331762306a36Sopenharmony_ci	struct sock *sk = sock->sk;
331862306a36Sopenharmony_ci
331962306a36Sopenharmony_ci	/*
332062306a36Sopenharmony_ci	 *	Check legality
332162306a36Sopenharmony_ci	 */
332262306a36Sopenharmony_ci
332362306a36Sopenharmony_ci	if (addr_len < sizeof(struct sockaddr_ll))
332462306a36Sopenharmony_ci		return -EINVAL;
332562306a36Sopenharmony_ci	if (sll->sll_family != AF_PACKET)
332662306a36Sopenharmony_ci		return -EINVAL;
332762306a36Sopenharmony_ci
332862306a36Sopenharmony_ci	return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol);
332962306a36Sopenharmony_ci}
333062306a36Sopenharmony_ci
333162306a36Sopenharmony_cistatic struct proto packet_proto = {
333262306a36Sopenharmony_ci	.name	  = "PACKET",
333362306a36Sopenharmony_ci	.owner	  = THIS_MODULE,
333462306a36Sopenharmony_ci	.obj_size = sizeof(struct packet_sock),
333562306a36Sopenharmony_ci};
333662306a36Sopenharmony_ci
333762306a36Sopenharmony_ci/*
333862306a36Sopenharmony_ci *	Create a packet of type SOCK_PACKET.
333962306a36Sopenharmony_ci */
334062306a36Sopenharmony_ci
334162306a36Sopenharmony_cistatic int packet_create(struct net *net, struct socket *sock, int protocol,
334262306a36Sopenharmony_ci			 int kern)
334362306a36Sopenharmony_ci{
334462306a36Sopenharmony_ci	struct sock *sk;
334562306a36Sopenharmony_ci	struct packet_sock *po;
334662306a36Sopenharmony_ci	__be16 proto = (__force __be16)protocol; /* weird, but documented */
334762306a36Sopenharmony_ci	int err;
334862306a36Sopenharmony_ci
334962306a36Sopenharmony_ci	if (!ns_capable(net->user_ns, CAP_NET_RAW))
335062306a36Sopenharmony_ci		return -EPERM;
335162306a36Sopenharmony_ci	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
335262306a36Sopenharmony_ci	    sock->type != SOCK_PACKET)
335362306a36Sopenharmony_ci		return -ESOCKTNOSUPPORT;
335462306a36Sopenharmony_ci
335562306a36Sopenharmony_ci	sock->state = SS_UNCONNECTED;
335662306a36Sopenharmony_ci
335762306a36Sopenharmony_ci	err = -ENOBUFS;
335862306a36Sopenharmony_ci	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
335962306a36Sopenharmony_ci	if (sk == NULL)
336062306a36Sopenharmony_ci		goto out;
336162306a36Sopenharmony_ci
336262306a36Sopenharmony_ci	sock->ops = &packet_ops;
336362306a36Sopenharmony_ci	if (sock->type == SOCK_PACKET)
336462306a36Sopenharmony_ci		sock->ops = &packet_ops_spkt;
336562306a36Sopenharmony_ci
336662306a36Sopenharmony_ci	sock_init_data(sock, sk);
336762306a36Sopenharmony_ci
336862306a36Sopenharmony_ci	po = pkt_sk(sk);
336962306a36Sopenharmony_ci	init_completion(&po->skb_completion);
337062306a36Sopenharmony_ci	sk->sk_family = PF_PACKET;
337162306a36Sopenharmony_ci	po->num = proto;
337262306a36Sopenharmony_ci
337362306a36Sopenharmony_ci	err = packet_alloc_pending(po);
337462306a36Sopenharmony_ci	if (err)
337562306a36Sopenharmony_ci		goto out2;
337662306a36Sopenharmony_ci
337762306a36Sopenharmony_ci	packet_cached_dev_reset(po);
337862306a36Sopenharmony_ci
337962306a36Sopenharmony_ci	sk->sk_destruct = packet_sock_destruct;
338062306a36Sopenharmony_ci
338162306a36Sopenharmony_ci	/*
338262306a36Sopenharmony_ci	 *	Attach a protocol block
338362306a36Sopenharmony_ci	 */
338462306a36Sopenharmony_ci
338562306a36Sopenharmony_ci	spin_lock_init(&po->bind_lock);
338662306a36Sopenharmony_ci	mutex_init(&po->pg_vec_lock);
338762306a36Sopenharmony_ci	po->rollover = NULL;
338862306a36Sopenharmony_ci	po->prot_hook.func = packet_rcv;
338962306a36Sopenharmony_ci
339062306a36Sopenharmony_ci	if (sock->type == SOCK_PACKET)
339162306a36Sopenharmony_ci		po->prot_hook.func = packet_rcv_spkt;
339262306a36Sopenharmony_ci
339362306a36Sopenharmony_ci	po->prot_hook.af_packet_priv = sk;
339462306a36Sopenharmony_ci	po->prot_hook.af_packet_net = sock_net(sk);
339562306a36Sopenharmony_ci
339662306a36Sopenharmony_ci	if (proto) {
339762306a36Sopenharmony_ci		po->prot_hook.type = proto;
339862306a36Sopenharmony_ci		__register_prot_hook(sk);
339962306a36Sopenharmony_ci	}
340062306a36Sopenharmony_ci
340162306a36Sopenharmony_ci	mutex_lock(&net->packet.sklist_lock);
340262306a36Sopenharmony_ci	sk_add_node_tail_rcu(sk, &net->packet.sklist);
340362306a36Sopenharmony_ci	mutex_unlock(&net->packet.sklist_lock);
340462306a36Sopenharmony_ci
340562306a36Sopenharmony_ci	sock_prot_inuse_add(net, &packet_proto, 1);
340662306a36Sopenharmony_ci
340762306a36Sopenharmony_ci	return 0;
340862306a36Sopenharmony_ciout2:
340962306a36Sopenharmony_ci	sk_free(sk);
341062306a36Sopenharmony_ciout:
341162306a36Sopenharmony_ci	return err;
341262306a36Sopenharmony_ci}
341362306a36Sopenharmony_ci
341462306a36Sopenharmony_ci/*
341562306a36Sopenharmony_ci *	Pull a packet from our receive queue and hand it to the user.
341662306a36Sopenharmony_ci *	If necessary we block.
341762306a36Sopenharmony_ci */
341862306a36Sopenharmony_ci
341962306a36Sopenharmony_cistatic int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
342062306a36Sopenharmony_ci			  int flags)
342162306a36Sopenharmony_ci{
342262306a36Sopenharmony_ci	struct sock *sk = sock->sk;
342362306a36Sopenharmony_ci	struct sk_buff *skb;
342462306a36Sopenharmony_ci	int copied, err;
342562306a36Sopenharmony_ci	int vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz);
342662306a36Sopenharmony_ci	unsigned int origlen = 0;
342762306a36Sopenharmony_ci
342862306a36Sopenharmony_ci	err = -EINVAL;
342962306a36Sopenharmony_ci	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
343062306a36Sopenharmony_ci		goto out;
343162306a36Sopenharmony_ci
343262306a36Sopenharmony_ci#if 0
343362306a36Sopenharmony_ci	/* What error should we return now? EUNATTACH? */
343462306a36Sopenharmony_ci	if (pkt_sk(sk)->ifindex < 0)
343562306a36Sopenharmony_ci		return -ENODEV;
343662306a36Sopenharmony_ci#endif
343762306a36Sopenharmony_ci
343862306a36Sopenharmony_ci	if (flags & MSG_ERRQUEUE) {
343962306a36Sopenharmony_ci		err = sock_recv_errqueue(sk, msg, len,
344062306a36Sopenharmony_ci					 SOL_PACKET, PACKET_TX_TIMESTAMP);
344162306a36Sopenharmony_ci		goto out;
344262306a36Sopenharmony_ci	}
344362306a36Sopenharmony_ci
344462306a36Sopenharmony_ci	/*
344562306a36Sopenharmony_ci	 *	Call the generic datagram receiver. This handles all sorts
344662306a36Sopenharmony_ci	 *	of horrible races and re-entrancy so we can forget about it
344762306a36Sopenharmony_ci	 *	in the protocol layers.
344862306a36Sopenharmony_ci	 *
344962306a36Sopenharmony_ci	 *	Now it will return ENETDOWN, if device have just gone down,
345062306a36Sopenharmony_ci	 *	but then it will block.
345162306a36Sopenharmony_ci	 */
345262306a36Sopenharmony_ci
345362306a36Sopenharmony_ci	skb = skb_recv_datagram(sk, flags, &err);
345462306a36Sopenharmony_ci
345562306a36Sopenharmony_ci	/*
345662306a36Sopenharmony_ci	 *	An error occurred so return it. Because skb_recv_datagram()
345762306a36Sopenharmony_ci	 *	handles the blocking we don't see and worry about blocking
345862306a36Sopenharmony_ci	 *	retries.
345962306a36Sopenharmony_ci	 */
346062306a36Sopenharmony_ci
346162306a36Sopenharmony_ci	if (skb == NULL)
346262306a36Sopenharmony_ci		goto out;
346362306a36Sopenharmony_ci
346462306a36Sopenharmony_ci	packet_rcv_try_clear_pressure(pkt_sk(sk));
346562306a36Sopenharmony_ci
346662306a36Sopenharmony_ci	if (vnet_hdr_len) {
346762306a36Sopenharmony_ci		err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len);
346862306a36Sopenharmony_ci		if (err)
346962306a36Sopenharmony_ci			goto out_free;
347062306a36Sopenharmony_ci	}
347162306a36Sopenharmony_ci
347262306a36Sopenharmony_ci	/* You lose any data beyond the buffer you gave. If it worries
347362306a36Sopenharmony_ci	 * a user program they can ask the device for its MTU
347462306a36Sopenharmony_ci	 * anyway.
347562306a36Sopenharmony_ci	 */
347662306a36Sopenharmony_ci	copied = skb->len;
347762306a36Sopenharmony_ci	if (copied > len) {
347862306a36Sopenharmony_ci		copied = len;
347962306a36Sopenharmony_ci		msg->msg_flags |= MSG_TRUNC;
348062306a36Sopenharmony_ci	}
348162306a36Sopenharmony_ci
348262306a36Sopenharmony_ci	err = skb_copy_datagram_msg(skb, 0, msg, copied);
348362306a36Sopenharmony_ci	if (err)
348462306a36Sopenharmony_ci		goto out_free;
348562306a36Sopenharmony_ci
348662306a36Sopenharmony_ci	if (sock->type != SOCK_PACKET) {
348762306a36Sopenharmony_ci		struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
348862306a36Sopenharmony_ci
348962306a36Sopenharmony_ci		/* Original length was stored in sockaddr_ll fields */
349062306a36Sopenharmony_ci		origlen = PACKET_SKB_CB(skb)->sa.origlen;
349162306a36Sopenharmony_ci		sll->sll_family = AF_PACKET;
349262306a36Sopenharmony_ci		sll->sll_protocol = skb->protocol;
349362306a36Sopenharmony_ci	}
349462306a36Sopenharmony_ci
349562306a36Sopenharmony_ci	sock_recv_cmsgs(msg, sk, skb);
349662306a36Sopenharmony_ci
349762306a36Sopenharmony_ci	if (msg->msg_name) {
349862306a36Sopenharmony_ci		const size_t max_len = min(sizeof(skb->cb),
349962306a36Sopenharmony_ci					   sizeof(struct sockaddr_storage));
350062306a36Sopenharmony_ci		int copy_len;
350162306a36Sopenharmony_ci
350262306a36Sopenharmony_ci		/* If the address length field is there to be filled
350362306a36Sopenharmony_ci		 * in, we fill it in now.
350462306a36Sopenharmony_ci		 */
350562306a36Sopenharmony_ci		if (sock->type == SOCK_PACKET) {
350662306a36Sopenharmony_ci			__sockaddr_check_size(sizeof(struct sockaddr_pkt));
350762306a36Sopenharmony_ci			msg->msg_namelen = sizeof(struct sockaddr_pkt);
350862306a36Sopenharmony_ci			copy_len = msg->msg_namelen;
350962306a36Sopenharmony_ci		} else {
351062306a36Sopenharmony_ci			struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
351162306a36Sopenharmony_ci
351262306a36Sopenharmony_ci			msg->msg_namelen = sll->sll_halen +
351362306a36Sopenharmony_ci				offsetof(struct sockaddr_ll, sll_addr);
351462306a36Sopenharmony_ci			copy_len = msg->msg_namelen;
351562306a36Sopenharmony_ci			if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
351662306a36Sopenharmony_ci				memset(msg->msg_name +
351762306a36Sopenharmony_ci				       offsetof(struct sockaddr_ll, sll_addr),
351862306a36Sopenharmony_ci				       0, sizeof(sll->sll_addr));
351962306a36Sopenharmony_ci				msg->msg_namelen = sizeof(struct sockaddr_ll);
352062306a36Sopenharmony_ci			}
352162306a36Sopenharmony_ci		}
352262306a36Sopenharmony_ci		if (WARN_ON_ONCE(copy_len > max_len)) {
352362306a36Sopenharmony_ci			copy_len = max_len;
352462306a36Sopenharmony_ci			msg->msg_namelen = copy_len;
352562306a36Sopenharmony_ci		}
352662306a36Sopenharmony_ci		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
352762306a36Sopenharmony_ci	}
352862306a36Sopenharmony_ci
352962306a36Sopenharmony_ci	if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) {
353062306a36Sopenharmony_ci		struct tpacket_auxdata aux;
353162306a36Sopenharmony_ci
353262306a36Sopenharmony_ci		aux.tp_status = TP_STATUS_USER;
353362306a36Sopenharmony_ci		if (skb->ip_summed == CHECKSUM_PARTIAL)
353462306a36Sopenharmony_ci			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
353562306a36Sopenharmony_ci		else if (skb->pkt_type != PACKET_OUTGOING &&
353662306a36Sopenharmony_ci			 skb_csum_unnecessary(skb))
353762306a36Sopenharmony_ci			aux.tp_status |= TP_STATUS_CSUM_VALID;
353862306a36Sopenharmony_ci		if (skb_is_gso(skb) && skb_is_gso_tcp(skb))
353962306a36Sopenharmony_ci			aux.tp_status |= TP_STATUS_GSO_TCP;
354062306a36Sopenharmony_ci
354162306a36Sopenharmony_ci		aux.tp_len = origlen;
354262306a36Sopenharmony_ci		aux.tp_snaplen = skb->len;
354362306a36Sopenharmony_ci		aux.tp_mac = 0;
354462306a36Sopenharmony_ci		aux.tp_net = skb_network_offset(skb);
354562306a36Sopenharmony_ci		if (skb_vlan_tag_present(skb)) {
354662306a36Sopenharmony_ci			aux.tp_vlan_tci = skb_vlan_tag_get(skb);
354762306a36Sopenharmony_ci			aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
354862306a36Sopenharmony_ci			aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
354962306a36Sopenharmony_ci		} else {
355062306a36Sopenharmony_ci			aux.tp_vlan_tci = 0;
355162306a36Sopenharmony_ci			aux.tp_vlan_tpid = 0;
355262306a36Sopenharmony_ci		}
355362306a36Sopenharmony_ci		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
355462306a36Sopenharmony_ci	}
355562306a36Sopenharmony_ci
355662306a36Sopenharmony_ci	/*
355762306a36Sopenharmony_ci	 *	Free or return the buffer as appropriate. Again this
355862306a36Sopenharmony_ci	 *	hides all the races and re-entrancy issues from us.
355962306a36Sopenharmony_ci	 */
356062306a36Sopenharmony_ci	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
356162306a36Sopenharmony_ci
356262306a36Sopenharmony_ciout_free:
356362306a36Sopenharmony_ci	skb_free_datagram(sk, skb);
356462306a36Sopenharmony_ciout:
356562306a36Sopenharmony_ci	return err;
356662306a36Sopenharmony_ci}
356762306a36Sopenharmony_ci
356862306a36Sopenharmony_cistatic int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
356962306a36Sopenharmony_ci			       int peer)
357062306a36Sopenharmony_ci{
357162306a36Sopenharmony_ci	struct net_device *dev;
357262306a36Sopenharmony_ci	struct sock *sk	= sock->sk;
357362306a36Sopenharmony_ci
357462306a36Sopenharmony_ci	if (peer)
357562306a36Sopenharmony_ci		return -EOPNOTSUPP;
357662306a36Sopenharmony_ci
357762306a36Sopenharmony_ci	uaddr->sa_family = AF_PACKET;
357862306a36Sopenharmony_ci	memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min));
357962306a36Sopenharmony_ci	rcu_read_lock();
358062306a36Sopenharmony_ci	dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
358162306a36Sopenharmony_ci	if (dev)
358262306a36Sopenharmony_ci		strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min));
358362306a36Sopenharmony_ci	rcu_read_unlock();
358462306a36Sopenharmony_ci
358562306a36Sopenharmony_ci	return sizeof(*uaddr);
358662306a36Sopenharmony_ci}
358762306a36Sopenharmony_ci
358862306a36Sopenharmony_cistatic int packet_getname(struct socket *sock, struct sockaddr *uaddr,
358962306a36Sopenharmony_ci			  int peer)
359062306a36Sopenharmony_ci{
359162306a36Sopenharmony_ci	struct net_device *dev;
359262306a36Sopenharmony_ci	struct sock *sk = sock->sk;
359362306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
359462306a36Sopenharmony_ci	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
359562306a36Sopenharmony_ci	int ifindex;
359662306a36Sopenharmony_ci
359762306a36Sopenharmony_ci	if (peer)
359862306a36Sopenharmony_ci		return -EOPNOTSUPP;
359962306a36Sopenharmony_ci
360062306a36Sopenharmony_ci	ifindex = READ_ONCE(po->ifindex);
360162306a36Sopenharmony_ci	sll->sll_family = AF_PACKET;
360262306a36Sopenharmony_ci	sll->sll_ifindex = ifindex;
360362306a36Sopenharmony_ci	sll->sll_protocol = READ_ONCE(po->num);
360462306a36Sopenharmony_ci	sll->sll_pkttype = 0;
360562306a36Sopenharmony_ci	rcu_read_lock();
360662306a36Sopenharmony_ci	dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
360762306a36Sopenharmony_ci	if (dev) {
360862306a36Sopenharmony_ci		sll->sll_hatype = dev->type;
360962306a36Sopenharmony_ci		sll->sll_halen = dev->addr_len;
361062306a36Sopenharmony_ci
361162306a36Sopenharmony_ci		/* Let __fortify_memcpy_chk() know the actual buffer size. */
361262306a36Sopenharmony_ci		memcpy(((struct sockaddr_storage *)sll)->__data +
361362306a36Sopenharmony_ci		       offsetof(struct sockaddr_ll, sll_addr) -
361462306a36Sopenharmony_ci		       offsetofend(struct sockaddr_ll, sll_family),
361562306a36Sopenharmony_ci		       dev->dev_addr, dev->addr_len);
361662306a36Sopenharmony_ci	} else {
361762306a36Sopenharmony_ci		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
361862306a36Sopenharmony_ci		sll->sll_halen = 0;
361962306a36Sopenharmony_ci	}
362062306a36Sopenharmony_ci	rcu_read_unlock();
362162306a36Sopenharmony_ci
362262306a36Sopenharmony_ci	return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
362362306a36Sopenharmony_ci}
362462306a36Sopenharmony_ci
362562306a36Sopenharmony_cistatic int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
362662306a36Sopenharmony_ci			 int what)
362762306a36Sopenharmony_ci{
362862306a36Sopenharmony_ci	switch (i->type) {
362962306a36Sopenharmony_ci	case PACKET_MR_MULTICAST:
363062306a36Sopenharmony_ci		if (i->alen != dev->addr_len)
363162306a36Sopenharmony_ci			return -EINVAL;
363262306a36Sopenharmony_ci		if (what > 0)
363362306a36Sopenharmony_ci			return dev_mc_add(dev, i->addr);
363462306a36Sopenharmony_ci		else
363562306a36Sopenharmony_ci			return dev_mc_del(dev, i->addr);
363662306a36Sopenharmony_ci		break;
363762306a36Sopenharmony_ci	case PACKET_MR_PROMISC:
363862306a36Sopenharmony_ci		return dev_set_promiscuity(dev, what);
363962306a36Sopenharmony_ci	case PACKET_MR_ALLMULTI:
364062306a36Sopenharmony_ci		return dev_set_allmulti(dev, what);
364162306a36Sopenharmony_ci	case PACKET_MR_UNICAST:
364262306a36Sopenharmony_ci		if (i->alen != dev->addr_len)
364362306a36Sopenharmony_ci			return -EINVAL;
364462306a36Sopenharmony_ci		if (what > 0)
364562306a36Sopenharmony_ci			return dev_uc_add(dev, i->addr);
364662306a36Sopenharmony_ci		else
364762306a36Sopenharmony_ci			return dev_uc_del(dev, i->addr);
364862306a36Sopenharmony_ci		break;
364962306a36Sopenharmony_ci	default:
365062306a36Sopenharmony_ci		break;
365162306a36Sopenharmony_ci	}
365262306a36Sopenharmony_ci	return 0;
365362306a36Sopenharmony_ci}
365462306a36Sopenharmony_ci
365562306a36Sopenharmony_cistatic void packet_dev_mclist_delete(struct net_device *dev,
365662306a36Sopenharmony_ci				     struct packet_mclist **mlp)
365762306a36Sopenharmony_ci{
365862306a36Sopenharmony_ci	struct packet_mclist *ml;
365962306a36Sopenharmony_ci
366062306a36Sopenharmony_ci	while ((ml = *mlp) != NULL) {
366162306a36Sopenharmony_ci		if (ml->ifindex == dev->ifindex) {
366262306a36Sopenharmony_ci			packet_dev_mc(dev, ml, -1);
366362306a36Sopenharmony_ci			*mlp = ml->next;
366462306a36Sopenharmony_ci			kfree(ml);
366562306a36Sopenharmony_ci		} else
366662306a36Sopenharmony_ci			mlp = &ml->next;
366762306a36Sopenharmony_ci	}
366862306a36Sopenharmony_ci}
366962306a36Sopenharmony_ci
367062306a36Sopenharmony_cistatic int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
367162306a36Sopenharmony_ci{
367262306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
367362306a36Sopenharmony_ci	struct packet_mclist *ml, *i;
367462306a36Sopenharmony_ci	struct net_device *dev;
367562306a36Sopenharmony_ci	int err;
367662306a36Sopenharmony_ci
367762306a36Sopenharmony_ci	rtnl_lock();
367862306a36Sopenharmony_ci
367962306a36Sopenharmony_ci	err = -ENODEV;
368062306a36Sopenharmony_ci	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
368162306a36Sopenharmony_ci	if (!dev)
368262306a36Sopenharmony_ci		goto done;
368362306a36Sopenharmony_ci
368462306a36Sopenharmony_ci	err = -EINVAL;
368562306a36Sopenharmony_ci	if (mreq->mr_alen > dev->addr_len)
368662306a36Sopenharmony_ci		goto done;
368762306a36Sopenharmony_ci
368862306a36Sopenharmony_ci	err = -ENOBUFS;
368962306a36Sopenharmony_ci	i = kmalloc(sizeof(*i), GFP_KERNEL);
369062306a36Sopenharmony_ci	if (i == NULL)
369162306a36Sopenharmony_ci		goto done;
369262306a36Sopenharmony_ci
369362306a36Sopenharmony_ci	err = 0;
369462306a36Sopenharmony_ci	for (ml = po->mclist; ml; ml = ml->next) {
369562306a36Sopenharmony_ci		if (ml->ifindex == mreq->mr_ifindex &&
369662306a36Sopenharmony_ci		    ml->type == mreq->mr_type &&
369762306a36Sopenharmony_ci		    ml->alen == mreq->mr_alen &&
369862306a36Sopenharmony_ci		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
369962306a36Sopenharmony_ci			ml->count++;
370062306a36Sopenharmony_ci			/* Free the new element ... */
370162306a36Sopenharmony_ci			kfree(i);
370262306a36Sopenharmony_ci			goto done;
370362306a36Sopenharmony_ci		}
370462306a36Sopenharmony_ci	}
370562306a36Sopenharmony_ci
370662306a36Sopenharmony_ci	i->type = mreq->mr_type;
370762306a36Sopenharmony_ci	i->ifindex = mreq->mr_ifindex;
370862306a36Sopenharmony_ci	i->alen = mreq->mr_alen;
370962306a36Sopenharmony_ci	memcpy(i->addr, mreq->mr_address, i->alen);
371062306a36Sopenharmony_ci	memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
371162306a36Sopenharmony_ci	i->count = 1;
371262306a36Sopenharmony_ci	i->next = po->mclist;
371362306a36Sopenharmony_ci	po->mclist = i;
371462306a36Sopenharmony_ci	err = packet_dev_mc(dev, i, 1);
371562306a36Sopenharmony_ci	if (err) {
371662306a36Sopenharmony_ci		po->mclist = i->next;
371762306a36Sopenharmony_ci		kfree(i);
371862306a36Sopenharmony_ci	}
371962306a36Sopenharmony_ci
372062306a36Sopenharmony_cidone:
372162306a36Sopenharmony_ci	rtnl_unlock();
372262306a36Sopenharmony_ci	return err;
372362306a36Sopenharmony_ci}
372462306a36Sopenharmony_ci
372562306a36Sopenharmony_cistatic int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
372662306a36Sopenharmony_ci{
372762306a36Sopenharmony_ci	struct packet_mclist *ml, **mlp;
372862306a36Sopenharmony_ci
372962306a36Sopenharmony_ci	rtnl_lock();
373062306a36Sopenharmony_ci
373162306a36Sopenharmony_ci	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
373262306a36Sopenharmony_ci		if (ml->ifindex == mreq->mr_ifindex &&
373362306a36Sopenharmony_ci		    ml->type == mreq->mr_type &&
373462306a36Sopenharmony_ci		    ml->alen == mreq->mr_alen &&
373562306a36Sopenharmony_ci		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
373662306a36Sopenharmony_ci			if (--ml->count == 0) {
373762306a36Sopenharmony_ci				struct net_device *dev;
373862306a36Sopenharmony_ci				*mlp = ml->next;
373962306a36Sopenharmony_ci				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
374062306a36Sopenharmony_ci				if (dev)
374162306a36Sopenharmony_ci					packet_dev_mc(dev, ml, -1);
374262306a36Sopenharmony_ci				kfree(ml);
374362306a36Sopenharmony_ci			}
374462306a36Sopenharmony_ci			break;
374562306a36Sopenharmony_ci		}
374662306a36Sopenharmony_ci	}
374762306a36Sopenharmony_ci	rtnl_unlock();
374862306a36Sopenharmony_ci	return 0;
374962306a36Sopenharmony_ci}
375062306a36Sopenharmony_ci
375162306a36Sopenharmony_cistatic void packet_flush_mclist(struct sock *sk)
375262306a36Sopenharmony_ci{
375362306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
375462306a36Sopenharmony_ci	struct packet_mclist *ml;
375562306a36Sopenharmony_ci
375662306a36Sopenharmony_ci	if (!po->mclist)
375762306a36Sopenharmony_ci		return;
375862306a36Sopenharmony_ci
375962306a36Sopenharmony_ci	rtnl_lock();
376062306a36Sopenharmony_ci	while ((ml = po->mclist) != NULL) {
376162306a36Sopenharmony_ci		struct net_device *dev;
376262306a36Sopenharmony_ci
376362306a36Sopenharmony_ci		po->mclist = ml->next;
376462306a36Sopenharmony_ci		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
376562306a36Sopenharmony_ci		if (dev != NULL)
376662306a36Sopenharmony_ci			packet_dev_mc(dev, ml, -1);
376762306a36Sopenharmony_ci		kfree(ml);
376862306a36Sopenharmony_ci	}
376962306a36Sopenharmony_ci	rtnl_unlock();
377062306a36Sopenharmony_ci}
377162306a36Sopenharmony_ci
377262306a36Sopenharmony_cistatic int
377362306a36Sopenharmony_cipacket_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
377462306a36Sopenharmony_ci		  unsigned int optlen)
377562306a36Sopenharmony_ci{
377662306a36Sopenharmony_ci	struct sock *sk = sock->sk;
377762306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
377862306a36Sopenharmony_ci	int ret;
377962306a36Sopenharmony_ci
378062306a36Sopenharmony_ci	if (level != SOL_PACKET)
378162306a36Sopenharmony_ci		return -ENOPROTOOPT;
378262306a36Sopenharmony_ci
378362306a36Sopenharmony_ci	switch (optname) {
378462306a36Sopenharmony_ci	case PACKET_ADD_MEMBERSHIP:
378562306a36Sopenharmony_ci	case PACKET_DROP_MEMBERSHIP:
378662306a36Sopenharmony_ci	{
378762306a36Sopenharmony_ci		struct packet_mreq_max mreq;
378862306a36Sopenharmony_ci		int len = optlen;
378962306a36Sopenharmony_ci		memset(&mreq, 0, sizeof(mreq));
379062306a36Sopenharmony_ci		if (len < sizeof(struct packet_mreq))
379162306a36Sopenharmony_ci			return -EINVAL;
379262306a36Sopenharmony_ci		if (len > sizeof(mreq))
379362306a36Sopenharmony_ci			len = sizeof(mreq);
379462306a36Sopenharmony_ci		if (copy_from_sockptr(&mreq, optval, len))
379562306a36Sopenharmony_ci			return -EFAULT;
379662306a36Sopenharmony_ci		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
379762306a36Sopenharmony_ci			return -EINVAL;
379862306a36Sopenharmony_ci		if (optname == PACKET_ADD_MEMBERSHIP)
379962306a36Sopenharmony_ci			ret = packet_mc_add(sk, &mreq);
380062306a36Sopenharmony_ci		else
380162306a36Sopenharmony_ci			ret = packet_mc_drop(sk, &mreq);
380262306a36Sopenharmony_ci		return ret;
380362306a36Sopenharmony_ci	}
380462306a36Sopenharmony_ci
380562306a36Sopenharmony_ci	case PACKET_RX_RING:
380662306a36Sopenharmony_ci	case PACKET_TX_RING:
380762306a36Sopenharmony_ci	{
380862306a36Sopenharmony_ci		union tpacket_req_u req_u;
380962306a36Sopenharmony_ci		int len;
381062306a36Sopenharmony_ci
381162306a36Sopenharmony_ci		lock_sock(sk);
381262306a36Sopenharmony_ci		switch (po->tp_version) {
381362306a36Sopenharmony_ci		case TPACKET_V1:
381462306a36Sopenharmony_ci		case TPACKET_V2:
381562306a36Sopenharmony_ci			len = sizeof(req_u.req);
381662306a36Sopenharmony_ci			break;
381762306a36Sopenharmony_ci		case TPACKET_V3:
381862306a36Sopenharmony_ci		default:
381962306a36Sopenharmony_ci			len = sizeof(req_u.req3);
382062306a36Sopenharmony_ci			break;
382162306a36Sopenharmony_ci		}
382262306a36Sopenharmony_ci		if (optlen < len) {
382362306a36Sopenharmony_ci			ret = -EINVAL;
382462306a36Sopenharmony_ci		} else {
382562306a36Sopenharmony_ci			if (copy_from_sockptr(&req_u.req, optval, len))
382662306a36Sopenharmony_ci				ret = -EFAULT;
382762306a36Sopenharmony_ci			else
382862306a36Sopenharmony_ci				ret = packet_set_ring(sk, &req_u, 0,
382962306a36Sopenharmony_ci						    optname == PACKET_TX_RING);
383062306a36Sopenharmony_ci		}
383162306a36Sopenharmony_ci		release_sock(sk);
383262306a36Sopenharmony_ci		return ret;
383362306a36Sopenharmony_ci	}
383462306a36Sopenharmony_ci	case PACKET_COPY_THRESH:
383562306a36Sopenharmony_ci	{
383662306a36Sopenharmony_ci		int val;
383762306a36Sopenharmony_ci
383862306a36Sopenharmony_ci		if (optlen != sizeof(val))
383962306a36Sopenharmony_ci			return -EINVAL;
384062306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
384162306a36Sopenharmony_ci			return -EFAULT;
384262306a36Sopenharmony_ci
384362306a36Sopenharmony_ci		pkt_sk(sk)->copy_thresh = val;
384462306a36Sopenharmony_ci		return 0;
384562306a36Sopenharmony_ci	}
384662306a36Sopenharmony_ci	case PACKET_VERSION:
384762306a36Sopenharmony_ci	{
384862306a36Sopenharmony_ci		int val;
384962306a36Sopenharmony_ci
385062306a36Sopenharmony_ci		if (optlen != sizeof(val))
385162306a36Sopenharmony_ci			return -EINVAL;
385262306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
385362306a36Sopenharmony_ci			return -EFAULT;
385462306a36Sopenharmony_ci		switch (val) {
385562306a36Sopenharmony_ci		case TPACKET_V1:
385662306a36Sopenharmony_ci		case TPACKET_V2:
385762306a36Sopenharmony_ci		case TPACKET_V3:
385862306a36Sopenharmony_ci			break;
385962306a36Sopenharmony_ci		default:
386062306a36Sopenharmony_ci			return -EINVAL;
386162306a36Sopenharmony_ci		}
386262306a36Sopenharmony_ci		lock_sock(sk);
386362306a36Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
386462306a36Sopenharmony_ci			ret = -EBUSY;
386562306a36Sopenharmony_ci		} else {
386662306a36Sopenharmony_ci			po->tp_version = val;
386762306a36Sopenharmony_ci			ret = 0;
386862306a36Sopenharmony_ci		}
386962306a36Sopenharmony_ci		release_sock(sk);
387062306a36Sopenharmony_ci		return ret;
387162306a36Sopenharmony_ci	}
387262306a36Sopenharmony_ci	case PACKET_RESERVE:
387362306a36Sopenharmony_ci	{
387462306a36Sopenharmony_ci		unsigned int val;
387562306a36Sopenharmony_ci
387662306a36Sopenharmony_ci		if (optlen != sizeof(val))
387762306a36Sopenharmony_ci			return -EINVAL;
387862306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
387962306a36Sopenharmony_ci			return -EFAULT;
388062306a36Sopenharmony_ci		if (val > INT_MAX)
388162306a36Sopenharmony_ci			return -EINVAL;
388262306a36Sopenharmony_ci		lock_sock(sk);
388362306a36Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
388462306a36Sopenharmony_ci			ret = -EBUSY;
388562306a36Sopenharmony_ci		} else {
388662306a36Sopenharmony_ci			po->tp_reserve = val;
388762306a36Sopenharmony_ci			ret = 0;
388862306a36Sopenharmony_ci		}
388962306a36Sopenharmony_ci		release_sock(sk);
389062306a36Sopenharmony_ci		return ret;
389162306a36Sopenharmony_ci	}
389262306a36Sopenharmony_ci	case PACKET_LOSS:
389362306a36Sopenharmony_ci	{
389462306a36Sopenharmony_ci		unsigned int val;
389562306a36Sopenharmony_ci
389662306a36Sopenharmony_ci		if (optlen != sizeof(val))
389762306a36Sopenharmony_ci			return -EINVAL;
389862306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
389962306a36Sopenharmony_ci			return -EFAULT;
390062306a36Sopenharmony_ci
390162306a36Sopenharmony_ci		lock_sock(sk);
390262306a36Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
390362306a36Sopenharmony_ci			ret = -EBUSY;
390462306a36Sopenharmony_ci		} else {
390562306a36Sopenharmony_ci			packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val);
390662306a36Sopenharmony_ci			ret = 0;
390762306a36Sopenharmony_ci		}
390862306a36Sopenharmony_ci		release_sock(sk);
390962306a36Sopenharmony_ci		return ret;
391062306a36Sopenharmony_ci	}
391162306a36Sopenharmony_ci	case PACKET_AUXDATA:
391262306a36Sopenharmony_ci	{
391362306a36Sopenharmony_ci		int val;
391462306a36Sopenharmony_ci
391562306a36Sopenharmony_ci		if (optlen < sizeof(val))
391662306a36Sopenharmony_ci			return -EINVAL;
391762306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
391862306a36Sopenharmony_ci			return -EFAULT;
391962306a36Sopenharmony_ci
392062306a36Sopenharmony_ci		packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val);
392162306a36Sopenharmony_ci		return 0;
392262306a36Sopenharmony_ci	}
392362306a36Sopenharmony_ci	case PACKET_ORIGDEV:
392462306a36Sopenharmony_ci	{
392562306a36Sopenharmony_ci		int val;
392662306a36Sopenharmony_ci
392762306a36Sopenharmony_ci		if (optlen < sizeof(val))
392862306a36Sopenharmony_ci			return -EINVAL;
392962306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
393062306a36Sopenharmony_ci			return -EFAULT;
393162306a36Sopenharmony_ci
393262306a36Sopenharmony_ci		packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val);
393362306a36Sopenharmony_ci		return 0;
393462306a36Sopenharmony_ci	}
393562306a36Sopenharmony_ci	case PACKET_VNET_HDR:
393662306a36Sopenharmony_ci	case PACKET_VNET_HDR_SZ:
393762306a36Sopenharmony_ci	{
393862306a36Sopenharmony_ci		int val, hdr_len;
393962306a36Sopenharmony_ci
394062306a36Sopenharmony_ci		if (sock->type != SOCK_RAW)
394162306a36Sopenharmony_ci			return -EINVAL;
394262306a36Sopenharmony_ci		if (optlen < sizeof(val))
394362306a36Sopenharmony_ci			return -EINVAL;
394462306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
394562306a36Sopenharmony_ci			return -EFAULT;
394662306a36Sopenharmony_ci
394762306a36Sopenharmony_ci		if (optname == PACKET_VNET_HDR_SZ) {
394862306a36Sopenharmony_ci			if (val && val != sizeof(struct virtio_net_hdr) &&
394962306a36Sopenharmony_ci			    val != sizeof(struct virtio_net_hdr_mrg_rxbuf))
395062306a36Sopenharmony_ci				return -EINVAL;
395162306a36Sopenharmony_ci			hdr_len = val;
395262306a36Sopenharmony_ci		} else {
395362306a36Sopenharmony_ci			hdr_len = val ? sizeof(struct virtio_net_hdr) : 0;
395462306a36Sopenharmony_ci		}
395562306a36Sopenharmony_ci		lock_sock(sk);
395662306a36Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
395762306a36Sopenharmony_ci			ret = -EBUSY;
395862306a36Sopenharmony_ci		} else {
395962306a36Sopenharmony_ci			WRITE_ONCE(po->vnet_hdr_sz, hdr_len);
396062306a36Sopenharmony_ci			ret = 0;
396162306a36Sopenharmony_ci		}
396262306a36Sopenharmony_ci		release_sock(sk);
396362306a36Sopenharmony_ci		return ret;
396462306a36Sopenharmony_ci	}
396562306a36Sopenharmony_ci	case PACKET_TIMESTAMP:
396662306a36Sopenharmony_ci	{
396762306a36Sopenharmony_ci		int val;
396862306a36Sopenharmony_ci
396962306a36Sopenharmony_ci		if (optlen != sizeof(val))
397062306a36Sopenharmony_ci			return -EINVAL;
397162306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
397262306a36Sopenharmony_ci			return -EFAULT;
397362306a36Sopenharmony_ci
397462306a36Sopenharmony_ci		WRITE_ONCE(po->tp_tstamp, val);
397562306a36Sopenharmony_ci		return 0;
397662306a36Sopenharmony_ci	}
397762306a36Sopenharmony_ci	case PACKET_FANOUT:
397862306a36Sopenharmony_ci	{
397962306a36Sopenharmony_ci		struct fanout_args args = { 0 };
398062306a36Sopenharmony_ci
398162306a36Sopenharmony_ci		if (optlen != sizeof(int) && optlen != sizeof(args))
398262306a36Sopenharmony_ci			return -EINVAL;
398362306a36Sopenharmony_ci		if (copy_from_sockptr(&args, optval, optlen))
398462306a36Sopenharmony_ci			return -EFAULT;
398562306a36Sopenharmony_ci
398662306a36Sopenharmony_ci		return fanout_add(sk, &args);
398762306a36Sopenharmony_ci	}
398862306a36Sopenharmony_ci	case PACKET_FANOUT_DATA:
398962306a36Sopenharmony_ci	{
399062306a36Sopenharmony_ci		/* Paired with the WRITE_ONCE() in fanout_add() */
399162306a36Sopenharmony_ci		if (!READ_ONCE(po->fanout))
399262306a36Sopenharmony_ci			return -EINVAL;
399362306a36Sopenharmony_ci
399462306a36Sopenharmony_ci		return fanout_set_data(po, optval, optlen);
399562306a36Sopenharmony_ci	}
399662306a36Sopenharmony_ci	case PACKET_IGNORE_OUTGOING:
399762306a36Sopenharmony_ci	{
399862306a36Sopenharmony_ci		int val;
399962306a36Sopenharmony_ci
400062306a36Sopenharmony_ci		if (optlen != sizeof(val))
400162306a36Sopenharmony_ci			return -EINVAL;
400262306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
400362306a36Sopenharmony_ci			return -EFAULT;
400462306a36Sopenharmony_ci		if (val < 0 || val > 1)
400562306a36Sopenharmony_ci			return -EINVAL;
400662306a36Sopenharmony_ci
400762306a36Sopenharmony_ci		WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val);
400862306a36Sopenharmony_ci		return 0;
400962306a36Sopenharmony_ci	}
401062306a36Sopenharmony_ci	case PACKET_TX_HAS_OFF:
401162306a36Sopenharmony_ci	{
401262306a36Sopenharmony_ci		unsigned int val;
401362306a36Sopenharmony_ci
401462306a36Sopenharmony_ci		if (optlen != sizeof(val))
401562306a36Sopenharmony_ci			return -EINVAL;
401662306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
401762306a36Sopenharmony_ci			return -EFAULT;
401862306a36Sopenharmony_ci
401962306a36Sopenharmony_ci		lock_sock(sk);
402062306a36Sopenharmony_ci		if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec)
402162306a36Sopenharmony_ci			packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val);
402262306a36Sopenharmony_ci
402362306a36Sopenharmony_ci		release_sock(sk);
402462306a36Sopenharmony_ci		return 0;
402562306a36Sopenharmony_ci	}
402662306a36Sopenharmony_ci	case PACKET_QDISC_BYPASS:
402762306a36Sopenharmony_ci	{
402862306a36Sopenharmony_ci		int val;
402962306a36Sopenharmony_ci
403062306a36Sopenharmony_ci		if (optlen != sizeof(val))
403162306a36Sopenharmony_ci			return -EINVAL;
403262306a36Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
403362306a36Sopenharmony_ci			return -EFAULT;
403462306a36Sopenharmony_ci
403562306a36Sopenharmony_ci		packet_sock_flag_set(po, PACKET_SOCK_QDISC_BYPASS, val);
403662306a36Sopenharmony_ci		return 0;
403762306a36Sopenharmony_ci	}
403862306a36Sopenharmony_ci	default:
403962306a36Sopenharmony_ci		return -ENOPROTOOPT;
404062306a36Sopenharmony_ci	}
404162306a36Sopenharmony_ci}
404262306a36Sopenharmony_ci
404362306a36Sopenharmony_cistatic int packet_getsockopt(struct socket *sock, int level, int optname,
404462306a36Sopenharmony_ci			     char __user *optval, int __user *optlen)
404562306a36Sopenharmony_ci{
404662306a36Sopenharmony_ci	int len;
404762306a36Sopenharmony_ci	int val, lv = sizeof(val);
404862306a36Sopenharmony_ci	struct sock *sk = sock->sk;
404962306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
405062306a36Sopenharmony_ci	void *data = &val;
405162306a36Sopenharmony_ci	union tpacket_stats_u st;
405262306a36Sopenharmony_ci	struct tpacket_rollover_stats rstats;
405362306a36Sopenharmony_ci	int drops;
405462306a36Sopenharmony_ci
405562306a36Sopenharmony_ci	if (level != SOL_PACKET)
405662306a36Sopenharmony_ci		return -ENOPROTOOPT;
405762306a36Sopenharmony_ci
405862306a36Sopenharmony_ci	if (get_user(len, optlen))
405962306a36Sopenharmony_ci		return -EFAULT;
406062306a36Sopenharmony_ci
406162306a36Sopenharmony_ci	if (len < 0)
406262306a36Sopenharmony_ci		return -EINVAL;
406362306a36Sopenharmony_ci
406462306a36Sopenharmony_ci	switch (optname) {
406562306a36Sopenharmony_ci	case PACKET_STATISTICS:
406662306a36Sopenharmony_ci		spin_lock_bh(&sk->sk_receive_queue.lock);
406762306a36Sopenharmony_ci		memcpy(&st, &po->stats, sizeof(st));
406862306a36Sopenharmony_ci		memset(&po->stats, 0, sizeof(po->stats));
406962306a36Sopenharmony_ci		spin_unlock_bh(&sk->sk_receive_queue.lock);
407062306a36Sopenharmony_ci		drops = atomic_xchg(&po->tp_drops, 0);
407162306a36Sopenharmony_ci
407262306a36Sopenharmony_ci		if (po->tp_version == TPACKET_V3) {
407362306a36Sopenharmony_ci			lv = sizeof(struct tpacket_stats_v3);
407462306a36Sopenharmony_ci			st.stats3.tp_drops = drops;
407562306a36Sopenharmony_ci			st.stats3.tp_packets += drops;
407662306a36Sopenharmony_ci			data = &st.stats3;
407762306a36Sopenharmony_ci		} else {
407862306a36Sopenharmony_ci			lv = sizeof(struct tpacket_stats);
407962306a36Sopenharmony_ci			st.stats1.tp_drops = drops;
408062306a36Sopenharmony_ci			st.stats1.tp_packets += drops;
408162306a36Sopenharmony_ci			data = &st.stats1;
408262306a36Sopenharmony_ci		}
408362306a36Sopenharmony_ci
408462306a36Sopenharmony_ci		break;
408562306a36Sopenharmony_ci	case PACKET_AUXDATA:
408662306a36Sopenharmony_ci		val = packet_sock_flag(po, PACKET_SOCK_AUXDATA);
408762306a36Sopenharmony_ci		break;
408862306a36Sopenharmony_ci	case PACKET_ORIGDEV:
408962306a36Sopenharmony_ci		val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV);
409062306a36Sopenharmony_ci		break;
409162306a36Sopenharmony_ci	case PACKET_VNET_HDR:
409262306a36Sopenharmony_ci		val = !!READ_ONCE(po->vnet_hdr_sz);
409362306a36Sopenharmony_ci		break;
409462306a36Sopenharmony_ci	case PACKET_VNET_HDR_SZ:
409562306a36Sopenharmony_ci		val = READ_ONCE(po->vnet_hdr_sz);
409662306a36Sopenharmony_ci		break;
409762306a36Sopenharmony_ci	case PACKET_VERSION:
409862306a36Sopenharmony_ci		val = po->tp_version;
409962306a36Sopenharmony_ci		break;
410062306a36Sopenharmony_ci	case PACKET_HDRLEN:
410162306a36Sopenharmony_ci		if (len > sizeof(int))
410262306a36Sopenharmony_ci			len = sizeof(int);
410362306a36Sopenharmony_ci		if (len < sizeof(int))
410462306a36Sopenharmony_ci			return -EINVAL;
410562306a36Sopenharmony_ci		if (copy_from_user(&val, optval, len))
410662306a36Sopenharmony_ci			return -EFAULT;
410762306a36Sopenharmony_ci		switch (val) {
410862306a36Sopenharmony_ci		case TPACKET_V1:
410962306a36Sopenharmony_ci			val = sizeof(struct tpacket_hdr);
411062306a36Sopenharmony_ci			break;
411162306a36Sopenharmony_ci		case TPACKET_V2:
411262306a36Sopenharmony_ci			val = sizeof(struct tpacket2_hdr);
411362306a36Sopenharmony_ci			break;
411462306a36Sopenharmony_ci		case TPACKET_V3:
411562306a36Sopenharmony_ci			val = sizeof(struct tpacket3_hdr);
411662306a36Sopenharmony_ci			break;
411762306a36Sopenharmony_ci		default:
411862306a36Sopenharmony_ci			return -EINVAL;
411962306a36Sopenharmony_ci		}
412062306a36Sopenharmony_ci		break;
412162306a36Sopenharmony_ci	case PACKET_RESERVE:
412262306a36Sopenharmony_ci		val = po->tp_reserve;
412362306a36Sopenharmony_ci		break;
412462306a36Sopenharmony_ci	case PACKET_LOSS:
412562306a36Sopenharmony_ci		val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS);
412662306a36Sopenharmony_ci		break;
412762306a36Sopenharmony_ci	case PACKET_TIMESTAMP:
412862306a36Sopenharmony_ci		val = READ_ONCE(po->tp_tstamp);
412962306a36Sopenharmony_ci		break;
413062306a36Sopenharmony_ci	case PACKET_FANOUT:
413162306a36Sopenharmony_ci		val = (po->fanout ?
413262306a36Sopenharmony_ci		       ((u32)po->fanout->id |
413362306a36Sopenharmony_ci			((u32)po->fanout->type << 16) |
413462306a36Sopenharmony_ci			((u32)po->fanout->flags << 24)) :
413562306a36Sopenharmony_ci		       0);
413662306a36Sopenharmony_ci		break;
413762306a36Sopenharmony_ci	case PACKET_IGNORE_OUTGOING:
413862306a36Sopenharmony_ci		val = READ_ONCE(po->prot_hook.ignore_outgoing);
413962306a36Sopenharmony_ci		break;
414062306a36Sopenharmony_ci	case PACKET_ROLLOVER_STATS:
414162306a36Sopenharmony_ci		if (!po->rollover)
414262306a36Sopenharmony_ci			return -EINVAL;
414362306a36Sopenharmony_ci		rstats.tp_all = atomic_long_read(&po->rollover->num);
414462306a36Sopenharmony_ci		rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
414562306a36Sopenharmony_ci		rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
414662306a36Sopenharmony_ci		data = &rstats;
414762306a36Sopenharmony_ci		lv = sizeof(rstats);
414862306a36Sopenharmony_ci		break;
414962306a36Sopenharmony_ci	case PACKET_TX_HAS_OFF:
415062306a36Sopenharmony_ci		val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF);
415162306a36Sopenharmony_ci		break;
415262306a36Sopenharmony_ci	case PACKET_QDISC_BYPASS:
415362306a36Sopenharmony_ci		val = packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS);
415462306a36Sopenharmony_ci		break;
415562306a36Sopenharmony_ci	default:
415662306a36Sopenharmony_ci		return -ENOPROTOOPT;
415762306a36Sopenharmony_ci	}
415862306a36Sopenharmony_ci
415962306a36Sopenharmony_ci	if (len > lv)
416062306a36Sopenharmony_ci		len = lv;
416162306a36Sopenharmony_ci	if (put_user(len, optlen))
416262306a36Sopenharmony_ci		return -EFAULT;
416362306a36Sopenharmony_ci	if (copy_to_user(optval, data, len))
416462306a36Sopenharmony_ci		return -EFAULT;
416562306a36Sopenharmony_ci	return 0;
416662306a36Sopenharmony_ci}
416762306a36Sopenharmony_ci
416862306a36Sopenharmony_cistatic int packet_notifier(struct notifier_block *this,
416962306a36Sopenharmony_ci			   unsigned long msg, void *ptr)
417062306a36Sopenharmony_ci{
417162306a36Sopenharmony_ci	struct sock *sk;
417262306a36Sopenharmony_ci	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
417362306a36Sopenharmony_ci	struct net *net = dev_net(dev);
417462306a36Sopenharmony_ci
417562306a36Sopenharmony_ci	rcu_read_lock();
417662306a36Sopenharmony_ci	sk_for_each_rcu(sk, &net->packet.sklist) {
417762306a36Sopenharmony_ci		struct packet_sock *po = pkt_sk(sk);
417862306a36Sopenharmony_ci
417962306a36Sopenharmony_ci		switch (msg) {
418062306a36Sopenharmony_ci		case NETDEV_UNREGISTER:
418162306a36Sopenharmony_ci			if (po->mclist)
418262306a36Sopenharmony_ci				packet_dev_mclist_delete(dev, &po->mclist);
418362306a36Sopenharmony_ci			fallthrough;
418462306a36Sopenharmony_ci
418562306a36Sopenharmony_ci		case NETDEV_DOWN:
418662306a36Sopenharmony_ci			if (dev->ifindex == po->ifindex) {
418762306a36Sopenharmony_ci				spin_lock(&po->bind_lock);
418862306a36Sopenharmony_ci				if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) {
418962306a36Sopenharmony_ci					__unregister_prot_hook(sk, false);
419062306a36Sopenharmony_ci					sk->sk_err = ENETDOWN;
419162306a36Sopenharmony_ci					if (!sock_flag(sk, SOCK_DEAD))
419262306a36Sopenharmony_ci						sk_error_report(sk);
419362306a36Sopenharmony_ci				}
419462306a36Sopenharmony_ci				if (msg == NETDEV_UNREGISTER) {
419562306a36Sopenharmony_ci					packet_cached_dev_reset(po);
419662306a36Sopenharmony_ci					WRITE_ONCE(po->ifindex, -1);
419762306a36Sopenharmony_ci					netdev_put(po->prot_hook.dev,
419862306a36Sopenharmony_ci						   &po->prot_hook.dev_tracker);
419962306a36Sopenharmony_ci					po->prot_hook.dev = NULL;
420062306a36Sopenharmony_ci				}
420162306a36Sopenharmony_ci				spin_unlock(&po->bind_lock);
420262306a36Sopenharmony_ci			}
420362306a36Sopenharmony_ci			break;
420462306a36Sopenharmony_ci		case NETDEV_UP:
420562306a36Sopenharmony_ci			if (dev->ifindex == po->ifindex) {
420662306a36Sopenharmony_ci				spin_lock(&po->bind_lock);
420762306a36Sopenharmony_ci				if (po->num)
420862306a36Sopenharmony_ci					register_prot_hook(sk);
420962306a36Sopenharmony_ci				spin_unlock(&po->bind_lock);
421062306a36Sopenharmony_ci			}
421162306a36Sopenharmony_ci			break;
421262306a36Sopenharmony_ci		}
421362306a36Sopenharmony_ci	}
421462306a36Sopenharmony_ci	rcu_read_unlock();
421562306a36Sopenharmony_ci	return NOTIFY_DONE;
421662306a36Sopenharmony_ci}
421762306a36Sopenharmony_ci
421862306a36Sopenharmony_ci
421962306a36Sopenharmony_cistatic int packet_ioctl(struct socket *sock, unsigned int cmd,
422062306a36Sopenharmony_ci			unsigned long arg)
422162306a36Sopenharmony_ci{
422262306a36Sopenharmony_ci	struct sock *sk = sock->sk;
422362306a36Sopenharmony_ci
422462306a36Sopenharmony_ci	switch (cmd) {
422562306a36Sopenharmony_ci	case SIOCOUTQ:
422662306a36Sopenharmony_ci	{
422762306a36Sopenharmony_ci		int amount = sk_wmem_alloc_get(sk);
422862306a36Sopenharmony_ci
422962306a36Sopenharmony_ci		return put_user(amount, (int __user *)arg);
423062306a36Sopenharmony_ci	}
423162306a36Sopenharmony_ci	case SIOCINQ:
423262306a36Sopenharmony_ci	{
423362306a36Sopenharmony_ci		struct sk_buff *skb;
423462306a36Sopenharmony_ci		int amount = 0;
423562306a36Sopenharmony_ci
423662306a36Sopenharmony_ci		spin_lock_bh(&sk->sk_receive_queue.lock);
423762306a36Sopenharmony_ci		skb = skb_peek(&sk->sk_receive_queue);
423862306a36Sopenharmony_ci		if (skb)
423962306a36Sopenharmony_ci			amount = skb->len;
424062306a36Sopenharmony_ci		spin_unlock_bh(&sk->sk_receive_queue.lock);
424162306a36Sopenharmony_ci		return put_user(amount, (int __user *)arg);
424262306a36Sopenharmony_ci	}
424362306a36Sopenharmony_ci#ifdef CONFIG_INET
424462306a36Sopenharmony_ci	case SIOCADDRT:
424562306a36Sopenharmony_ci	case SIOCDELRT:
424662306a36Sopenharmony_ci	case SIOCDARP:
424762306a36Sopenharmony_ci	case SIOCGARP:
424862306a36Sopenharmony_ci	case SIOCSARP:
424962306a36Sopenharmony_ci	case SIOCGIFADDR:
425062306a36Sopenharmony_ci	case SIOCSIFADDR:
425162306a36Sopenharmony_ci	case SIOCGIFBRDADDR:
425262306a36Sopenharmony_ci	case SIOCSIFBRDADDR:
425362306a36Sopenharmony_ci	case SIOCGIFNETMASK:
425462306a36Sopenharmony_ci	case SIOCSIFNETMASK:
425562306a36Sopenharmony_ci	case SIOCGIFDSTADDR:
425662306a36Sopenharmony_ci	case SIOCSIFDSTADDR:
425762306a36Sopenharmony_ci	case SIOCSIFFLAGS:
425862306a36Sopenharmony_ci		return inet_dgram_ops.ioctl(sock, cmd, arg);
425962306a36Sopenharmony_ci#endif
426062306a36Sopenharmony_ci
426162306a36Sopenharmony_ci	default:
426262306a36Sopenharmony_ci		return -ENOIOCTLCMD;
426362306a36Sopenharmony_ci	}
426462306a36Sopenharmony_ci	return 0;
426562306a36Sopenharmony_ci}
426662306a36Sopenharmony_ci
426762306a36Sopenharmony_cistatic __poll_t packet_poll(struct file *file, struct socket *sock,
426862306a36Sopenharmony_ci				poll_table *wait)
426962306a36Sopenharmony_ci{
427062306a36Sopenharmony_ci	struct sock *sk = sock->sk;
427162306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
427262306a36Sopenharmony_ci	__poll_t mask = datagram_poll(file, sock, wait);
427362306a36Sopenharmony_ci
427462306a36Sopenharmony_ci	spin_lock_bh(&sk->sk_receive_queue.lock);
427562306a36Sopenharmony_ci	if (po->rx_ring.pg_vec) {
427662306a36Sopenharmony_ci		if (!packet_previous_rx_frame(po, &po->rx_ring,
427762306a36Sopenharmony_ci			TP_STATUS_KERNEL))
427862306a36Sopenharmony_ci			mask |= EPOLLIN | EPOLLRDNORM;
427962306a36Sopenharmony_ci	}
428062306a36Sopenharmony_ci	packet_rcv_try_clear_pressure(po);
428162306a36Sopenharmony_ci	spin_unlock_bh(&sk->sk_receive_queue.lock);
428262306a36Sopenharmony_ci	spin_lock_bh(&sk->sk_write_queue.lock);
428362306a36Sopenharmony_ci	if (po->tx_ring.pg_vec) {
428462306a36Sopenharmony_ci		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
428562306a36Sopenharmony_ci			mask |= EPOLLOUT | EPOLLWRNORM;
428662306a36Sopenharmony_ci	}
428762306a36Sopenharmony_ci	spin_unlock_bh(&sk->sk_write_queue.lock);
428862306a36Sopenharmony_ci	return mask;
428962306a36Sopenharmony_ci}
429062306a36Sopenharmony_ci
429162306a36Sopenharmony_ci
/* Dirty? Well, I still have not learned a better way to account
 * for user mmaps.
 */
429562306a36Sopenharmony_ci
429662306a36Sopenharmony_cistatic void packet_mm_open(struct vm_area_struct *vma)
429762306a36Sopenharmony_ci{
429862306a36Sopenharmony_ci	struct file *file = vma->vm_file;
429962306a36Sopenharmony_ci	struct socket *sock = file->private_data;
430062306a36Sopenharmony_ci	struct sock *sk = sock->sk;
430162306a36Sopenharmony_ci
430262306a36Sopenharmony_ci	if (sk)
430362306a36Sopenharmony_ci		atomic_long_inc(&pkt_sk(sk)->mapped);
430462306a36Sopenharmony_ci}
430562306a36Sopenharmony_ci
430662306a36Sopenharmony_cistatic void packet_mm_close(struct vm_area_struct *vma)
430762306a36Sopenharmony_ci{
430862306a36Sopenharmony_ci	struct file *file = vma->vm_file;
430962306a36Sopenharmony_ci	struct socket *sock = file->private_data;
431062306a36Sopenharmony_ci	struct sock *sk = sock->sk;
431162306a36Sopenharmony_ci
431262306a36Sopenharmony_ci	if (sk)
431362306a36Sopenharmony_ci		atomic_long_dec(&pkt_sk(sk)->mapped);
431462306a36Sopenharmony_ci}
431562306a36Sopenharmony_ci
431662306a36Sopenharmony_cistatic const struct vm_operations_struct packet_mmap_ops = {
431762306a36Sopenharmony_ci	.open	=	packet_mm_open,
431862306a36Sopenharmony_ci	.close	=	packet_mm_close,
431962306a36Sopenharmony_ci};
432062306a36Sopenharmony_ci
432162306a36Sopenharmony_cistatic void free_pg_vec(struct pgv *pg_vec, unsigned int order,
432262306a36Sopenharmony_ci			unsigned int len)
432362306a36Sopenharmony_ci{
432462306a36Sopenharmony_ci	int i;
432562306a36Sopenharmony_ci
432662306a36Sopenharmony_ci	for (i = 0; i < len; i++) {
432762306a36Sopenharmony_ci		if (likely(pg_vec[i].buffer)) {
432862306a36Sopenharmony_ci			if (is_vmalloc_addr(pg_vec[i].buffer))
432962306a36Sopenharmony_ci				vfree(pg_vec[i].buffer);
433062306a36Sopenharmony_ci			else
433162306a36Sopenharmony_ci				free_pages((unsigned long)pg_vec[i].buffer,
433262306a36Sopenharmony_ci					   order);
433362306a36Sopenharmony_ci			pg_vec[i].buffer = NULL;
433462306a36Sopenharmony_ci		}
433562306a36Sopenharmony_ci	}
433662306a36Sopenharmony_ci	kfree(pg_vec);
433762306a36Sopenharmony_ci}
433862306a36Sopenharmony_ci
433962306a36Sopenharmony_cistatic char *alloc_one_pg_vec_page(unsigned long order)
434062306a36Sopenharmony_ci{
434162306a36Sopenharmony_ci	char *buffer;
434262306a36Sopenharmony_ci	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
434362306a36Sopenharmony_ci			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
434462306a36Sopenharmony_ci
434562306a36Sopenharmony_ci	buffer = (char *) __get_free_pages(gfp_flags, order);
434662306a36Sopenharmony_ci	if (buffer)
434762306a36Sopenharmony_ci		return buffer;
434862306a36Sopenharmony_ci
434962306a36Sopenharmony_ci	/* __get_free_pages failed, fall back to vmalloc */
435062306a36Sopenharmony_ci	buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
435162306a36Sopenharmony_ci	if (buffer)
435262306a36Sopenharmony_ci		return buffer;
435362306a36Sopenharmony_ci
435462306a36Sopenharmony_ci	/* vmalloc failed, lets dig into swap here */
435562306a36Sopenharmony_ci	gfp_flags &= ~__GFP_NORETRY;
435662306a36Sopenharmony_ci	buffer = (char *) __get_free_pages(gfp_flags, order);
435762306a36Sopenharmony_ci	if (buffer)
435862306a36Sopenharmony_ci		return buffer;
435962306a36Sopenharmony_ci
436062306a36Sopenharmony_ci	/* complete and utter failure */
436162306a36Sopenharmony_ci	return NULL;
436262306a36Sopenharmony_ci}
436362306a36Sopenharmony_ci
436462306a36Sopenharmony_cistatic struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
436562306a36Sopenharmony_ci{
436662306a36Sopenharmony_ci	unsigned int block_nr = req->tp_block_nr;
436762306a36Sopenharmony_ci	struct pgv *pg_vec;
436862306a36Sopenharmony_ci	int i;
436962306a36Sopenharmony_ci
437062306a36Sopenharmony_ci	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
437162306a36Sopenharmony_ci	if (unlikely(!pg_vec))
437262306a36Sopenharmony_ci		goto out;
437362306a36Sopenharmony_ci
437462306a36Sopenharmony_ci	for (i = 0; i < block_nr; i++) {
437562306a36Sopenharmony_ci		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
437662306a36Sopenharmony_ci		if (unlikely(!pg_vec[i].buffer))
437762306a36Sopenharmony_ci			goto out_free_pgvec;
437862306a36Sopenharmony_ci	}
437962306a36Sopenharmony_ci
438062306a36Sopenharmony_ciout:
438162306a36Sopenharmony_ci	return pg_vec;
438262306a36Sopenharmony_ci
438362306a36Sopenharmony_ciout_free_pgvec:
438462306a36Sopenharmony_ci	free_pg_vec(pg_vec, order, block_nr);
438562306a36Sopenharmony_ci	pg_vec = NULL;
438662306a36Sopenharmony_ci	goto out;
438762306a36Sopenharmony_ci}
438862306a36Sopenharmony_ci
438962306a36Sopenharmony_cistatic int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
439062306a36Sopenharmony_ci		int closing, int tx_ring)
439162306a36Sopenharmony_ci{
439262306a36Sopenharmony_ci	struct pgv *pg_vec = NULL;
439362306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
439462306a36Sopenharmony_ci	unsigned long *rx_owner_map = NULL;
439562306a36Sopenharmony_ci	int was_running, order = 0;
439662306a36Sopenharmony_ci	struct packet_ring_buffer *rb;
439762306a36Sopenharmony_ci	struct sk_buff_head *rb_queue;
439862306a36Sopenharmony_ci	__be16 num;
439962306a36Sopenharmony_ci	int err;
440062306a36Sopenharmony_ci	/* Added to avoid minimal code churn */
440162306a36Sopenharmony_ci	struct tpacket_req *req = &req_u->req;
440262306a36Sopenharmony_ci
440362306a36Sopenharmony_ci	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
440462306a36Sopenharmony_ci	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
440562306a36Sopenharmony_ci
440662306a36Sopenharmony_ci	err = -EBUSY;
440762306a36Sopenharmony_ci	if (!closing) {
440862306a36Sopenharmony_ci		if (atomic_long_read(&po->mapped))
440962306a36Sopenharmony_ci			goto out;
441062306a36Sopenharmony_ci		if (packet_read_pending(rb))
441162306a36Sopenharmony_ci			goto out;
441262306a36Sopenharmony_ci	}
441362306a36Sopenharmony_ci
441462306a36Sopenharmony_ci	if (req->tp_block_nr) {
441562306a36Sopenharmony_ci		unsigned int min_frame_size;
441662306a36Sopenharmony_ci
441762306a36Sopenharmony_ci		/* Sanity tests and some calculations */
441862306a36Sopenharmony_ci		err = -EBUSY;
441962306a36Sopenharmony_ci		if (unlikely(rb->pg_vec))
442062306a36Sopenharmony_ci			goto out;
442162306a36Sopenharmony_ci
442262306a36Sopenharmony_ci		switch (po->tp_version) {
442362306a36Sopenharmony_ci		case TPACKET_V1:
442462306a36Sopenharmony_ci			po->tp_hdrlen = TPACKET_HDRLEN;
442562306a36Sopenharmony_ci			break;
442662306a36Sopenharmony_ci		case TPACKET_V2:
442762306a36Sopenharmony_ci			po->tp_hdrlen = TPACKET2_HDRLEN;
442862306a36Sopenharmony_ci			break;
442962306a36Sopenharmony_ci		case TPACKET_V3:
443062306a36Sopenharmony_ci			po->tp_hdrlen = TPACKET3_HDRLEN;
443162306a36Sopenharmony_ci			break;
443262306a36Sopenharmony_ci		}
443362306a36Sopenharmony_ci
443462306a36Sopenharmony_ci		err = -EINVAL;
443562306a36Sopenharmony_ci		if (unlikely((int)req->tp_block_size <= 0))
443662306a36Sopenharmony_ci			goto out;
443762306a36Sopenharmony_ci		if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
443862306a36Sopenharmony_ci			goto out;
443962306a36Sopenharmony_ci		min_frame_size = po->tp_hdrlen + po->tp_reserve;
444062306a36Sopenharmony_ci		if (po->tp_version >= TPACKET_V3 &&
444162306a36Sopenharmony_ci		    req->tp_block_size <
444262306a36Sopenharmony_ci		    BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
444362306a36Sopenharmony_ci			goto out;
444462306a36Sopenharmony_ci		if (unlikely(req->tp_frame_size < min_frame_size))
444562306a36Sopenharmony_ci			goto out;
444662306a36Sopenharmony_ci		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
444762306a36Sopenharmony_ci			goto out;
444862306a36Sopenharmony_ci
444962306a36Sopenharmony_ci		rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
445062306a36Sopenharmony_ci		if (unlikely(rb->frames_per_block == 0))
445162306a36Sopenharmony_ci			goto out;
445262306a36Sopenharmony_ci		if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
445362306a36Sopenharmony_ci			goto out;
445462306a36Sopenharmony_ci		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
445562306a36Sopenharmony_ci					req->tp_frame_nr))
445662306a36Sopenharmony_ci			goto out;
445762306a36Sopenharmony_ci
445862306a36Sopenharmony_ci		err = -ENOMEM;
445962306a36Sopenharmony_ci		order = get_order(req->tp_block_size);
446062306a36Sopenharmony_ci		pg_vec = alloc_pg_vec(req, order);
446162306a36Sopenharmony_ci		if (unlikely(!pg_vec))
446262306a36Sopenharmony_ci			goto out;
446362306a36Sopenharmony_ci		switch (po->tp_version) {
446462306a36Sopenharmony_ci		case TPACKET_V3:
446562306a36Sopenharmony_ci			/* Block transmit is not supported yet */
446662306a36Sopenharmony_ci			if (!tx_ring) {
446762306a36Sopenharmony_ci				init_prb_bdqc(po, rb, pg_vec, req_u);
446862306a36Sopenharmony_ci			} else {
446962306a36Sopenharmony_ci				struct tpacket_req3 *req3 = &req_u->req3;
447062306a36Sopenharmony_ci
447162306a36Sopenharmony_ci				if (req3->tp_retire_blk_tov ||
447262306a36Sopenharmony_ci				    req3->tp_sizeof_priv ||
447362306a36Sopenharmony_ci				    req3->tp_feature_req_word) {
447462306a36Sopenharmony_ci					err = -EINVAL;
447562306a36Sopenharmony_ci					goto out_free_pg_vec;
447662306a36Sopenharmony_ci				}
447762306a36Sopenharmony_ci			}
447862306a36Sopenharmony_ci			break;
447962306a36Sopenharmony_ci		default:
448062306a36Sopenharmony_ci			if (!tx_ring) {
448162306a36Sopenharmony_ci				rx_owner_map = bitmap_alloc(req->tp_frame_nr,
448262306a36Sopenharmony_ci					GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
448362306a36Sopenharmony_ci				if (!rx_owner_map)
448462306a36Sopenharmony_ci					goto out_free_pg_vec;
448562306a36Sopenharmony_ci			}
448662306a36Sopenharmony_ci			break;
448762306a36Sopenharmony_ci		}
448862306a36Sopenharmony_ci	}
448962306a36Sopenharmony_ci	/* Done */
449062306a36Sopenharmony_ci	else {
449162306a36Sopenharmony_ci		err = -EINVAL;
449262306a36Sopenharmony_ci		if (unlikely(req->tp_frame_nr))
449362306a36Sopenharmony_ci			goto out;
449462306a36Sopenharmony_ci	}
449562306a36Sopenharmony_ci
449662306a36Sopenharmony_ci
449762306a36Sopenharmony_ci	/* Detach socket from network */
449862306a36Sopenharmony_ci	spin_lock(&po->bind_lock);
449962306a36Sopenharmony_ci	was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING);
450062306a36Sopenharmony_ci	num = po->num;
450162306a36Sopenharmony_ci	if (was_running) {
450262306a36Sopenharmony_ci		WRITE_ONCE(po->num, 0);
450362306a36Sopenharmony_ci		__unregister_prot_hook(sk, false);
450462306a36Sopenharmony_ci	}
450562306a36Sopenharmony_ci	spin_unlock(&po->bind_lock);
450662306a36Sopenharmony_ci
450762306a36Sopenharmony_ci	synchronize_net();
450862306a36Sopenharmony_ci
450962306a36Sopenharmony_ci	err = -EBUSY;
451062306a36Sopenharmony_ci	mutex_lock(&po->pg_vec_lock);
451162306a36Sopenharmony_ci	if (closing || atomic_long_read(&po->mapped) == 0) {
451262306a36Sopenharmony_ci		err = 0;
451362306a36Sopenharmony_ci		spin_lock_bh(&rb_queue->lock);
451462306a36Sopenharmony_ci		swap(rb->pg_vec, pg_vec);
451562306a36Sopenharmony_ci		if (po->tp_version <= TPACKET_V2)
451662306a36Sopenharmony_ci			swap(rb->rx_owner_map, rx_owner_map);
451762306a36Sopenharmony_ci		rb->frame_max = (req->tp_frame_nr - 1);
451862306a36Sopenharmony_ci		rb->head = 0;
451962306a36Sopenharmony_ci		rb->frame_size = req->tp_frame_size;
452062306a36Sopenharmony_ci		spin_unlock_bh(&rb_queue->lock);
452162306a36Sopenharmony_ci
452262306a36Sopenharmony_ci		swap(rb->pg_vec_order, order);
452362306a36Sopenharmony_ci		swap(rb->pg_vec_len, req->tp_block_nr);
452462306a36Sopenharmony_ci
452562306a36Sopenharmony_ci		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
452662306a36Sopenharmony_ci		po->prot_hook.func = (po->rx_ring.pg_vec) ?
452762306a36Sopenharmony_ci						tpacket_rcv : packet_rcv;
452862306a36Sopenharmony_ci		skb_queue_purge(rb_queue);
452962306a36Sopenharmony_ci		if (atomic_long_read(&po->mapped))
453062306a36Sopenharmony_ci			pr_err("packet_mmap: vma is busy: %ld\n",
453162306a36Sopenharmony_ci			       atomic_long_read(&po->mapped));
453262306a36Sopenharmony_ci	}
453362306a36Sopenharmony_ci	mutex_unlock(&po->pg_vec_lock);
453462306a36Sopenharmony_ci
453562306a36Sopenharmony_ci	spin_lock(&po->bind_lock);
453662306a36Sopenharmony_ci	if (was_running) {
453762306a36Sopenharmony_ci		WRITE_ONCE(po->num, num);
453862306a36Sopenharmony_ci		register_prot_hook(sk);
453962306a36Sopenharmony_ci	}
454062306a36Sopenharmony_ci	spin_unlock(&po->bind_lock);
454162306a36Sopenharmony_ci	if (pg_vec && (po->tp_version > TPACKET_V2)) {
454262306a36Sopenharmony_ci		/* Because we don't support block-based V3 on tx-ring */
454362306a36Sopenharmony_ci		if (!tx_ring)
454462306a36Sopenharmony_ci			prb_shutdown_retire_blk_timer(po, rb_queue);
454562306a36Sopenharmony_ci	}
454662306a36Sopenharmony_ci
454762306a36Sopenharmony_ciout_free_pg_vec:
454862306a36Sopenharmony_ci	if (pg_vec) {
454962306a36Sopenharmony_ci		bitmap_free(rx_owner_map);
455062306a36Sopenharmony_ci		free_pg_vec(pg_vec, order, req->tp_block_nr);
455162306a36Sopenharmony_ci	}
455262306a36Sopenharmony_ciout:
455362306a36Sopenharmony_ci	return err;
455462306a36Sopenharmony_ci}
455562306a36Sopenharmony_ci
455662306a36Sopenharmony_cistatic int packet_mmap(struct file *file, struct socket *sock,
455762306a36Sopenharmony_ci		struct vm_area_struct *vma)
455862306a36Sopenharmony_ci{
455962306a36Sopenharmony_ci	struct sock *sk = sock->sk;
456062306a36Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
456162306a36Sopenharmony_ci	unsigned long size, expected_size;
456262306a36Sopenharmony_ci	struct packet_ring_buffer *rb;
456362306a36Sopenharmony_ci	unsigned long start;
456462306a36Sopenharmony_ci	int err = -EINVAL;
456562306a36Sopenharmony_ci	int i;
456662306a36Sopenharmony_ci
456762306a36Sopenharmony_ci	if (vma->vm_pgoff)
456862306a36Sopenharmony_ci		return -EINVAL;
456962306a36Sopenharmony_ci
457062306a36Sopenharmony_ci	mutex_lock(&po->pg_vec_lock);
457162306a36Sopenharmony_ci
457262306a36Sopenharmony_ci	expected_size = 0;
457362306a36Sopenharmony_ci	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
457462306a36Sopenharmony_ci		if (rb->pg_vec) {
457562306a36Sopenharmony_ci			expected_size += rb->pg_vec_len
457662306a36Sopenharmony_ci						* rb->pg_vec_pages
457762306a36Sopenharmony_ci						* PAGE_SIZE;
457862306a36Sopenharmony_ci		}
457962306a36Sopenharmony_ci	}
458062306a36Sopenharmony_ci
458162306a36Sopenharmony_ci	if (expected_size == 0)
458262306a36Sopenharmony_ci		goto out;
458362306a36Sopenharmony_ci
458462306a36Sopenharmony_ci	size = vma->vm_end - vma->vm_start;
458562306a36Sopenharmony_ci	if (size != expected_size)
458662306a36Sopenharmony_ci		goto out;
458762306a36Sopenharmony_ci
458862306a36Sopenharmony_ci	start = vma->vm_start;
458962306a36Sopenharmony_ci	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
459062306a36Sopenharmony_ci		if (rb->pg_vec == NULL)
459162306a36Sopenharmony_ci			continue;
459262306a36Sopenharmony_ci
459362306a36Sopenharmony_ci		for (i = 0; i < rb->pg_vec_len; i++) {
459462306a36Sopenharmony_ci			struct page *page;
459562306a36Sopenharmony_ci			void *kaddr = rb->pg_vec[i].buffer;
459662306a36Sopenharmony_ci			int pg_num;
459762306a36Sopenharmony_ci
459862306a36Sopenharmony_ci			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
459962306a36Sopenharmony_ci				page = pgv_to_page(kaddr);
460062306a36Sopenharmony_ci				err = vm_insert_page(vma, start, page);
460162306a36Sopenharmony_ci				if (unlikely(err))
460262306a36Sopenharmony_ci					goto out;
460362306a36Sopenharmony_ci				start += PAGE_SIZE;
460462306a36Sopenharmony_ci				kaddr += PAGE_SIZE;
460562306a36Sopenharmony_ci			}
460662306a36Sopenharmony_ci		}
460762306a36Sopenharmony_ci	}
460862306a36Sopenharmony_ci
460962306a36Sopenharmony_ci	atomic_long_inc(&po->mapped);
461062306a36Sopenharmony_ci	vma->vm_ops = &packet_mmap_ops;
461162306a36Sopenharmony_ci	err = 0;
461262306a36Sopenharmony_ci
461362306a36Sopenharmony_ciout:
461462306a36Sopenharmony_ci	mutex_unlock(&po->pg_vec_lock);
461562306a36Sopenharmony_ci	return err;
461662306a36Sopenharmony_ci}
461762306a36Sopenharmony_ci
/*
 * Socket operations for the "spkt" flavour of PF_PACKET sockets
 * (presumably the legacy SOCK_PACKET type; the selection happens outside
 * this part of the file).  Compared with packet_ops this table has no
 * setsockopt/getsockopt handlers, uses the generic datagram_poll, and
 * rejects mmap via sock_no_mmap.
 */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.gettstamp =	sock_gettstamp,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
};
463662306a36Sopenharmony_ci
/*
 * Socket operations for regular PF_PACKET sockets.  Connection-oriented
 * calls (connect, socketpair, accept, listen, shutdown) are wired to the
 * sock_no_* stubs; socket options, ring-aware poll and mmap are
 * provided by the packet_* handlers.
 */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.gettstamp =	sock_gettstamp,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
};
465762306a36Sopenharmony_ci
/* Address-family descriptor handed to sock_register() in packet_init(). */
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};
466362306a36Sopenharmony_ci
/* Netdevice event hook; registered/unregistered in packet_init()/packet_exit(). */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
466762306a36Sopenharmony_ci
466862306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS
466962306a36Sopenharmony_ci
467062306a36Sopenharmony_cistatic void *packet_seq_start(struct seq_file *seq, loff_t *pos)
467162306a36Sopenharmony_ci	__acquires(RCU)
467262306a36Sopenharmony_ci{
467362306a36Sopenharmony_ci	struct net *net = seq_file_net(seq);
467462306a36Sopenharmony_ci
467562306a36Sopenharmony_ci	rcu_read_lock();
467662306a36Sopenharmony_ci	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
467762306a36Sopenharmony_ci}
467862306a36Sopenharmony_ci
467962306a36Sopenharmony_cistatic void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
468062306a36Sopenharmony_ci{
468162306a36Sopenharmony_ci	struct net *net = seq_file_net(seq);
468262306a36Sopenharmony_ci	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
468362306a36Sopenharmony_ci}
468462306a36Sopenharmony_ci
/* seq_file ->stop: drop the RCU read lock taken in packet_seq_start(). */
static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
469062306a36Sopenharmony_ci
469162306a36Sopenharmony_cistatic int packet_seq_show(struct seq_file *seq, void *v)
469262306a36Sopenharmony_ci{
469362306a36Sopenharmony_ci	if (v == SEQ_START_TOKEN)
469462306a36Sopenharmony_ci		seq_printf(seq,
469562306a36Sopenharmony_ci			   "%*sRefCnt Type Proto  Iface R Rmem   User   Inode\n",
469662306a36Sopenharmony_ci			   IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk");
469762306a36Sopenharmony_ci	else {
469862306a36Sopenharmony_ci		struct sock *s = sk_entry(v);
469962306a36Sopenharmony_ci		const struct packet_sock *po = pkt_sk(s);
470062306a36Sopenharmony_ci
470162306a36Sopenharmony_ci		seq_printf(seq,
470262306a36Sopenharmony_ci			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
470362306a36Sopenharmony_ci			   s,
470462306a36Sopenharmony_ci			   refcount_read(&s->sk_refcnt),
470562306a36Sopenharmony_ci			   s->sk_type,
470662306a36Sopenharmony_ci			   ntohs(READ_ONCE(po->num)),
470762306a36Sopenharmony_ci			   READ_ONCE(po->ifindex),
470862306a36Sopenharmony_ci			   packet_sock_flag(po, PACKET_SOCK_RUNNING),
470962306a36Sopenharmony_ci			   atomic_read(&s->sk_rmem_alloc),
471062306a36Sopenharmony_ci			   from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
471162306a36Sopenharmony_ci			   sock_i_ino(s));
471262306a36Sopenharmony_ci	}
471362306a36Sopenharmony_ci
471462306a36Sopenharmony_ci	return 0;
471562306a36Sopenharmony_ci}
471662306a36Sopenharmony_ci
/* Iterator callbacks behind the per-namespace "packet" proc entry. */
static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};
472362306a36Sopenharmony_ci#endif
472462306a36Sopenharmony_ci
472562306a36Sopenharmony_cistatic int __net_init packet_net_init(struct net *net)
472662306a36Sopenharmony_ci{
472762306a36Sopenharmony_ci	mutex_init(&net->packet.sklist_lock);
472862306a36Sopenharmony_ci	INIT_HLIST_HEAD(&net->packet.sklist);
472962306a36Sopenharmony_ci
473062306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS
473162306a36Sopenharmony_ci	if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
473262306a36Sopenharmony_ci			sizeof(struct seq_net_private)))
473362306a36Sopenharmony_ci		return -ENOMEM;
473462306a36Sopenharmony_ci#endif /* CONFIG_PROC_FS */
473562306a36Sopenharmony_ci
473662306a36Sopenharmony_ci	return 0;
473762306a36Sopenharmony_ci}
473862306a36Sopenharmony_ci
/*
 * Per-namespace teardown: remove the "packet" proc entry and warn (once)
 * if any packet socket is still linked on the namespace's list.
 */
static void __net_exit packet_net_exit(struct net *net)
{
	remove_proc_entry("packet", net->proc_net);
	WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
}
474462306a36Sopenharmony_ci
/* Per-network-namespace init/exit hooks, registered in packet_init(). */
static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
474962306a36Sopenharmony_ci
475062306a36Sopenharmony_ci
/*
 * Module unload: undo packet_init() in exactly the reverse order of
 * registration (socket family, proto, netdevice notifier, pernet ops).
 */
static void __exit packet_exit(void)
{
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
}
475862306a36Sopenharmony_ci
/*
 * Module load: register, in order, the pernet operations, the netdevice
 * notifier, the protocol and finally the PF_PACKET socket family.
 *
 * Returns 0 on success.  On failure the error of the failing step is
 * returned after every earlier registration has been rolled back.
 */
static int __init packet_init(void)
{
	int rc;

	rc = register_pernet_subsys(&packet_net_ops);
	if (rc)
		goto out;
	rc = register_netdevice_notifier(&packet_netdev_notifier);
	if (rc)
		goto out_pernet;
	rc = proto_register(&packet_proto, 0);
	if (rc)
		goto out_notifier;
	rc = sock_register(&packet_family_ops);
	if (rc)
		goto out_proto;

	return 0;

	/* Unwind ladder: each label undoes the step before the one that failed. */
out_proto:
	proto_unregister(&packet_proto);
out_notifier:
	unregister_netdevice_notifier(&packet_netdev_notifier);
out_pernet:
	unregister_pernet_subsys(&packet_net_ops);
out:
	return rc;
}
478762306a36Sopenharmony_ci
/* Module entry/exit points, licence, and the PF_PACKET protocol-family alias. */
module_init(packet_init);
module_exit(packet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_PACKET);
4792