18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-or-later
28c2ecf20Sopenharmony_ci/*
38c2ecf20Sopenharmony_ci * INET		An implementation of the TCP/IP protocol suite for the LINUX
48c2ecf20Sopenharmony_ci *		operating system.  INET is implemented using the  BSD Socket
58c2ecf20Sopenharmony_ci *		interface as the means of communication with the user level.
68c2ecf20Sopenharmony_ci *
78c2ecf20Sopenharmony_ci *		PACKET - implements raw packet sockets.
88c2ecf20Sopenharmony_ci *
98c2ecf20Sopenharmony_ci * Authors:	Ross Biro
108c2ecf20Sopenharmony_ci *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
118c2ecf20Sopenharmony_ci *		Alan Cox, <gw4pts@gw4pts.ampr.org>
128c2ecf20Sopenharmony_ci *
138c2ecf20Sopenharmony_ci * Fixes:
148c2ecf20Sopenharmony_ci *		Alan Cox	:	verify_area() now used correctly
158c2ecf20Sopenharmony_ci *		Alan Cox	:	new skbuff lists, look ma no backlogs!
168c2ecf20Sopenharmony_ci *		Alan Cox	:	tidied skbuff lists.
178c2ecf20Sopenharmony_ci *		Alan Cox	:	Now uses generic datagram routines I
188c2ecf20Sopenharmony_ci *					added. Also fixed the peek/read crash
198c2ecf20Sopenharmony_ci *					from all old Linux datagram code.
208c2ecf20Sopenharmony_ci *		Alan Cox	:	Uses the improved datagram code.
218c2ecf20Sopenharmony_ci *		Alan Cox	:	Added NULL's for socket options.
228c2ecf20Sopenharmony_ci *		Alan Cox	:	Re-commented the code.
238c2ecf20Sopenharmony_ci *		Alan Cox	:	Use new kernel side addressing
248c2ecf20Sopenharmony_ci *		Rob Janssen	:	Correct MTU usage.
258c2ecf20Sopenharmony_ci *		Dave Platt	:	Counter leaks caused by incorrect
268c2ecf20Sopenharmony_ci *					interrupt locking and some slightly
278c2ecf20Sopenharmony_ci *					dubious gcc output. Can you read
288c2ecf20Sopenharmony_ci *					compiler: it said _VOLATILE_
298c2ecf20Sopenharmony_ci *	Richard Kooijman	:	Timestamp fixes.
308c2ecf20Sopenharmony_ci *		Alan Cox	:	New buffers. Use sk->mac.raw.
318c2ecf20Sopenharmony_ci *		Alan Cox	:	sendmsg/recvmsg support.
328c2ecf20Sopenharmony_ci *		Alan Cox	:	Protocol setting support
338c2ecf20Sopenharmony_ci *	Alexey Kuznetsov	:	Untied from IPv4 stack.
348c2ecf20Sopenharmony_ci *	Cyrus Durgin		:	Fixed kerneld for kmod.
358c2ecf20Sopenharmony_ci *	Michal Ostrowski        :       Module initialization cleanup.
368c2ecf20Sopenharmony_ci *         Ulises Alonso        :       Frame number limit removal and
378c2ecf20Sopenharmony_ci *                                      packet_set_ring memory leak.
388c2ecf20Sopenharmony_ci *		Eric Biederman	:	Allow for > 8 byte hardware addresses.
398c2ecf20Sopenharmony_ci *					The convention is that longer addresses
408c2ecf20Sopenharmony_ci *					will simply extend the hardware address
418c2ecf20Sopenharmony_ci *					byte arrays at the end of sockaddr_ll
428c2ecf20Sopenharmony_ci *					and packet_mreq.
438c2ecf20Sopenharmony_ci *		Johann Baudy	:	Added TX RING.
448c2ecf20Sopenharmony_ci *		Chetan Loke	:	Implemented TPACKET_V3 block abstraction
458c2ecf20Sopenharmony_ci *					layer.
468c2ecf20Sopenharmony_ci *					Copyright (C) 2011, <lokec@ccs.neu.edu>
478c2ecf20Sopenharmony_ci */
488c2ecf20Sopenharmony_ci
498c2ecf20Sopenharmony_ci#include <linux/types.h>
508c2ecf20Sopenharmony_ci#include <linux/mm.h>
518c2ecf20Sopenharmony_ci#include <linux/capability.h>
528c2ecf20Sopenharmony_ci#include <linux/fcntl.h>
538c2ecf20Sopenharmony_ci#include <linux/socket.h>
548c2ecf20Sopenharmony_ci#include <linux/in.h>
558c2ecf20Sopenharmony_ci#include <linux/inet.h>
568c2ecf20Sopenharmony_ci#include <linux/netdevice.h>
578c2ecf20Sopenharmony_ci#include <linux/if_packet.h>
588c2ecf20Sopenharmony_ci#include <linux/wireless.h>
598c2ecf20Sopenharmony_ci#include <linux/kernel.h>
608c2ecf20Sopenharmony_ci#include <linux/kmod.h>
618c2ecf20Sopenharmony_ci#include <linux/slab.h>
628c2ecf20Sopenharmony_ci#include <linux/vmalloc.h>
638c2ecf20Sopenharmony_ci#include <net/net_namespace.h>
648c2ecf20Sopenharmony_ci#include <net/ip.h>
658c2ecf20Sopenharmony_ci#include <net/protocol.h>
668c2ecf20Sopenharmony_ci#include <linux/skbuff.h>
678c2ecf20Sopenharmony_ci#include <net/sock.h>
688c2ecf20Sopenharmony_ci#include <linux/errno.h>
698c2ecf20Sopenharmony_ci#include <linux/timer.h>
708c2ecf20Sopenharmony_ci#include <linux/uaccess.h>
718c2ecf20Sopenharmony_ci#include <asm/ioctls.h>
728c2ecf20Sopenharmony_ci#include <asm/page.h>
738c2ecf20Sopenharmony_ci#include <asm/cacheflush.h>
748c2ecf20Sopenharmony_ci#include <asm/io.h>
758c2ecf20Sopenharmony_ci#include <linux/proc_fs.h>
768c2ecf20Sopenharmony_ci#include <linux/seq_file.h>
778c2ecf20Sopenharmony_ci#include <linux/poll.h>
788c2ecf20Sopenharmony_ci#include <linux/module.h>
798c2ecf20Sopenharmony_ci#include <linux/init.h>
808c2ecf20Sopenharmony_ci#include <linux/mutex.h>
818c2ecf20Sopenharmony_ci#include <linux/if_vlan.h>
828c2ecf20Sopenharmony_ci#include <linux/virtio_net.h>
838c2ecf20Sopenharmony_ci#include <linux/errqueue.h>
848c2ecf20Sopenharmony_ci#include <linux/net_tstamp.h>
858c2ecf20Sopenharmony_ci#include <linux/percpu.h>
868c2ecf20Sopenharmony_ci#ifdef CONFIG_INET
878c2ecf20Sopenharmony_ci#include <net/inet_common.h>
888c2ecf20Sopenharmony_ci#endif
898c2ecf20Sopenharmony_ci#include <linux/bpf.h>
908c2ecf20Sopenharmony_ci#include <net/compat.h>
918c2ecf20Sopenharmony_ci
928c2ecf20Sopenharmony_ci#include "internal.h"
938c2ecf20Sopenharmony_ci
948c2ecf20Sopenharmony_ci/*
958c2ecf20Sopenharmony_ci   Assumptions:
968c2ecf20Sopenharmony_ci   - If the device has no dev->header_ops->create, there is no LL header
978c2ecf20Sopenharmony_ci     visible above the device. In this case, its hard_header_len should be 0.
988c2ecf20Sopenharmony_ci     The device may prepend its own header internally. In this case, its
998c2ecf20Sopenharmony_ci     needed_headroom should be set to the space needed for it to add its
1008c2ecf20Sopenharmony_ci     internal header.
1018c2ecf20Sopenharmony_ci     For example, a WiFi driver pretending to be an Ethernet driver should
1028c2ecf20Sopenharmony_ci     set its hard_header_len to be the Ethernet header length, and set its
1038c2ecf20Sopenharmony_ci     needed_headroom to be (the real WiFi header length - the fake Ethernet
1048c2ecf20Sopenharmony_ci     header length).
1058c2ecf20Sopenharmony_ci   - packet socket receives packets with pulled ll header,
1068c2ecf20Sopenharmony_ci     so that SOCK_RAW should push it back.
1078c2ecf20Sopenharmony_ci
1088c2ecf20Sopenharmony_ciOn receive:
1098c2ecf20Sopenharmony_ci-----------
1108c2ecf20Sopenharmony_ci
1118c2ecf20Sopenharmony_ciIncoming, dev_has_header(dev) == true
1128c2ecf20Sopenharmony_ci   mac_header -> ll header
1138c2ecf20Sopenharmony_ci   data       -> data
1148c2ecf20Sopenharmony_ci
1158c2ecf20Sopenharmony_ciOutgoing, dev_has_header(dev) == true
1168c2ecf20Sopenharmony_ci   mac_header -> ll header
1178c2ecf20Sopenharmony_ci   data       -> ll header
1188c2ecf20Sopenharmony_ci
1198c2ecf20Sopenharmony_ciIncoming, dev_has_header(dev) == false
1208c2ecf20Sopenharmony_ci   mac_header -> data
1218c2ecf20Sopenharmony_ci     However drivers often make it point to the ll header.
1228c2ecf20Sopenharmony_ci     This is incorrect because the ll header should be invisible to us.
1238c2ecf20Sopenharmony_ci   data       -> data
1248c2ecf20Sopenharmony_ci
1258c2ecf20Sopenharmony_ciOutgoing, dev_has_header(dev) == false
1268c2ecf20Sopenharmony_ci   mac_header -> data. ll header is invisible to us.
1278c2ecf20Sopenharmony_ci   data       -> data
1288c2ecf20Sopenharmony_ci
Summary
1308c2ecf20Sopenharmony_ci  If dev_has_header(dev) == false we are unable to restore the ll header,
1318c2ecf20Sopenharmony_ci    because it is invisible to us.
1328c2ecf20Sopenharmony_ci
1338c2ecf20Sopenharmony_ci
1348c2ecf20Sopenharmony_ciOn transmit:
1358c2ecf20Sopenharmony_ci------------
1368c2ecf20Sopenharmony_ci
1378c2ecf20Sopenharmony_cidev->header_ops != NULL
1388c2ecf20Sopenharmony_ci   mac_header -> ll header
1398c2ecf20Sopenharmony_ci   data       -> ll header
1408c2ecf20Sopenharmony_ci
1418c2ecf20Sopenharmony_cidev->header_ops == NULL (ll header is invisible to us)
1428c2ecf20Sopenharmony_ci   mac_header -> data
1438c2ecf20Sopenharmony_ci   data       -> data
1448c2ecf20Sopenharmony_ci
1458c2ecf20Sopenharmony_ci   We should set network_header on output to the correct position,
1468c2ecf20Sopenharmony_ci   packet classifier depends on it.
1478c2ecf20Sopenharmony_ci */
1488c2ecf20Sopenharmony_ci
1498c2ecf20Sopenharmony_ci/* Private packet socket structures. */
1508c2ecf20Sopenharmony_ci
1518c2ecf20Sopenharmony_ci/* identical to struct packet_mreq except it has
1528c2ecf20Sopenharmony_ci * a longer address field.
1538c2ecf20Sopenharmony_ci */
1548c2ecf20Sopenharmony_cistruct packet_mreq_max {
1558c2ecf20Sopenharmony_ci	int		mr_ifindex;
1568c2ecf20Sopenharmony_ci	unsigned short	mr_type;
1578c2ecf20Sopenharmony_ci	unsigned short	mr_alen;
1588c2ecf20Sopenharmony_ci	unsigned char	mr_address[MAX_ADDR_LEN];
1598c2ecf20Sopenharmony_ci};
1608c2ecf20Sopenharmony_ci
/* One frame pointer, viewable either untyped (raw) or as a pointer to
 * any of the three TPACKET header structures.
 */
union tpacket_uhdr {
	struct tpacket_hdr  *h1;	/* typed as struct tpacket_hdr */
	struct tpacket2_hdr *h2;	/* typed as struct tpacket2_hdr */
	struct tpacket3_hdr *h3;	/* typed as struct tpacket3_hdr */
	void *raw;			/* untyped frame address */
};
1678c2ecf20Sopenharmony_ci
/* Forward declaration so that code placed above the definition can call it. */
static int packet_set_ring(struct sock *sk,
			   union tpacket_req_u *req_u,
			   int closing,
			   int tx_ring);
1708c2ecf20Sopenharmony_ci
/* Alignment applied to the block header and to the private area below. */
#define V3_ALIGNMENT	(8)

/* sizeof(struct tpacket_block_desc), rounded up to V3_ALIGNMENT. */
#define BLK_HDR_LEN	(ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT))

/* Aligned block header length plus an aligned private area of @priv_len. */
#define BLK_PLUS_PRIV(priv_len) \
	(BLK_HDR_LEN + ALIGN((priv_len), V3_ALIGNMENT))

/* Field accessors; @pbd is a pointer to a block descriptor. */
#define BLOCK_STATUS(pbd)	((pbd)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(pbd)	((pbd)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(pbd)		((pbd)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(pbd)		((pbd)->hdr.bh1.blk_len)
#define BLOCK_SNUM(pbd)		((pbd)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(pbd)	((pbd)->offset_to_priv)
1848c2ecf20Sopenharmony_ci
1858c2ecf20Sopenharmony_cistruct packet_sock;
1868c2ecf20Sopenharmony_cistatic int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
1878c2ecf20Sopenharmony_ci		       struct packet_type *pt, struct net_device *orig_dev);
1888c2ecf20Sopenharmony_ci
1898c2ecf20Sopenharmony_cistatic void *packet_previous_frame(struct packet_sock *po,
1908c2ecf20Sopenharmony_ci		struct packet_ring_buffer *rb,
1918c2ecf20Sopenharmony_ci		int status);
1928c2ecf20Sopenharmony_cistatic void packet_increment_head(struct packet_ring_buffer *buff);
1938c2ecf20Sopenharmony_cistatic int prb_curr_blk_in_use(struct tpacket_block_desc *);
1948c2ecf20Sopenharmony_cistatic void *prb_dispatch_next_block(struct tpacket_kbdq_core *,
1958c2ecf20Sopenharmony_ci			struct packet_sock *);
1968c2ecf20Sopenharmony_cistatic void prb_retire_current_block(struct tpacket_kbdq_core *,
1978c2ecf20Sopenharmony_ci		struct packet_sock *, unsigned int status);
1988c2ecf20Sopenharmony_cistatic int prb_queue_frozen(struct tpacket_kbdq_core *);
1998c2ecf20Sopenharmony_cistatic void prb_open_block(struct tpacket_kbdq_core *,
2008c2ecf20Sopenharmony_ci		struct tpacket_block_desc *);
2018c2ecf20Sopenharmony_cistatic void prb_retire_rx_blk_timer_expired(struct timer_list *);
2028c2ecf20Sopenharmony_cistatic void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *);
2038c2ecf20Sopenharmony_cistatic void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *);
2048c2ecf20Sopenharmony_cistatic void prb_clear_rxhash(struct tpacket_kbdq_core *,
2058c2ecf20Sopenharmony_ci		struct tpacket3_hdr *);
2068c2ecf20Sopenharmony_cistatic void prb_fill_vlan_info(struct tpacket_kbdq_core *,
2078c2ecf20Sopenharmony_ci		struct tpacket3_hdr *);
2088c2ecf20Sopenharmony_cistatic void packet_flush_mclist(struct sock *sk);
2098c2ecf20Sopenharmony_cistatic u16 packet_pick_tx_queue(struct sk_buff *skb);
2108c2ecf20Sopenharmony_ci
2118c2ecf20Sopenharmony_cistruct packet_skb_cb {
2128c2ecf20Sopenharmony_ci	union {
2138c2ecf20Sopenharmony_ci		struct sockaddr_pkt pkt;
2148c2ecf20Sopenharmony_ci		union {
2158c2ecf20Sopenharmony_ci			/* Trick: alias skb original length with
2168c2ecf20Sopenharmony_ci			 * ll.sll_family and ll.protocol in order
2178c2ecf20Sopenharmony_ci			 * to save room.
2188c2ecf20Sopenharmony_ci			 */
2198c2ecf20Sopenharmony_ci			unsigned int origlen;
2208c2ecf20Sopenharmony_ci			struct sockaddr_ll ll;
2218c2ecf20Sopenharmony_ci		};
2228c2ecf20Sopenharmony_ci	} sa;
2238c2ecf20Sopenharmony_ci};
2248c2ecf20Sopenharmony_ci
/* Shorthand for virtio_legacy_is_little_endian(). */
#define vio_le() virtio_legacy_is_little_endian()

/* View an skb's control buffer (cb) as struct packet_skb_cb. */
#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))

/* Address of the prb_bdqc member of ring buffer @rb. */
#define GET_PBDQC_FROM_RB(rb)	((struct tpacket_kbdq_core *)(&(rb)->prb_bdqc))

/* Block descriptor stored at index @bid of @pkc's pkbdq array. */
#define GET_PBLOCK_DESC(pkc, bid)	\
	((struct tpacket_block_desc *)((pkc)->pkbdq[(bid)].buffer))

/* Block descriptor at index kactive_blk_num of @pkc. */
#define GET_CURR_PBLOCK_DESC_FROM_CORE(pkc)	\
	GET_PBLOCK_DESC(pkc, (pkc)->kactive_blk_num)

/* Index following kactive_blk_num, wrapping to 0 after knum_blocks - 1. */
#define GET_NEXT_PRB_BLK_NUM(pkc) \
	(((pkc)->kactive_blk_num < ((pkc)->knum_blocks-1)) ? \
	((pkc)->kactive_blk_num+1) : 0)
2378c2ecf20Sopenharmony_ci
/* Forward declarations; both are called by the prot-hook register and
 * unregister helpers below.
 */
static void __fanout_link(struct sock *sk, struct packet_sock *po);
static void __fanout_unlink(struct sock *sk, struct packet_sock *po);
2408c2ecf20Sopenharmony_ci
2418c2ecf20Sopenharmony_cistatic int packet_direct_xmit(struct sk_buff *skb)
2428c2ecf20Sopenharmony_ci{
2438c2ecf20Sopenharmony_ci	return dev_direct_xmit(skb, packet_pick_tx_queue(skb));
2448c2ecf20Sopenharmony_ci}
2458c2ecf20Sopenharmony_ci
2468c2ecf20Sopenharmony_cistatic struct net_device *packet_cached_dev_get(struct packet_sock *po)
2478c2ecf20Sopenharmony_ci{
2488c2ecf20Sopenharmony_ci	struct net_device *dev;
2498c2ecf20Sopenharmony_ci
2508c2ecf20Sopenharmony_ci	rcu_read_lock();
2518c2ecf20Sopenharmony_ci	dev = rcu_dereference(po->cached_dev);
2528c2ecf20Sopenharmony_ci	if (likely(dev))
2538c2ecf20Sopenharmony_ci		dev_hold(dev);
2548c2ecf20Sopenharmony_ci	rcu_read_unlock();
2558c2ecf20Sopenharmony_ci
2568c2ecf20Sopenharmony_ci	return dev;
2578c2ecf20Sopenharmony_ci}
2588c2ecf20Sopenharmony_ci
2598c2ecf20Sopenharmony_cistatic void packet_cached_dev_assign(struct packet_sock *po,
2608c2ecf20Sopenharmony_ci				     struct net_device *dev)
2618c2ecf20Sopenharmony_ci{
2628c2ecf20Sopenharmony_ci	rcu_assign_pointer(po->cached_dev, dev);
2638c2ecf20Sopenharmony_ci}
2648c2ecf20Sopenharmony_ci
2658c2ecf20Sopenharmony_cistatic void packet_cached_dev_reset(struct packet_sock *po)
2668c2ecf20Sopenharmony_ci{
2678c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(po->cached_dev, NULL);
2688c2ecf20Sopenharmony_ci}
2698c2ecf20Sopenharmony_ci
2708c2ecf20Sopenharmony_cistatic bool packet_use_direct_xmit(const struct packet_sock *po)
2718c2ecf20Sopenharmony_ci{
2728c2ecf20Sopenharmony_ci	/* Paired with WRITE_ONCE() in packet_setsockopt() */
2738c2ecf20Sopenharmony_ci	return READ_ONCE(po->xmit) == packet_direct_xmit;
2748c2ecf20Sopenharmony_ci}
2758c2ecf20Sopenharmony_ci
2768c2ecf20Sopenharmony_cistatic u16 packet_pick_tx_queue(struct sk_buff *skb)
2778c2ecf20Sopenharmony_ci{
2788c2ecf20Sopenharmony_ci	struct net_device *dev = skb->dev;
2798c2ecf20Sopenharmony_ci	const struct net_device_ops *ops = dev->netdev_ops;
2808c2ecf20Sopenharmony_ci	int cpu = raw_smp_processor_id();
2818c2ecf20Sopenharmony_ci	u16 queue_index;
2828c2ecf20Sopenharmony_ci
2838c2ecf20Sopenharmony_ci#ifdef CONFIG_XPS
2848c2ecf20Sopenharmony_ci	skb->sender_cpu = cpu + 1;
2858c2ecf20Sopenharmony_ci#endif
2868c2ecf20Sopenharmony_ci	skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues);
2878c2ecf20Sopenharmony_ci	if (ops->ndo_select_queue) {
2888c2ecf20Sopenharmony_ci		queue_index = ops->ndo_select_queue(dev, skb, NULL);
2898c2ecf20Sopenharmony_ci		queue_index = netdev_cap_txqueue(dev, queue_index);
2908c2ecf20Sopenharmony_ci	} else {
2918c2ecf20Sopenharmony_ci		queue_index = netdev_pick_tx(dev, skb, NULL);
2928c2ecf20Sopenharmony_ci	}
2938c2ecf20Sopenharmony_ci
2948c2ecf20Sopenharmony_ci	return queue_index;
2958c2ecf20Sopenharmony_ci}
2968c2ecf20Sopenharmony_ci
2978c2ecf20Sopenharmony_ci/* __register_prot_hook must be invoked through register_prot_hook
2988c2ecf20Sopenharmony_ci * or from a context in which asynchronous accesses to the packet
2998c2ecf20Sopenharmony_ci * socket is not possible (packet_create()).
3008c2ecf20Sopenharmony_ci */
3018c2ecf20Sopenharmony_cistatic void __register_prot_hook(struct sock *sk)
3028c2ecf20Sopenharmony_ci{
3038c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
3048c2ecf20Sopenharmony_ci
3058c2ecf20Sopenharmony_ci	if (!po->running) {
3068c2ecf20Sopenharmony_ci		if (po->fanout)
3078c2ecf20Sopenharmony_ci			__fanout_link(sk, po);
3088c2ecf20Sopenharmony_ci		else
3098c2ecf20Sopenharmony_ci			dev_add_pack(&po->prot_hook);
3108c2ecf20Sopenharmony_ci
3118c2ecf20Sopenharmony_ci		sock_hold(sk);
3128c2ecf20Sopenharmony_ci		po->running = 1;
3138c2ecf20Sopenharmony_ci	}
3148c2ecf20Sopenharmony_ci}
3158c2ecf20Sopenharmony_ci
3168c2ecf20Sopenharmony_cistatic void register_prot_hook(struct sock *sk)
3178c2ecf20Sopenharmony_ci{
3188c2ecf20Sopenharmony_ci	lockdep_assert_held_once(&pkt_sk(sk)->bind_lock);
3198c2ecf20Sopenharmony_ci	__register_prot_hook(sk);
3208c2ecf20Sopenharmony_ci}
3218c2ecf20Sopenharmony_ci
3228c2ecf20Sopenharmony_ci/* If the sync parameter is true, we will temporarily drop
3238c2ecf20Sopenharmony_ci * the po->bind_lock and do a synchronize_net to make sure no
3248c2ecf20Sopenharmony_ci * asynchronous packet processing paths still refer to the elements
3258c2ecf20Sopenharmony_ci * of po->prot_hook.  If the sync parameter is false, it is the
3268c2ecf20Sopenharmony_ci * callers responsibility to take care of this.
3278c2ecf20Sopenharmony_ci */
3288c2ecf20Sopenharmony_cistatic void __unregister_prot_hook(struct sock *sk, bool sync)
3298c2ecf20Sopenharmony_ci{
3308c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
3318c2ecf20Sopenharmony_ci
3328c2ecf20Sopenharmony_ci	lockdep_assert_held_once(&po->bind_lock);
3338c2ecf20Sopenharmony_ci
3348c2ecf20Sopenharmony_ci	po->running = 0;
3358c2ecf20Sopenharmony_ci
3368c2ecf20Sopenharmony_ci	if (po->fanout)
3378c2ecf20Sopenharmony_ci		__fanout_unlink(sk, po);
3388c2ecf20Sopenharmony_ci	else
3398c2ecf20Sopenharmony_ci		__dev_remove_pack(&po->prot_hook);
3408c2ecf20Sopenharmony_ci
3418c2ecf20Sopenharmony_ci	__sock_put(sk);
3428c2ecf20Sopenharmony_ci
3438c2ecf20Sopenharmony_ci	if (sync) {
3448c2ecf20Sopenharmony_ci		spin_unlock(&po->bind_lock);
3458c2ecf20Sopenharmony_ci		synchronize_net();
3468c2ecf20Sopenharmony_ci		spin_lock(&po->bind_lock);
3478c2ecf20Sopenharmony_ci	}
3488c2ecf20Sopenharmony_ci}
3498c2ecf20Sopenharmony_ci
3508c2ecf20Sopenharmony_cistatic void unregister_prot_hook(struct sock *sk, bool sync)
3518c2ecf20Sopenharmony_ci{
3528c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
3538c2ecf20Sopenharmony_ci
3548c2ecf20Sopenharmony_ci	if (po->running)
3558c2ecf20Sopenharmony_ci		__unregister_prot_hook(sk, sync);
3568c2ecf20Sopenharmony_ci}
3578c2ecf20Sopenharmony_ci
3588c2ecf20Sopenharmony_cistatic inline struct page * __pure pgv_to_page(void *addr)
3598c2ecf20Sopenharmony_ci{
3608c2ecf20Sopenharmony_ci	if (is_vmalloc_addr(addr))
3618c2ecf20Sopenharmony_ci		return vmalloc_to_page(addr);
3628c2ecf20Sopenharmony_ci	return virt_to_page(addr);
3638c2ecf20Sopenharmony_ci}
3648c2ecf20Sopenharmony_ci
3658c2ecf20Sopenharmony_cistatic void __packet_set_status(struct packet_sock *po, void *frame, int status)
3668c2ecf20Sopenharmony_ci{
3678c2ecf20Sopenharmony_ci	union tpacket_uhdr h;
3688c2ecf20Sopenharmony_ci
3698c2ecf20Sopenharmony_ci	/* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */
3708c2ecf20Sopenharmony_ci
3718c2ecf20Sopenharmony_ci	h.raw = frame;
3728c2ecf20Sopenharmony_ci	switch (po->tp_version) {
3738c2ecf20Sopenharmony_ci	case TPACKET_V1:
3748c2ecf20Sopenharmony_ci		WRITE_ONCE(h.h1->tp_status, status);
3758c2ecf20Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
3768c2ecf20Sopenharmony_ci		break;
3778c2ecf20Sopenharmony_ci	case TPACKET_V2:
3788c2ecf20Sopenharmony_ci		WRITE_ONCE(h.h2->tp_status, status);
3798c2ecf20Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
3808c2ecf20Sopenharmony_ci		break;
3818c2ecf20Sopenharmony_ci	case TPACKET_V3:
3828c2ecf20Sopenharmony_ci		WRITE_ONCE(h.h3->tp_status, status);
3838c2ecf20Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h3->tp_status));
3848c2ecf20Sopenharmony_ci		break;
3858c2ecf20Sopenharmony_ci	default:
3868c2ecf20Sopenharmony_ci		WARN(1, "TPACKET version not supported.\n");
3878c2ecf20Sopenharmony_ci		BUG();
3888c2ecf20Sopenharmony_ci	}
3898c2ecf20Sopenharmony_ci
3908c2ecf20Sopenharmony_ci	smp_wmb();
3918c2ecf20Sopenharmony_ci}
3928c2ecf20Sopenharmony_ci
3938c2ecf20Sopenharmony_cistatic int __packet_get_status(const struct packet_sock *po, void *frame)
3948c2ecf20Sopenharmony_ci{
3958c2ecf20Sopenharmony_ci	union tpacket_uhdr h;
3968c2ecf20Sopenharmony_ci
3978c2ecf20Sopenharmony_ci	smp_rmb();
3988c2ecf20Sopenharmony_ci
3998c2ecf20Sopenharmony_ci	/* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */
4008c2ecf20Sopenharmony_ci
4018c2ecf20Sopenharmony_ci	h.raw = frame;
4028c2ecf20Sopenharmony_ci	switch (po->tp_version) {
4038c2ecf20Sopenharmony_ci	case TPACKET_V1:
4048c2ecf20Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h1->tp_status));
4058c2ecf20Sopenharmony_ci		return READ_ONCE(h.h1->tp_status);
4068c2ecf20Sopenharmony_ci	case TPACKET_V2:
4078c2ecf20Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h2->tp_status));
4088c2ecf20Sopenharmony_ci		return READ_ONCE(h.h2->tp_status);
4098c2ecf20Sopenharmony_ci	case TPACKET_V3:
4108c2ecf20Sopenharmony_ci		flush_dcache_page(pgv_to_page(&h.h3->tp_status));
4118c2ecf20Sopenharmony_ci		return READ_ONCE(h.h3->tp_status);
4128c2ecf20Sopenharmony_ci	default:
4138c2ecf20Sopenharmony_ci		WARN(1, "TPACKET version not supported.\n");
4148c2ecf20Sopenharmony_ci		BUG();
4158c2ecf20Sopenharmony_ci		return 0;
4168c2ecf20Sopenharmony_ci	}
4178c2ecf20Sopenharmony_ci}
4188c2ecf20Sopenharmony_ci
4198c2ecf20Sopenharmony_cistatic __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts,
4208c2ecf20Sopenharmony_ci				   unsigned int flags)
4218c2ecf20Sopenharmony_ci{
4228c2ecf20Sopenharmony_ci	struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
4238c2ecf20Sopenharmony_ci
4248c2ecf20Sopenharmony_ci	if (shhwtstamps &&
4258c2ecf20Sopenharmony_ci	    (flags & SOF_TIMESTAMPING_RAW_HARDWARE) &&
4268c2ecf20Sopenharmony_ci	    ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts))
4278c2ecf20Sopenharmony_ci		return TP_STATUS_TS_RAW_HARDWARE;
4288c2ecf20Sopenharmony_ci
4298c2ecf20Sopenharmony_ci	if ((flags & SOF_TIMESTAMPING_SOFTWARE) &&
4308c2ecf20Sopenharmony_ci	    ktime_to_timespec64_cond(skb->tstamp, ts))
4318c2ecf20Sopenharmony_ci		return TP_STATUS_TS_SOFTWARE;
4328c2ecf20Sopenharmony_ci
4338c2ecf20Sopenharmony_ci	return 0;
4348c2ecf20Sopenharmony_ci}
4358c2ecf20Sopenharmony_ci
4368c2ecf20Sopenharmony_cistatic __u32 __packet_set_timestamp(struct packet_sock *po, void *frame,
4378c2ecf20Sopenharmony_ci				    struct sk_buff *skb)
4388c2ecf20Sopenharmony_ci{
4398c2ecf20Sopenharmony_ci	union tpacket_uhdr h;
4408c2ecf20Sopenharmony_ci	struct timespec64 ts;
4418c2ecf20Sopenharmony_ci	__u32 ts_status;
4428c2ecf20Sopenharmony_ci
4438c2ecf20Sopenharmony_ci	if (!(ts_status = tpacket_get_timestamp(skb, &ts, po->tp_tstamp)))
4448c2ecf20Sopenharmony_ci		return 0;
4458c2ecf20Sopenharmony_ci
4468c2ecf20Sopenharmony_ci	h.raw = frame;
4478c2ecf20Sopenharmony_ci	/*
4488c2ecf20Sopenharmony_ci	 * versions 1 through 3 overflow the timestamps in y2106, since they
4498c2ecf20Sopenharmony_ci	 * all store the seconds in a 32-bit unsigned integer.
4508c2ecf20Sopenharmony_ci	 * If we create a version 4, that should have a 64-bit timestamp,
4518c2ecf20Sopenharmony_ci	 * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit
4528c2ecf20Sopenharmony_ci	 * nanoseconds.
4538c2ecf20Sopenharmony_ci	 */
4548c2ecf20Sopenharmony_ci	switch (po->tp_version) {
4558c2ecf20Sopenharmony_ci	case TPACKET_V1:
4568c2ecf20Sopenharmony_ci		h.h1->tp_sec = ts.tv_sec;
4578c2ecf20Sopenharmony_ci		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
4588c2ecf20Sopenharmony_ci		break;
4598c2ecf20Sopenharmony_ci	case TPACKET_V2:
4608c2ecf20Sopenharmony_ci		h.h2->tp_sec = ts.tv_sec;
4618c2ecf20Sopenharmony_ci		h.h2->tp_nsec = ts.tv_nsec;
4628c2ecf20Sopenharmony_ci		break;
4638c2ecf20Sopenharmony_ci	case TPACKET_V3:
4648c2ecf20Sopenharmony_ci		h.h3->tp_sec = ts.tv_sec;
4658c2ecf20Sopenharmony_ci		h.h3->tp_nsec = ts.tv_nsec;
4668c2ecf20Sopenharmony_ci		break;
4678c2ecf20Sopenharmony_ci	default:
4688c2ecf20Sopenharmony_ci		WARN(1, "TPACKET version not supported.\n");
4698c2ecf20Sopenharmony_ci		BUG();
4708c2ecf20Sopenharmony_ci	}
4718c2ecf20Sopenharmony_ci
4728c2ecf20Sopenharmony_ci	/* one flush is safe, as both fields always lie on the same cacheline */
4738c2ecf20Sopenharmony_ci	flush_dcache_page(pgv_to_page(&h.h1->tp_sec));
4748c2ecf20Sopenharmony_ci	smp_wmb();
4758c2ecf20Sopenharmony_ci
4768c2ecf20Sopenharmony_ci	return ts_status;
4778c2ecf20Sopenharmony_ci}
4788c2ecf20Sopenharmony_ci
4798c2ecf20Sopenharmony_cistatic void *packet_lookup_frame(const struct packet_sock *po,
4808c2ecf20Sopenharmony_ci				 const struct packet_ring_buffer *rb,
4818c2ecf20Sopenharmony_ci				 unsigned int position,
4828c2ecf20Sopenharmony_ci				 int status)
4838c2ecf20Sopenharmony_ci{
4848c2ecf20Sopenharmony_ci	unsigned int pg_vec_pos, frame_offset;
4858c2ecf20Sopenharmony_ci	union tpacket_uhdr h;
4868c2ecf20Sopenharmony_ci
4878c2ecf20Sopenharmony_ci	pg_vec_pos = position / rb->frames_per_block;
4888c2ecf20Sopenharmony_ci	frame_offset = position % rb->frames_per_block;
4898c2ecf20Sopenharmony_ci
4908c2ecf20Sopenharmony_ci	h.raw = rb->pg_vec[pg_vec_pos].buffer +
4918c2ecf20Sopenharmony_ci		(frame_offset * rb->frame_size);
4928c2ecf20Sopenharmony_ci
4938c2ecf20Sopenharmony_ci	if (status != __packet_get_status(po, h.raw))
4948c2ecf20Sopenharmony_ci		return NULL;
4958c2ecf20Sopenharmony_ci
4968c2ecf20Sopenharmony_ci	return h.raw;
4978c2ecf20Sopenharmony_ci}
4988c2ecf20Sopenharmony_ci
4998c2ecf20Sopenharmony_cistatic void *packet_current_frame(struct packet_sock *po,
5008c2ecf20Sopenharmony_ci		struct packet_ring_buffer *rb,
5018c2ecf20Sopenharmony_ci		int status)
5028c2ecf20Sopenharmony_ci{
5038c2ecf20Sopenharmony_ci	return packet_lookup_frame(po, rb, rb->head, status);
5048c2ecf20Sopenharmony_ci}
5058c2ecf20Sopenharmony_ci
5068c2ecf20Sopenharmony_cistatic void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc)
5078c2ecf20Sopenharmony_ci{
5088c2ecf20Sopenharmony_ci	del_timer_sync(&pkc->retire_blk_timer);
5098c2ecf20Sopenharmony_ci}
5108c2ecf20Sopenharmony_ci
5118c2ecf20Sopenharmony_cistatic void prb_shutdown_retire_blk_timer(struct packet_sock *po,
5128c2ecf20Sopenharmony_ci		struct sk_buff_head *rb_queue)
5138c2ecf20Sopenharmony_ci{
5148c2ecf20Sopenharmony_ci	struct tpacket_kbdq_core *pkc;
5158c2ecf20Sopenharmony_ci
5168c2ecf20Sopenharmony_ci	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
5178c2ecf20Sopenharmony_ci
5188c2ecf20Sopenharmony_ci	spin_lock_bh(&rb_queue->lock);
5198c2ecf20Sopenharmony_ci	pkc->delete_blk_timer = 1;
5208c2ecf20Sopenharmony_ci	spin_unlock_bh(&rb_queue->lock);
5218c2ecf20Sopenharmony_ci
5228c2ecf20Sopenharmony_ci	prb_del_retire_blk_timer(pkc);
5238c2ecf20Sopenharmony_ci}
5248c2ecf20Sopenharmony_ci
5258c2ecf20Sopenharmony_cistatic void prb_setup_retire_blk_timer(struct packet_sock *po)
5268c2ecf20Sopenharmony_ci{
5278c2ecf20Sopenharmony_ci	struct tpacket_kbdq_core *pkc;
5288c2ecf20Sopenharmony_ci
5298c2ecf20Sopenharmony_ci	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
5308c2ecf20Sopenharmony_ci	timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired,
5318c2ecf20Sopenharmony_ci		    0);
5328c2ecf20Sopenharmony_ci	pkc->retire_blk_timer.expires = jiffies;
5338c2ecf20Sopenharmony_ci}
5348c2ecf20Sopenharmony_ci
5358c2ecf20Sopenharmony_cistatic int prb_calc_retire_blk_tmo(struct packet_sock *po,
5368c2ecf20Sopenharmony_ci				int blk_size_in_bytes)
5378c2ecf20Sopenharmony_ci{
5388c2ecf20Sopenharmony_ci	struct net_device *dev;
5398c2ecf20Sopenharmony_ci	unsigned int mbits, div;
5408c2ecf20Sopenharmony_ci	struct ethtool_link_ksettings ecmd;
5418c2ecf20Sopenharmony_ci	int err;
5428c2ecf20Sopenharmony_ci
5438c2ecf20Sopenharmony_ci	rtnl_lock();
5448c2ecf20Sopenharmony_ci	dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex);
5458c2ecf20Sopenharmony_ci	if (unlikely(!dev)) {
5468c2ecf20Sopenharmony_ci		rtnl_unlock();
5478c2ecf20Sopenharmony_ci		return DEFAULT_PRB_RETIRE_TOV;
5488c2ecf20Sopenharmony_ci	}
5498c2ecf20Sopenharmony_ci	err = __ethtool_get_link_ksettings(dev, &ecmd);
5508c2ecf20Sopenharmony_ci	rtnl_unlock();
5518c2ecf20Sopenharmony_ci	if (err)
5528c2ecf20Sopenharmony_ci		return DEFAULT_PRB_RETIRE_TOV;
5538c2ecf20Sopenharmony_ci
5548c2ecf20Sopenharmony_ci	/* If the link speed is so slow you don't really
5558c2ecf20Sopenharmony_ci	 * need to worry about perf anyways
5568c2ecf20Sopenharmony_ci	 */
5578c2ecf20Sopenharmony_ci	if (ecmd.base.speed < SPEED_1000 ||
5588c2ecf20Sopenharmony_ci	    ecmd.base.speed == SPEED_UNKNOWN)
5598c2ecf20Sopenharmony_ci		return DEFAULT_PRB_RETIRE_TOV;
5608c2ecf20Sopenharmony_ci
5618c2ecf20Sopenharmony_ci	div = ecmd.base.speed / 1000;
5628c2ecf20Sopenharmony_ci	mbits = (blk_size_in_bytes * 8) / (1024 * 1024);
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci	if (div)
5658c2ecf20Sopenharmony_ci		mbits /= div;
5668c2ecf20Sopenharmony_ci
5678c2ecf20Sopenharmony_ci	if (div)
5688c2ecf20Sopenharmony_ci		return mbits + 1;
5698c2ecf20Sopenharmony_ci	return mbits;
5708c2ecf20Sopenharmony_ci}
5718c2ecf20Sopenharmony_ci
5728c2ecf20Sopenharmony_cistatic void prb_init_ft_ops(struct tpacket_kbdq_core *p1,
5738c2ecf20Sopenharmony_ci			union tpacket_req_u *req_u)
5748c2ecf20Sopenharmony_ci{
5758c2ecf20Sopenharmony_ci	p1->feature_req_word = req_u->req3.tp_feature_req_word;
5768c2ecf20Sopenharmony_ci}
5778c2ecf20Sopenharmony_ci
5788c2ecf20Sopenharmony_cistatic void init_prb_bdqc(struct packet_sock *po,
5798c2ecf20Sopenharmony_ci			struct packet_ring_buffer *rb,
5808c2ecf20Sopenharmony_ci			struct pgv *pg_vec,
5818c2ecf20Sopenharmony_ci			union tpacket_req_u *req_u)
5828c2ecf20Sopenharmony_ci{
5838c2ecf20Sopenharmony_ci	struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb);
5848c2ecf20Sopenharmony_ci	struct tpacket_block_desc *pbd;
5858c2ecf20Sopenharmony_ci
5868c2ecf20Sopenharmony_ci	memset(p1, 0x0, sizeof(*p1));
5878c2ecf20Sopenharmony_ci
5888c2ecf20Sopenharmony_ci	p1->knxt_seq_num = 1;
5898c2ecf20Sopenharmony_ci	p1->pkbdq = pg_vec;
5908c2ecf20Sopenharmony_ci	pbd = (struct tpacket_block_desc *)pg_vec[0].buffer;
5918c2ecf20Sopenharmony_ci	p1->pkblk_start	= pg_vec[0].buffer;
5928c2ecf20Sopenharmony_ci	p1->kblk_size = req_u->req3.tp_block_size;
5938c2ecf20Sopenharmony_ci	p1->knum_blocks	= req_u->req3.tp_block_nr;
5948c2ecf20Sopenharmony_ci	p1->hdrlen = po->tp_hdrlen;
5958c2ecf20Sopenharmony_ci	p1->version = po->tp_version;
5968c2ecf20Sopenharmony_ci	p1->last_kactive_blk_num = 0;
5978c2ecf20Sopenharmony_ci	po->stats.stats3.tp_freeze_q_cnt = 0;
5988c2ecf20Sopenharmony_ci	if (req_u->req3.tp_retire_blk_tov)
5998c2ecf20Sopenharmony_ci		p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov;
6008c2ecf20Sopenharmony_ci	else
6018c2ecf20Sopenharmony_ci		p1->retire_blk_tov = prb_calc_retire_blk_tmo(po,
6028c2ecf20Sopenharmony_ci						req_u->req3.tp_block_size);
6038c2ecf20Sopenharmony_ci	p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov);
6048c2ecf20Sopenharmony_ci	p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv;
6058c2ecf20Sopenharmony_ci	rwlock_init(&p1->blk_fill_in_prog_lock);
6068c2ecf20Sopenharmony_ci
6078c2ecf20Sopenharmony_ci	p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv);
6088c2ecf20Sopenharmony_ci	prb_init_ft_ops(p1, req_u);
6098c2ecf20Sopenharmony_ci	prb_setup_retire_blk_timer(po);
6108c2ecf20Sopenharmony_ci	prb_open_block(p1, pbd);
6118c2ecf20Sopenharmony_ci}
6128c2ecf20Sopenharmony_ci
6138c2ecf20Sopenharmony_ci/*  Do NOT update the last_blk_num first.
6148c2ecf20Sopenharmony_ci *  Assumes sk_buff_head lock is held.
6158c2ecf20Sopenharmony_ci */
6168c2ecf20Sopenharmony_cistatic void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc)
6178c2ecf20Sopenharmony_ci{
6188c2ecf20Sopenharmony_ci	mod_timer(&pkc->retire_blk_timer,
6198c2ecf20Sopenharmony_ci			jiffies + pkc->tov_in_jiffies);
6208c2ecf20Sopenharmony_ci	pkc->last_kactive_blk_num = pkc->kactive_blk_num;
6218c2ecf20Sopenharmony_ci}
6228c2ecf20Sopenharmony_ci
6238c2ecf20Sopenharmony_ci/*
6248c2ecf20Sopenharmony_ci * Timer logic:
6258c2ecf20Sopenharmony_ci * 1) We refresh the timer only when we open a block.
6268c2ecf20Sopenharmony_ci *    By doing this we don't waste cycles refreshing the timer
6278c2ecf20Sopenharmony_ci *	  on packet-by-packet basis.
6288c2ecf20Sopenharmony_ci *
6298c2ecf20Sopenharmony_ci * With a 1MB block-size, on a 1Gbps line, it will take
6308c2ecf20Sopenharmony_ci * i) ~8 ms to fill a block + ii) memcpy etc.
6318c2ecf20Sopenharmony_ci * In this cut we are not accounting for the memcpy time.
6328c2ecf20Sopenharmony_ci *
6338c2ecf20Sopenharmony_ci * So, if the user sets the 'tmo' to 10ms then the timer
6348c2ecf20Sopenharmony_ci * will never fire while the block is still getting filled
6358c2ecf20Sopenharmony_ci * (which is what we want). However, the user could choose
6368c2ecf20Sopenharmony_ci * to close a block early and that's fine.
6378c2ecf20Sopenharmony_ci *
6388c2ecf20Sopenharmony_ci * But when the timer does fire, we check whether or not to refresh it.
6398c2ecf20Sopenharmony_ci * Since the tmo granularity is in msecs, it is not too expensive
6408c2ecf20Sopenharmony_ci * to refresh the timer, lets say every '8' msecs.
6418c2ecf20Sopenharmony_ci * Either the user can set the 'tmo' or we can derive it based on
6428c2ecf20Sopenharmony_ci * a) line-speed and b) block-size.
6438c2ecf20Sopenharmony_ci * prb_calc_retire_blk_tmo() calculates the tmo.
6448c2ecf20Sopenharmony_ci *
6458c2ecf20Sopenharmony_ci */
6468c2ecf20Sopenharmony_cistatic void prb_retire_rx_blk_timer_expired(struct timer_list *t)
6478c2ecf20Sopenharmony_ci{
6488c2ecf20Sopenharmony_ci	struct packet_sock *po =
6498c2ecf20Sopenharmony_ci		from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer);
6508c2ecf20Sopenharmony_ci	struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
6518c2ecf20Sopenharmony_ci	unsigned int frozen;
6528c2ecf20Sopenharmony_ci	struct tpacket_block_desc *pbd;
6538c2ecf20Sopenharmony_ci
6548c2ecf20Sopenharmony_ci	spin_lock(&po->sk.sk_receive_queue.lock);
6558c2ecf20Sopenharmony_ci
6568c2ecf20Sopenharmony_ci	frozen = prb_queue_frozen(pkc);
6578c2ecf20Sopenharmony_ci	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
6588c2ecf20Sopenharmony_ci
6598c2ecf20Sopenharmony_ci	if (unlikely(pkc->delete_blk_timer))
6608c2ecf20Sopenharmony_ci		goto out;
6618c2ecf20Sopenharmony_ci
6628c2ecf20Sopenharmony_ci	/* We only need to plug the race when the block is partially filled.
6638c2ecf20Sopenharmony_ci	 * tpacket_rcv:
6648c2ecf20Sopenharmony_ci	 *		lock(); increment BLOCK_NUM_PKTS; unlock()
6658c2ecf20Sopenharmony_ci	 *		copy_bits() is in progress ...
6668c2ecf20Sopenharmony_ci	 *		timer fires on other cpu:
6678c2ecf20Sopenharmony_ci	 *		we can't retire the current block because copy_bits
6688c2ecf20Sopenharmony_ci	 *		is in progress.
6698c2ecf20Sopenharmony_ci	 *
6708c2ecf20Sopenharmony_ci	 */
6718c2ecf20Sopenharmony_ci	if (BLOCK_NUM_PKTS(pbd)) {
6728c2ecf20Sopenharmony_ci		/* Waiting for skb_copy_bits to finish... */
6738c2ecf20Sopenharmony_ci		write_lock(&pkc->blk_fill_in_prog_lock);
6748c2ecf20Sopenharmony_ci		write_unlock(&pkc->blk_fill_in_prog_lock);
6758c2ecf20Sopenharmony_ci	}
6768c2ecf20Sopenharmony_ci
6778c2ecf20Sopenharmony_ci	if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) {
6788c2ecf20Sopenharmony_ci		if (!frozen) {
6798c2ecf20Sopenharmony_ci			if (!BLOCK_NUM_PKTS(pbd)) {
6808c2ecf20Sopenharmony_ci				/* An empty block. Just refresh the timer. */
6818c2ecf20Sopenharmony_ci				goto refresh_timer;
6828c2ecf20Sopenharmony_ci			}
6838c2ecf20Sopenharmony_ci			prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO);
6848c2ecf20Sopenharmony_ci			if (!prb_dispatch_next_block(pkc, po))
6858c2ecf20Sopenharmony_ci				goto refresh_timer;
6868c2ecf20Sopenharmony_ci			else
6878c2ecf20Sopenharmony_ci				goto out;
6888c2ecf20Sopenharmony_ci		} else {
6898c2ecf20Sopenharmony_ci			/* Case 1. Queue was frozen because user-space was
6908c2ecf20Sopenharmony_ci			 *	   lagging behind.
6918c2ecf20Sopenharmony_ci			 */
6928c2ecf20Sopenharmony_ci			if (prb_curr_blk_in_use(pbd)) {
6938c2ecf20Sopenharmony_ci				/*
6948c2ecf20Sopenharmony_ci				 * Ok, user-space is still behind.
6958c2ecf20Sopenharmony_ci				 * So just refresh the timer.
6968c2ecf20Sopenharmony_ci				 */
6978c2ecf20Sopenharmony_ci				goto refresh_timer;
6988c2ecf20Sopenharmony_ci			} else {
6998c2ecf20Sopenharmony_ci			       /* Case 2. queue was frozen,user-space caught up,
7008c2ecf20Sopenharmony_ci				* now the link went idle && the timer fired.
7018c2ecf20Sopenharmony_ci				* We don't have a block to close.So we open this
7028c2ecf20Sopenharmony_ci				* block and restart the timer.
7038c2ecf20Sopenharmony_ci				* opening a block thaws the queue,restarts timer
7048c2ecf20Sopenharmony_ci				* Thawing/timer-refresh is a side effect.
7058c2ecf20Sopenharmony_ci				*/
7068c2ecf20Sopenharmony_ci				prb_open_block(pkc, pbd);
7078c2ecf20Sopenharmony_ci				goto out;
7088c2ecf20Sopenharmony_ci			}
7098c2ecf20Sopenharmony_ci		}
7108c2ecf20Sopenharmony_ci	}
7118c2ecf20Sopenharmony_ci
7128c2ecf20Sopenharmony_cirefresh_timer:
7138c2ecf20Sopenharmony_ci	_prb_refresh_rx_retire_blk_timer(pkc);
7148c2ecf20Sopenharmony_ci
7158c2ecf20Sopenharmony_ciout:
7168c2ecf20Sopenharmony_ci	spin_unlock(&po->sk.sk_receive_queue.lock);
7178c2ecf20Sopenharmony_ci}
7188c2ecf20Sopenharmony_ci
7198c2ecf20Sopenharmony_cistatic void prb_flush_block(struct tpacket_kbdq_core *pkc1,
7208c2ecf20Sopenharmony_ci		struct tpacket_block_desc *pbd1, __u32 status)
7218c2ecf20Sopenharmony_ci{
7228c2ecf20Sopenharmony_ci	/* Flush everything minus the block header */
7238c2ecf20Sopenharmony_ci
7248c2ecf20Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
7258c2ecf20Sopenharmony_ci	u8 *start, *end;
7268c2ecf20Sopenharmony_ci
7278c2ecf20Sopenharmony_ci	start = (u8 *)pbd1;
7288c2ecf20Sopenharmony_ci
7298c2ecf20Sopenharmony_ci	/* Skip the block header(we know header WILL fit in 4K) */
7308c2ecf20Sopenharmony_ci	start += PAGE_SIZE;
7318c2ecf20Sopenharmony_ci
7328c2ecf20Sopenharmony_ci	end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end);
7338c2ecf20Sopenharmony_ci	for (; start < end; start += PAGE_SIZE)
7348c2ecf20Sopenharmony_ci		flush_dcache_page(pgv_to_page(start));
7358c2ecf20Sopenharmony_ci
7368c2ecf20Sopenharmony_ci	smp_wmb();
7378c2ecf20Sopenharmony_ci#endif
7388c2ecf20Sopenharmony_ci
7398c2ecf20Sopenharmony_ci	/* Now update the block status. */
7408c2ecf20Sopenharmony_ci
7418c2ecf20Sopenharmony_ci	BLOCK_STATUS(pbd1) = status;
7428c2ecf20Sopenharmony_ci
7438c2ecf20Sopenharmony_ci	/* Flush the block header */
7448c2ecf20Sopenharmony_ci
7458c2ecf20Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
7468c2ecf20Sopenharmony_ci	start = (u8 *)pbd1;
7478c2ecf20Sopenharmony_ci	flush_dcache_page(pgv_to_page(start));
7488c2ecf20Sopenharmony_ci
7498c2ecf20Sopenharmony_ci	smp_wmb();
7508c2ecf20Sopenharmony_ci#endif
7518c2ecf20Sopenharmony_ci}
7528c2ecf20Sopenharmony_ci
7538c2ecf20Sopenharmony_ci/*
7548c2ecf20Sopenharmony_ci * Side effect:
7558c2ecf20Sopenharmony_ci *
7568c2ecf20Sopenharmony_ci * 1) flush the block
7578c2ecf20Sopenharmony_ci * 2) Increment active_blk_num
7588c2ecf20Sopenharmony_ci *
7598c2ecf20Sopenharmony_ci * Note:We DONT refresh the timer on purpose.
7608c2ecf20Sopenharmony_ci *	Because almost always the next block will be opened.
7618c2ecf20Sopenharmony_ci */
7628c2ecf20Sopenharmony_cistatic void prb_close_block(struct tpacket_kbdq_core *pkc1,
7638c2ecf20Sopenharmony_ci		struct tpacket_block_desc *pbd1,
7648c2ecf20Sopenharmony_ci		struct packet_sock *po, unsigned int stat)
7658c2ecf20Sopenharmony_ci{
7668c2ecf20Sopenharmony_ci	__u32 status = TP_STATUS_USER | stat;
7678c2ecf20Sopenharmony_ci
7688c2ecf20Sopenharmony_ci	struct tpacket3_hdr *last_pkt;
7698c2ecf20Sopenharmony_ci	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
7708c2ecf20Sopenharmony_ci	struct sock *sk = &po->sk;
7718c2ecf20Sopenharmony_ci
7728c2ecf20Sopenharmony_ci	if (atomic_read(&po->tp_drops))
7738c2ecf20Sopenharmony_ci		status |= TP_STATUS_LOSING;
7748c2ecf20Sopenharmony_ci
7758c2ecf20Sopenharmony_ci	last_pkt = (struct tpacket3_hdr *)pkc1->prev;
7768c2ecf20Sopenharmony_ci	last_pkt->tp_next_offset = 0;
7778c2ecf20Sopenharmony_ci
7788c2ecf20Sopenharmony_ci	/* Get the ts of the last pkt */
7798c2ecf20Sopenharmony_ci	if (BLOCK_NUM_PKTS(pbd1)) {
7808c2ecf20Sopenharmony_ci		h1->ts_last_pkt.ts_sec = last_pkt->tp_sec;
7818c2ecf20Sopenharmony_ci		h1->ts_last_pkt.ts_nsec	= last_pkt->tp_nsec;
7828c2ecf20Sopenharmony_ci	} else {
7838c2ecf20Sopenharmony_ci		/* Ok, we tmo'd - so get the current time.
7848c2ecf20Sopenharmony_ci		 *
7858c2ecf20Sopenharmony_ci		 * It shouldn't really happen as we don't close empty
7868c2ecf20Sopenharmony_ci		 * blocks. See prb_retire_rx_blk_timer_expired().
7878c2ecf20Sopenharmony_ci		 */
7888c2ecf20Sopenharmony_ci		struct timespec64 ts;
7898c2ecf20Sopenharmony_ci		ktime_get_real_ts64(&ts);
7908c2ecf20Sopenharmony_ci		h1->ts_last_pkt.ts_sec = ts.tv_sec;
7918c2ecf20Sopenharmony_ci		h1->ts_last_pkt.ts_nsec	= ts.tv_nsec;
7928c2ecf20Sopenharmony_ci	}
7938c2ecf20Sopenharmony_ci
7948c2ecf20Sopenharmony_ci	smp_wmb();
7958c2ecf20Sopenharmony_ci
7968c2ecf20Sopenharmony_ci	/* Flush the block */
7978c2ecf20Sopenharmony_ci	prb_flush_block(pkc1, pbd1, status);
7988c2ecf20Sopenharmony_ci
7998c2ecf20Sopenharmony_ci	sk->sk_data_ready(sk);
8008c2ecf20Sopenharmony_ci
8018c2ecf20Sopenharmony_ci	pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1);
8028c2ecf20Sopenharmony_ci}
8038c2ecf20Sopenharmony_ci
8048c2ecf20Sopenharmony_cistatic void prb_thaw_queue(struct tpacket_kbdq_core *pkc)
8058c2ecf20Sopenharmony_ci{
8068c2ecf20Sopenharmony_ci	pkc->reset_pending_on_curr_blk = 0;
8078c2ecf20Sopenharmony_ci}
8088c2ecf20Sopenharmony_ci
8098c2ecf20Sopenharmony_ci/*
8108c2ecf20Sopenharmony_ci * Side effect of opening a block:
8118c2ecf20Sopenharmony_ci *
8128c2ecf20Sopenharmony_ci * 1) prb_queue is thawed.
8138c2ecf20Sopenharmony_ci * 2) retire_blk_timer is refreshed.
8148c2ecf20Sopenharmony_ci *
8158c2ecf20Sopenharmony_ci */
8168c2ecf20Sopenharmony_cistatic void prb_open_block(struct tpacket_kbdq_core *pkc1,
8178c2ecf20Sopenharmony_ci	struct tpacket_block_desc *pbd1)
8188c2ecf20Sopenharmony_ci{
8198c2ecf20Sopenharmony_ci	struct timespec64 ts;
8208c2ecf20Sopenharmony_ci	struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1;
8218c2ecf20Sopenharmony_ci
8228c2ecf20Sopenharmony_ci	smp_rmb();
8238c2ecf20Sopenharmony_ci
8248c2ecf20Sopenharmony_ci	/* We could have just memset this but we will lose the
8258c2ecf20Sopenharmony_ci	 * flexibility of making the priv area sticky
8268c2ecf20Sopenharmony_ci	 */
8278c2ecf20Sopenharmony_ci
8288c2ecf20Sopenharmony_ci	BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++;
8298c2ecf20Sopenharmony_ci	BLOCK_NUM_PKTS(pbd1) = 0;
8308c2ecf20Sopenharmony_ci	BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
8318c2ecf20Sopenharmony_ci
8328c2ecf20Sopenharmony_ci	ktime_get_real_ts64(&ts);
8338c2ecf20Sopenharmony_ci
8348c2ecf20Sopenharmony_ci	h1->ts_first_pkt.ts_sec = ts.tv_sec;
8358c2ecf20Sopenharmony_ci	h1->ts_first_pkt.ts_nsec = ts.tv_nsec;
8368c2ecf20Sopenharmony_ci
8378c2ecf20Sopenharmony_ci	pkc1->pkblk_start = (char *)pbd1;
8388c2ecf20Sopenharmony_ci	pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
8398c2ecf20Sopenharmony_ci
8408c2ecf20Sopenharmony_ci	BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv);
8418c2ecf20Sopenharmony_ci	BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN;
8428c2ecf20Sopenharmony_ci
8438c2ecf20Sopenharmony_ci	pbd1->version = pkc1->version;
8448c2ecf20Sopenharmony_ci	pkc1->prev = pkc1->nxt_offset;
8458c2ecf20Sopenharmony_ci	pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size;
8468c2ecf20Sopenharmony_ci
8478c2ecf20Sopenharmony_ci	prb_thaw_queue(pkc1);
8488c2ecf20Sopenharmony_ci	_prb_refresh_rx_retire_blk_timer(pkc1);
8498c2ecf20Sopenharmony_ci
8508c2ecf20Sopenharmony_ci	smp_wmb();
8518c2ecf20Sopenharmony_ci}
8528c2ecf20Sopenharmony_ci
8538c2ecf20Sopenharmony_ci/*
8548c2ecf20Sopenharmony_ci * Queue freeze logic:
8558c2ecf20Sopenharmony_ci * 1) Assume tp_block_nr = 8 blocks.
8568c2ecf20Sopenharmony_ci * 2) At time 't0', user opens Rx ring.
8578c2ecf20Sopenharmony_ci * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7
8588c2ecf20Sopenharmony_ci * 4) user-space is either sleeping or processing block '0'.
8598c2ecf20Sopenharmony_ci * 5) tpacket_rcv is currently filling block '7', since there is no space left,
8608c2ecf20Sopenharmony_ci *    it will close block-7,loop around and try to fill block '0'.
8618c2ecf20Sopenharmony_ci *    call-flow:
8628c2ecf20Sopenharmony_ci *    __packet_lookup_frame_in_block
8638c2ecf20Sopenharmony_ci *      prb_retire_current_block()
8648c2ecf20Sopenharmony_ci *      prb_dispatch_next_block()
8658c2ecf20Sopenharmony_ci *        |->(BLOCK_STATUS == USER) evaluates to true
8668c2ecf20Sopenharmony_ci *    5.1) Since block-0 is currently in-use, we just freeze the queue.
8678c2ecf20Sopenharmony_ci * 6) Now there are two cases:
8688c2ecf20Sopenharmony_ci *    6.1) Link goes idle right after the queue is frozen.
8698c2ecf20Sopenharmony_ci *         But remember, the last open_block() refreshed the timer.
8708c2ecf20Sopenharmony_ci *         When this timer expires,it will refresh itself so that we can
8718c2ecf20Sopenharmony_ci *         re-open block-0 in near future.
8728c2ecf20Sopenharmony_ci *    6.2) Link is busy and keeps on receiving packets. This is a simple
8738c2ecf20Sopenharmony_ci *         case and __packet_lookup_frame_in_block will check if block-0
8748c2ecf20Sopenharmony_ci *         is free and can now be re-used.
8758c2ecf20Sopenharmony_ci */
8768c2ecf20Sopenharmony_cistatic void prb_freeze_queue(struct tpacket_kbdq_core *pkc,
8778c2ecf20Sopenharmony_ci				  struct packet_sock *po)
8788c2ecf20Sopenharmony_ci{
8798c2ecf20Sopenharmony_ci	pkc->reset_pending_on_curr_blk = 1;
8808c2ecf20Sopenharmony_ci	po->stats.stats3.tp_freeze_q_cnt++;
8818c2ecf20Sopenharmony_ci}
8828c2ecf20Sopenharmony_ci
/* Space a packet of @pkt_len bytes occupies once rounded up to V3_ALIGNMENT. */
#define TOTAL_PKT_LEN_INCL_ALIGN(pkt_len) (ALIGN((pkt_len), V3_ALIGNMENT))
8848c2ecf20Sopenharmony_ci
8858c2ecf20Sopenharmony_ci/*
8868c2ecf20Sopenharmony_ci * If the next block is free then we will dispatch it
8878c2ecf20Sopenharmony_ci * and return a good offset.
8888c2ecf20Sopenharmony_ci * Else, we will freeze the queue.
8898c2ecf20Sopenharmony_ci * So, caller must check the return value.
8908c2ecf20Sopenharmony_ci */
8918c2ecf20Sopenharmony_cistatic void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
8928c2ecf20Sopenharmony_ci		struct packet_sock *po)
8938c2ecf20Sopenharmony_ci{
8948c2ecf20Sopenharmony_ci	struct tpacket_block_desc *pbd;
8958c2ecf20Sopenharmony_ci
8968c2ecf20Sopenharmony_ci	smp_rmb();
8978c2ecf20Sopenharmony_ci
8988c2ecf20Sopenharmony_ci	/* 1. Get current block num */
8998c2ecf20Sopenharmony_ci	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
9008c2ecf20Sopenharmony_ci
9018c2ecf20Sopenharmony_ci	/* 2. If this block is currently in_use then freeze the queue */
9028c2ecf20Sopenharmony_ci	if (TP_STATUS_USER & BLOCK_STATUS(pbd)) {
9038c2ecf20Sopenharmony_ci		prb_freeze_queue(pkc, po);
9048c2ecf20Sopenharmony_ci		return NULL;
9058c2ecf20Sopenharmony_ci	}
9068c2ecf20Sopenharmony_ci
9078c2ecf20Sopenharmony_ci	/*
9088c2ecf20Sopenharmony_ci	 * 3.
9098c2ecf20Sopenharmony_ci	 * open this block and return the offset where the first packet
9108c2ecf20Sopenharmony_ci	 * needs to get stored.
9118c2ecf20Sopenharmony_ci	 */
9128c2ecf20Sopenharmony_ci	prb_open_block(pkc, pbd);
9138c2ecf20Sopenharmony_ci	return (void *)pkc->nxt_offset;
9148c2ecf20Sopenharmony_ci}
9158c2ecf20Sopenharmony_ci
9168c2ecf20Sopenharmony_cistatic void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
9178c2ecf20Sopenharmony_ci		struct packet_sock *po, unsigned int status)
9188c2ecf20Sopenharmony_ci{
9198c2ecf20Sopenharmony_ci	struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
9208c2ecf20Sopenharmony_ci
9218c2ecf20Sopenharmony_ci	/* retire/close the current block */
9228c2ecf20Sopenharmony_ci	if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) {
9238c2ecf20Sopenharmony_ci		/*
9248c2ecf20Sopenharmony_ci		 * Plug the case where copy_bits() is in progress on
9258c2ecf20Sopenharmony_ci		 * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't
9268c2ecf20Sopenharmony_ci		 * have space to copy the pkt in the current block and
9278c2ecf20Sopenharmony_ci		 * called prb_retire_current_block()
9288c2ecf20Sopenharmony_ci		 *
9298c2ecf20Sopenharmony_ci		 * We don't need to worry about the TMO case because
9308c2ecf20Sopenharmony_ci		 * the timer-handler already handled this case.
9318c2ecf20Sopenharmony_ci		 */
9328c2ecf20Sopenharmony_ci		if (!(status & TP_STATUS_BLK_TMO)) {
9338c2ecf20Sopenharmony_ci			/* Waiting for skb_copy_bits to finish... */
9348c2ecf20Sopenharmony_ci			write_lock(&pkc->blk_fill_in_prog_lock);
9358c2ecf20Sopenharmony_ci			write_unlock(&pkc->blk_fill_in_prog_lock);
9368c2ecf20Sopenharmony_ci		}
9378c2ecf20Sopenharmony_ci		prb_close_block(pkc, pbd, po, status);
9388c2ecf20Sopenharmony_ci		return;
9398c2ecf20Sopenharmony_ci	}
9408c2ecf20Sopenharmony_ci}
9418c2ecf20Sopenharmony_ci
9428c2ecf20Sopenharmony_cistatic int prb_curr_blk_in_use(struct tpacket_block_desc *pbd)
9438c2ecf20Sopenharmony_ci{
9448c2ecf20Sopenharmony_ci	return TP_STATUS_USER & BLOCK_STATUS(pbd);
9458c2ecf20Sopenharmony_ci}
9468c2ecf20Sopenharmony_ci
9478c2ecf20Sopenharmony_cistatic int prb_queue_frozen(struct tpacket_kbdq_core *pkc)
9488c2ecf20Sopenharmony_ci{
9498c2ecf20Sopenharmony_ci	return pkc->reset_pending_on_curr_blk;
9508c2ecf20Sopenharmony_ci}
9518c2ecf20Sopenharmony_ci
9528c2ecf20Sopenharmony_cistatic void prb_clear_blk_fill_status(struct packet_ring_buffer *rb)
9538c2ecf20Sopenharmony_ci	__releases(&pkc->blk_fill_in_prog_lock)
9548c2ecf20Sopenharmony_ci{
9558c2ecf20Sopenharmony_ci	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
9568c2ecf20Sopenharmony_ci
9578c2ecf20Sopenharmony_ci	read_unlock(&pkc->blk_fill_in_prog_lock);
9588c2ecf20Sopenharmony_ci}
9598c2ecf20Sopenharmony_ci
9608c2ecf20Sopenharmony_cistatic void prb_fill_rxhash(struct tpacket_kbdq_core *pkc,
9618c2ecf20Sopenharmony_ci			struct tpacket3_hdr *ppd)
9628c2ecf20Sopenharmony_ci{
9638c2ecf20Sopenharmony_ci	ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb);
9648c2ecf20Sopenharmony_ci}
9658c2ecf20Sopenharmony_ci
9668c2ecf20Sopenharmony_cistatic void prb_clear_rxhash(struct tpacket_kbdq_core *pkc,
9678c2ecf20Sopenharmony_ci			struct tpacket3_hdr *ppd)
9688c2ecf20Sopenharmony_ci{
9698c2ecf20Sopenharmony_ci	ppd->hv1.tp_rxhash = 0;
9708c2ecf20Sopenharmony_ci}
9718c2ecf20Sopenharmony_ci
9728c2ecf20Sopenharmony_cistatic void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc,
9738c2ecf20Sopenharmony_ci			struct tpacket3_hdr *ppd)
9748c2ecf20Sopenharmony_ci{
9758c2ecf20Sopenharmony_ci	if (skb_vlan_tag_present(pkc->skb)) {
9768c2ecf20Sopenharmony_ci		ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb);
9778c2ecf20Sopenharmony_ci		ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto);
9788c2ecf20Sopenharmony_ci		ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
9798c2ecf20Sopenharmony_ci	} else {
9808c2ecf20Sopenharmony_ci		ppd->hv1.tp_vlan_tci = 0;
9818c2ecf20Sopenharmony_ci		ppd->hv1.tp_vlan_tpid = 0;
9828c2ecf20Sopenharmony_ci		ppd->tp_status = TP_STATUS_AVAILABLE;
9838c2ecf20Sopenharmony_ci	}
9848c2ecf20Sopenharmony_ci}
9858c2ecf20Sopenharmony_ci
9868c2ecf20Sopenharmony_cistatic void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc,
9878c2ecf20Sopenharmony_ci			struct tpacket3_hdr *ppd)
9888c2ecf20Sopenharmony_ci{
9898c2ecf20Sopenharmony_ci	ppd->hv1.tp_padding = 0;
9908c2ecf20Sopenharmony_ci	prb_fill_vlan_info(pkc, ppd);
9918c2ecf20Sopenharmony_ci
9928c2ecf20Sopenharmony_ci	if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH)
9938c2ecf20Sopenharmony_ci		prb_fill_rxhash(pkc, ppd);
9948c2ecf20Sopenharmony_ci	else
9958c2ecf20Sopenharmony_ci		prb_clear_rxhash(pkc, ppd);
9968c2ecf20Sopenharmony_ci}
9978c2ecf20Sopenharmony_ci
9988c2ecf20Sopenharmony_cistatic void prb_fill_curr_block(char *curr,
9998c2ecf20Sopenharmony_ci				struct tpacket_kbdq_core *pkc,
10008c2ecf20Sopenharmony_ci				struct tpacket_block_desc *pbd,
10018c2ecf20Sopenharmony_ci				unsigned int len)
10028c2ecf20Sopenharmony_ci	__acquires(&pkc->blk_fill_in_prog_lock)
10038c2ecf20Sopenharmony_ci{
10048c2ecf20Sopenharmony_ci	struct tpacket3_hdr *ppd;
10058c2ecf20Sopenharmony_ci
10068c2ecf20Sopenharmony_ci	ppd  = (struct tpacket3_hdr *)curr;
10078c2ecf20Sopenharmony_ci	ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len);
10088c2ecf20Sopenharmony_ci	pkc->prev = curr;
10098c2ecf20Sopenharmony_ci	pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len);
10108c2ecf20Sopenharmony_ci	BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len);
10118c2ecf20Sopenharmony_ci	BLOCK_NUM_PKTS(pbd) += 1;
10128c2ecf20Sopenharmony_ci	read_lock(&pkc->blk_fill_in_prog_lock);
10138c2ecf20Sopenharmony_ci	prb_run_all_ft_ops(pkc, ppd);
10148c2ecf20Sopenharmony_ci}
10158c2ecf20Sopenharmony_ci
10168c2ecf20Sopenharmony_ci/* Assumes caller has the sk->rx_queue.lock */
10178c2ecf20Sopenharmony_cistatic void *__packet_lookup_frame_in_block(struct packet_sock *po,
10188c2ecf20Sopenharmony_ci					    struct sk_buff *skb,
10198c2ecf20Sopenharmony_ci					    unsigned int len
10208c2ecf20Sopenharmony_ci					    )
10218c2ecf20Sopenharmony_ci{
10228c2ecf20Sopenharmony_ci	struct tpacket_kbdq_core *pkc;
10238c2ecf20Sopenharmony_ci	struct tpacket_block_desc *pbd;
10248c2ecf20Sopenharmony_ci	char *curr, *end;
10258c2ecf20Sopenharmony_ci
10268c2ecf20Sopenharmony_ci	pkc = GET_PBDQC_FROM_RB(&po->rx_ring);
10278c2ecf20Sopenharmony_ci	pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
10288c2ecf20Sopenharmony_ci
10298c2ecf20Sopenharmony_ci	/* Queue is frozen when user space is lagging behind */
10308c2ecf20Sopenharmony_ci	if (prb_queue_frozen(pkc)) {
10318c2ecf20Sopenharmony_ci		/*
10328c2ecf20Sopenharmony_ci		 * Check if that last block which caused the queue to freeze,
10338c2ecf20Sopenharmony_ci		 * is still in_use by user-space.
10348c2ecf20Sopenharmony_ci		 */
10358c2ecf20Sopenharmony_ci		if (prb_curr_blk_in_use(pbd)) {
10368c2ecf20Sopenharmony_ci			/* Can't record this packet */
10378c2ecf20Sopenharmony_ci			return NULL;
10388c2ecf20Sopenharmony_ci		} else {
10398c2ecf20Sopenharmony_ci			/*
10408c2ecf20Sopenharmony_ci			 * Ok, the block was released by user-space.
10418c2ecf20Sopenharmony_ci			 * Now let's open that block.
10428c2ecf20Sopenharmony_ci			 * opening a block also thaws the queue.
10438c2ecf20Sopenharmony_ci			 * Thawing is a side effect.
10448c2ecf20Sopenharmony_ci			 */
10458c2ecf20Sopenharmony_ci			prb_open_block(pkc, pbd);
10468c2ecf20Sopenharmony_ci		}
10478c2ecf20Sopenharmony_ci	}
10488c2ecf20Sopenharmony_ci
10498c2ecf20Sopenharmony_ci	smp_mb();
10508c2ecf20Sopenharmony_ci	curr = pkc->nxt_offset;
10518c2ecf20Sopenharmony_ci	pkc->skb = skb;
10528c2ecf20Sopenharmony_ci	end = (char *)pbd + pkc->kblk_size;
10538c2ecf20Sopenharmony_ci
10548c2ecf20Sopenharmony_ci	/* first try the current block */
10558c2ecf20Sopenharmony_ci	if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) {
10568c2ecf20Sopenharmony_ci		prb_fill_curr_block(curr, pkc, pbd, len);
10578c2ecf20Sopenharmony_ci		return (void *)curr;
10588c2ecf20Sopenharmony_ci	}
10598c2ecf20Sopenharmony_ci
10608c2ecf20Sopenharmony_ci	/* Ok, close the current block */
10618c2ecf20Sopenharmony_ci	prb_retire_current_block(pkc, po, 0);
10628c2ecf20Sopenharmony_ci
10638c2ecf20Sopenharmony_ci	/* Now, try to dispatch the next block */
10648c2ecf20Sopenharmony_ci	curr = (char *)prb_dispatch_next_block(pkc, po);
10658c2ecf20Sopenharmony_ci	if (curr) {
10668c2ecf20Sopenharmony_ci		pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc);
10678c2ecf20Sopenharmony_ci		prb_fill_curr_block(curr, pkc, pbd, len);
10688c2ecf20Sopenharmony_ci		return (void *)curr;
10698c2ecf20Sopenharmony_ci	}
10708c2ecf20Sopenharmony_ci
10718c2ecf20Sopenharmony_ci	/*
10728c2ecf20Sopenharmony_ci	 * No free blocks are available.user_space hasn't caught up yet.
10738c2ecf20Sopenharmony_ci	 * Queue was just frozen and now this packet will get dropped.
10748c2ecf20Sopenharmony_ci	 */
10758c2ecf20Sopenharmony_ci	return NULL;
10768c2ecf20Sopenharmony_ci}
10778c2ecf20Sopenharmony_ci
10788c2ecf20Sopenharmony_cistatic void *packet_current_rx_frame(struct packet_sock *po,
10798c2ecf20Sopenharmony_ci					    struct sk_buff *skb,
10808c2ecf20Sopenharmony_ci					    int status, unsigned int len)
10818c2ecf20Sopenharmony_ci{
10828c2ecf20Sopenharmony_ci	char *curr = NULL;
10838c2ecf20Sopenharmony_ci	switch (po->tp_version) {
10848c2ecf20Sopenharmony_ci	case TPACKET_V1:
10858c2ecf20Sopenharmony_ci	case TPACKET_V2:
10868c2ecf20Sopenharmony_ci		curr = packet_lookup_frame(po, &po->rx_ring,
10878c2ecf20Sopenharmony_ci					po->rx_ring.head, status);
10888c2ecf20Sopenharmony_ci		return curr;
10898c2ecf20Sopenharmony_ci	case TPACKET_V3:
10908c2ecf20Sopenharmony_ci		return __packet_lookup_frame_in_block(po, skb, len);
10918c2ecf20Sopenharmony_ci	default:
10928c2ecf20Sopenharmony_ci		WARN(1, "TPACKET version not supported\n");
10938c2ecf20Sopenharmony_ci		BUG();
10948c2ecf20Sopenharmony_ci		return NULL;
10958c2ecf20Sopenharmony_ci	}
10968c2ecf20Sopenharmony_ci}
10978c2ecf20Sopenharmony_ci
10988c2ecf20Sopenharmony_cistatic void *prb_lookup_block(const struct packet_sock *po,
10998c2ecf20Sopenharmony_ci			      const struct packet_ring_buffer *rb,
11008c2ecf20Sopenharmony_ci			      unsigned int idx,
11018c2ecf20Sopenharmony_ci			      int status)
11028c2ecf20Sopenharmony_ci{
11038c2ecf20Sopenharmony_ci	struct tpacket_kbdq_core *pkc  = GET_PBDQC_FROM_RB(rb);
11048c2ecf20Sopenharmony_ci	struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx);
11058c2ecf20Sopenharmony_ci
11068c2ecf20Sopenharmony_ci	if (status != BLOCK_STATUS(pbd))
11078c2ecf20Sopenharmony_ci		return NULL;
11088c2ecf20Sopenharmony_ci	return pbd;
11098c2ecf20Sopenharmony_ci}
11108c2ecf20Sopenharmony_ci
11118c2ecf20Sopenharmony_cistatic int prb_previous_blk_num(struct packet_ring_buffer *rb)
11128c2ecf20Sopenharmony_ci{
11138c2ecf20Sopenharmony_ci	unsigned int prev;
11148c2ecf20Sopenharmony_ci	if (rb->prb_bdqc.kactive_blk_num)
11158c2ecf20Sopenharmony_ci		prev = rb->prb_bdqc.kactive_blk_num-1;
11168c2ecf20Sopenharmony_ci	else
11178c2ecf20Sopenharmony_ci		prev = rb->prb_bdqc.knum_blocks-1;
11188c2ecf20Sopenharmony_ci	return prev;
11198c2ecf20Sopenharmony_ci}
11208c2ecf20Sopenharmony_ci
/*
 * Look up the block preceding the active one and return it only when its
 * status matches @status (NULL otherwise).
 *
 * Assumes the caller holds the rx_queue.lock.
 */
static void *__prb_previous_block(struct packet_sock *po,
					 struct packet_ring_buffer *rb,
					 int status)
{
	unsigned int blk_idx = prb_previous_blk_num(rb);

	return prb_lookup_block(po, rb, blk_idx, status);
}
11298c2ecf20Sopenharmony_ci
11308c2ecf20Sopenharmony_cistatic void *packet_previous_rx_frame(struct packet_sock *po,
11318c2ecf20Sopenharmony_ci					     struct packet_ring_buffer *rb,
11328c2ecf20Sopenharmony_ci					     int status)
11338c2ecf20Sopenharmony_ci{
11348c2ecf20Sopenharmony_ci	if (po->tp_version <= TPACKET_V2)
11358c2ecf20Sopenharmony_ci		return packet_previous_frame(po, rb, status);
11368c2ecf20Sopenharmony_ci
11378c2ecf20Sopenharmony_ci	return __prb_previous_block(po, rb, status);
11388c2ecf20Sopenharmony_ci}
11398c2ecf20Sopenharmony_ci
11408c2ecf20Sopenharmony_cistatic void packet_increment_rx_head(struct packet_sock *po,
11418c2ecf20Sopenharmony_ci					    struct packet_ring_buffer *rb)
11428c2ecf20Sopenharmony_ci{
11438c2ecf20Sopenharmony_ci	switch (po->tp_version) {
11448c2ecf20Sopenharmony_ci	case TPACKET_V1:
11458c2ecf20Sopenharmony_ci	case TPACKET_V2:
11468c2ecf20Sopenharmony_ci		return packet_increment_head(rb);
11478c2ecf20Sopenharmony_ci	case TPACKET_V3:
11488c2ecf20Sopenharmony_ci	default:
11498c2ecf20Sopenharmony_ci		WARN(1, "TPACKET version not supported.\n");
11508c2ecf20Sopenharmony_ci		BUG();
11518c2ecf20Sopenharmony_ci		return;
11528c2ecf20Sopenharmony_ci	}
11538c2ecf20Sopenharmony_ci}
11548c2ecf20Sopenharmony_ci
11558c2ecf20Sopenharmony_cistatic void *packet_previous_frame(struct packet_sock *po,
11568c2ecf20Sopenharmony_ci		struct packet_ring_buffer *rb,
11578c2ecf20Sopenharmony_ci		int status)
11588c2ecf20Sopenharmony_ci{
11598c2ecf20Sopenharmony_ci	unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
11608c2ecf20Sopenharmony_ci	return packet_lookup_frame(po, rb, previous, status);
11618c2ecf20Sopenharmony_ci}
11628c2ecf20Sopenharmony_ci
11638c2ecf20Sopenharmony_cistatic void packet_increment_head(struct packet_ring_buffer *buff)
11648c2ecf20Sopenharmony_ci{
11658c2ecf20Sopenharmony_ci	buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
11668c2ecf20Sopenharmony_ci}
11678c2ecf20Sopenharmony_ci
11688c2ecf20Sopenharmony_cistatic void packet_inc_pending(struct packet_ring_buffer *rb)
11698c2ecf20Sopenharmony_ci{
11708c2ecf20Sopenharmony_ci	this_cpu_inc(*rb->pending_refcnt);
11718c2ecf20Sopenharmony_ci}
11728c2ecf20Sopenharmony_ci
11738c2ecf20Sopenharmony_cistatic void packet_dec_pending(struct packet_ring_buffer *rb)
11748c2ecf20Sopenharmony_ci{
11758c2ecf20Sopenharmony_ci	this_cpu_dec(*rb->pending_refcnt);
11768c2ecf20Sopenharmony_ci}
11778c2ecf20Sopenharmony_ci
11788c2ecf20Sopenharmony_cistatic unsigned int packet_read_pending(const struct packet_ring_buffer *rb)
11798c2ecf20Sopenharmony_ci{
11808c2ecf20Sopenharmony_ci	unsigned int refcnt = 0;
11818c2ecf20Sopenharmony_ci	int cpu;
11828c2ecf20Sopenharmony_ci
11838c2ecf20Sopenharmony_ci	/* We don't use pending refcount in rx_ring. */
11848c2ecf20Sopenharmony_ci	if (rb->pending_refcnt == NULL)
11858c2ecf20Sopenharmony_ci		return 0;
11868c2ecf20Sopenharmony_ci
11878c2ecf20Sopenharmony_ci	for_each_possible_cpu(cpu)
11888c2ecf20Sopenharmony_ci		refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu);
11898c2ecf20Sopenharmony_ci
11908c2ecf20Sopenharmony_ci	return refcnt;
11918c2ecf20Sopenharmony_ci}
11928c2ecf20Sopenharmony_ci
11938c2ecf20Sopenharmony_cistatic int packet_alloc_pending(struct packet_sock *po)
11948c2ecf20Sopenharmony_ci{
11958c2ecf20Sopenharmony_ci	po->rx_ring.pending_refcnt = NULL;
11968c2ecf20Sopenharmony_ci
11978c2ecf20Sopenharmony_ci	po->tx_ring.pending_refcnt = alloc_percpu(unsigned int);
11988c2ecf20Sopenharmony_ci	if (unlikely(po->tx_ring.pending_refcnt == NULL))
11998c2ecf20Sopenharmony_ci		return -ENOBUFS;
12008c2ecf20Sopenharmony_ci
12018c2ecf20Sopenharmony_ci	return 0;
12028c2ecf20Sopenharmony_ci}
12038c2ecf20Sopenharmony_ci
12048c2ecf20Sopenharmony_cistatic void packet_free_pending(struct packet_sock *po)
12058c2ecf20Sopenharmony_ci{
12068c2ecf20Sopenharmony_ci	free_percpu(po->tx_ring.pending_refcnt);
12078c2ecf20Sopenharmony_ci}
12088c2ecf20Sopenharmony_ci
/*
 * Receive-room levels reported by __packet_rcv_has_room().
 *
 * ROOM_POW_OFF is the right-shift applied to the buffer/ring size to get the
 * "normal room" threshold (size >> 2, i.e. a quarter of the size).
 */
#define ROOM_POW_OFF	2
/* No room left at all. */
#define ROOM_NONE	0x0
/* Some room left, but not beyond the ROOM_POW_OFF threshold. */
#define ROOM_LOW	0x1
/* Room available beyond the ROOM_POW_OFF threshold. */
#define ROOM_NORMAL	0x2
12138c2ecf20Sopenharmony_ci
12148c2ecf20Sopenharmony_cistatic bool __tpacket_has_room(const struct packet_sock *po, int pow_off)
12158c2ecf20Sopenharmony_ci{
12168c2ecf20Sopenharmony_ci	int idx, len;
12178c2ecf20Sopenharmony_ci
12188c2ecf20Sopenharmony_ci	len = READ_ONCE(po->rx_ring.frame_max) + 1;
12198c2ecf20Sopenharmony_ci	idx = READ_ONCE(po->rx_ring.head);
12208c2ecf20Sopenharmony_ci	if (pow_off)
12218c2ecf20Sopenharmony_ci		idx += len >> pow_off;
12228c2ecf20Sopenharmony_ci	if (idx >= len)
12238c2ecf20Sopenharmony_ci		idx -= len;
12248c2ecf20Sopenharmony_ci	return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
12258c2ecf20Sopenharmony_ci}
12268c2ecf20Sopenharmony_ci
12278c2ecf20Sopenharmony_cistatic bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off)
12288c2ecf20Sopenharmony_ci{
12298c2ecf20Sopenharmony_ci	int idx, len;
12308c2ecf20Sopenharmony_ci
12318c2ecf20Sopenharmony_ci	len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks);
12328c2ecf20Sopenharmony_ci	idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num);
12338c2ecf20Sopenharmony_ci	if (pow_off)
12348c2ecf20Sopenharmony_ci		idx += len >> pow_off;
12358c2ecf20Sopenharmony_ci	if (idx >= len)
12368c2ecf20Sopenharmony_ci		idx -= len;
12378c2ecf20Sopenharmony_ci	return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
12388c2ecf20Sopenharmony_ci}
12398c2ecf20Sopenharmony_ci
12408c2ecf20Sopenharmony_cistatic int __packet_rcv_has_room(const struct packet_sock *po,
12418c2ecf20Sopenharmony_ci				 const struct sk_buff *skb)
12428c2ecf20Sopenharmony_ci{
12438c2ecf20Sopenharmony_ci	const struct sock *sk = &po->sk;
12448c2ecf20Sopenharmony_ci	int ret = ROOM_NONE;
12458c2ecf20Sopenharmony_ci
12468c2ecf20Sopenharmony_ci	if (po->prot_hook.func != tpacket_rcv) {
12478c2ecf20Sopenharmony_ci		int rcvbuf = READ_ONCE(sk->sk_rcvbuf);
12488c2ecf20Sopenharmony_ci		int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc)
12498c2ecf20Sopenharmony_ci				   - (skb ? skb->truesize : 0);
12508c2ecf20Sopenharmony_ci
12518c2ecf20Sopenharmony_ci		if (avail > (rcvbuf >> ROOM_POW_OFF))
12528c2ecf20Sopenharmony_ci			return ROOM_NORMAL;
12538c2ecf20Sopenharmony_ci		else if (avail > 0)
12548c2ecf20Sopenharmony_ci			return ROOM_LOW;
12558c2ecf20Sopenharmony_ci		else
12568c2ecf20Sopenharmony_ci			return ROOM_NONE;
12578c2ecf20Sopenharmony_ci	}
12588c2ecf20Sopenharmony_ci
12598c2ecf20Sopenharmony_ci	if (po->tp_version == TPACKET_V3) {
12608c2ecf20Sopenharmony_ci		if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
12618c2ecf20Sopenharmony_ci			ret = ROOM_NORMAL;
12628c2ecf20Sopenharmony_ci		else if (__tpacket_v3_has_room(po, 0))
12638c2ecf20Sopenharmony_ci			ret = ROOM_LOW;
12648c2ecf20Sopenharmony_ci	} else {
12658c2ecf20Sopenharmony_ci		if (__tpacket_has_room(po, ROOM_POW_OFF))
12668c2ecf20Sopenharmony_ci			ret = ROOM_NORMAL;
12678c2ecf20Sopenharmony_ci		else if (__tpacket_has_room(po, 0))
12688c2ecf20Sopenharmony_ci			ret = ROOM_LOW;
12698c2ecf20Sopenharmony_ci	}
12708c2ecf20Sopenharmony_ci
12718c2ecf20Sopenharmony_ci	return ret;
12728c2ecf20Sopenharmony_ci}
12738c2ecf20Sopenharmony_ci
12748c2ecf20Sopenharmony_cistatic int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
12758c2ecf20Sopenharmony_ci{
12768c2ecf20Sopenharmony_ci	int pressure, ret;
12778c2ecf20Sopenharmony_ci
12788c2ecf20Sopenharmony_ci	ret = __packet_rcv_has_room(po, skb);
12798c2ecf20Sopenharmony_ci	pressure = ret != ROOM_NORMAL;
12808c2ecf20Sopenharmony_ci
12818c2ecf20Sopenharmony_ci	if (READ_ONCE(po->pressure) != pressure)
12828c2ecf20Sopenharmony_ci		WRITE_ONCE(po->pressure, pressure);
12838c2ecf20Sopenharmony_ci
12848c2ecf20Sopenharmony_ci	return ret;
12858c2ecf20Sopenharmony_ci}
12868c2ecf20Sopenharmony_ci
12878c2ecf20Sopenharmony_cistatic void packet_rcv_try_clear_pressure(struct packet_sock *po)
12888c2ecf20Sopenharmony_ci{
12898c2ecf20Sopenharmony_ci	if (READ_ONCE(po->pressure) &&
12908c2ecf20Sopenharmony_ci	    __packet_rcv_has_room(po, NULL) == ROOM_NORMAL)
12918c2ecf20Sopenharmony_ci		WRITE_ONCE(po->pressure,  0);
12928c2ecf20Sopenharmony_ci}
12938c2ecf20Sopenharmony_ci
12948c2ecf20Sopenharmony_cistatic void packet_sock_destruct(struct sock *sk)
12958c2ecf20Sopenharmony_ci{
12968c2ecf20Sopenharmony_ci	skb_queue_purge(&sk->sk_error_queue);
12978c2ecf20Sopenharmony_ci
12988c2ecf20Sopenharmony_ci	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
12998c2ecf20Sopenharmony_ci	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
13008c2ecf20Sopenharmony_ci
13018c2ecf20Sopenharmony_ci	if (!sock_flag(sk, SOCK_DEAD)) {
13028c2ecf20Sopenharmony_ci		pr_err("Attempt to release alive packet socket: %p\n", sk);
13038c2ecf20Sopenharmony_ci		return;
13048c2ecf20Sopenharmony_ci	}
13058c2ecf20Sopenharmony_ci
13068c2ecf20Sopenharmony_ci	sk_refcnt_debug_dec(sk);
13078c2ecf20Sopenharmony_ci}
13088c2ecf20Sopenharmony_ci
13098c2ecf20Sopenharmony_cistatic bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
13108c2ecf20Sopenharmony_ci{
13118c2ecf20Sopenharmony_ci	u32 *history = po->rollover->history;
13128c2ecf20Sopenharmony_ci	u32 victim, rxhash;
13138c2ecf20Sopenharmony_ci	int i, count = 0;
13148c2ecf20Sopenharmony_ci
13158c2ecf20Sopenharmony_ci	rxhash = skb_get_hash(skb);
13168c2ecf20Sopenharmony_ci	for (i = 0; i < ROLLOVER_HLEN; i++)
13178c2ecf20Sopenharmony_ci		if (READ_ONCE(history[i]) == rxhash)
13188c2ecf20Sopenharmony_ci			count++;
13198c2ecf20Sopenharmony_ci
13208c2ecf20Sopenharmony_ci	victim = prandom_u32() % ROLLOVER_HLEN;
13218c2ecf20Sopenharmony_ci
13228c2ecf20Sopenharmony_ci	/* Avoid dirtying the cache line if possible */
13238c2ecf20Sopenharmony_ci	if (READ_ONCE(history[victim]) != rxhash)
13248c2ecf20Sopenharmony_ci		WRITE_ONCE(history[victim], rxhash);
13258c2ecf20Sopenharmony_ci
13268c2ecf20Sopenharmony_ci	return count > (ROLLOVER_HLEN >> 1);
13278c2ecf20Sopenharmony_ci}
13288c2ecf20Sopenharmony_ci
13298c2ecf20Sopenharmony_cistatic unsigned int fanout_demux_hash(struct packet_fanout *f,
13308c2ecf20Sopenharmony_ci				      struct sk_buff *skb,
13318c2ecf20Sopenharmony_ci				      unsigned int num)
13328c2ecf20Sopenharmony_ci{
13338c2ecf20Sopenharmony_ci	return reciprocal_scale(__skb_get_hash_symmetric(skb), num);
13348c2ecf20Sopenharmony_ci}
13358c2ecf20Sopenharmony_ci
13368c2ecf20Sopenharmony_cistatic unsigned int fanout_demux_lb(struct packet_fanout *f,
13378c2ecf20Sopenharmony_ci				    struct sk_buff *skb,
13388c2ecf20Sopenharmony_ci				    unsigned int num)
13398c2ecf20Sopenharmony_ci{
13408c2ecf20Sopenharmony_ci	unsigned int val = atomic_inc_return(&f->rr_cur);
13418c2ecf20Sopenharmony_ci
13428c2ecf20Sopenharmony_ci	return val % num;
13438c2ecf20Sopenharmony_ci}
13448c2ecf20Sopenharmony_ci
/* Pick a member index from the id of the CPU running the caller, modulo @num. */
static unsigned int fanout_demux_cpu(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	unsigned int cpu = smp_processor_id();

	return cpu % num;
}
13518c2ecf20Sopenharmony_ci
/* Pick a member index with prandom_u32_max(@num). */
static unsigned int fanout_demux_rnd(struct packet_fanout *f,
				     struct sk_buff *skb,
				     unsigned int num)
{
	unsigned int pick = prandom_u32_max(num);

	return pick;
}
13588c2ecf20Sopenharmony_ci
13598c2ecf20Sopenharmony_cistatic unsigned int fanout_demux_rollover(struct packet_fanout *f,
13608c2ecf20Sopenharmony_ci					  struct sk_buff *skb,
13618c2ecf20Sopenharmony_ci					  unsigned int idx, bool try_self,
13628c2ecf20Sopenharmony_ci					  unsigned int num)
13638c2ecf20Sopenharmony_ci{
13648c2ecf20Sopenharmony_ci	struct packet_sock *po, *po_next, *po_skip = NULL;
13658c2ecf20Sopenharmony_ci	unsigned int i, j, room = ROOM_NONE;
13668c2ecf20Sopenharmony_ci
13678c2ecf20Sopenharmony_ci	po = pkt_sk(rcu_dereference(f->arr[idx]));
13688c2ecf20Sopenharmony_ci
13698c2ecf20Sopenharmony_ci	if (try_self) {
13708c2ecf20Sopenharmony_ci		room = packet_rcv_has_room(po, skb);
13718c2ecf20Sopenharmony_ci		if (room == ROOM_NORMAL ||
13728c2ecf20Sopenharmony_ci		    (room == ROOM_LOW && !fanout_flow_is_huge(po, skb)))
13738c2ecf20Sopenharmony_ci			return idx;
13748c2ecf20Sopenharmony_ci		po_skip = po;
13758c2ecf20Sopenharmony_ci	}
13768c2ecf20Sopenharmony_ci
13778c2ecf20Sopenharmony_ci	i = j = min_t(int, po->rollover->sock, num - 1);
13788c2ecf20Sopenharmony_ci	do {
13798c2ecf20Sopenharmony_ci		po_next = pkt_sk(rcu_dereference(f->arr[i]));
13808c2ecf20Sopenharmony_ci		if (po_next != po_skip && !READ_ONCE(po_next->pressure) &&
13818c2ecf20Sopenharmony_ci		    packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
13828c2ecf20Sopenharmony_ci			if (i != j)
13838c2ecf20Sopenharmony_ci				po->rollover->sock = i;
13848c2ecf20Sopenharmony_ci			atomic_long_inc(&po->rollover->num);
13858c2ecf20Sopenharmony_ci			if (room == ROOM_LOW)
13868c2ecf20Sopenharmony_ci				atomic_long_inc(&po->rollover->num_huge);
13878c2ecf20Sopenharmony_ci			return i;
13888c2ecf20Sopenharmony_ci		}
13898c2ecf20Sopenharmony_ci
13908c2ecf20Sopenharmony_ci		if (++i == num)
13918c2ecf20Sopenharmony_ci			i = 0;
13928c2ecf20Sopenharmony_ci	} while (i != j);
13938c2ecf20Sopenharmony_ci
13948c2ecf20Sopenharmony_ci	atomic_long_inc(&po->rollover->num_failed);
13958c2ecf20Sopenharmony_ci	return idx;
13968c2ecf20Sopenharmony_ci}
13978c2ecf20Sopenharmony_ci
13988c2ecf20Sopenharmony_cistatic unsigned int fanout_demux_qm(struct packet_fanout *f,
13998c2ecf20Sopenharmony_ci				    struct sk_buff *skb,
14008c2ecf20Sopenharmony_ci				    unsigned int num)
14018c2ecf20Sopenharmony_ci{
14028c2ecf20Sopenharmony_ci	return skb_get_queue_mapping(skb) % num;
14038c2ecf20Sopenharmony_ci}
14048c2ecf20Sopenharmony_ci
14058c2ecf20Sopenharmony_cistatic unsigned int fanout_demux_bpf(struct packet_fanout *f,
14068c2ecf20Sopenharmony_ci				     struct sk_buff *skb,
14078c2ecf20Sopenharmony_ci				     unsigned int num)
14088c2ecf20Sopenharmony_ci{
14098c2ecf20Sopenharmony_ci	struct bpf_prog *prog;
14108c2ecf20Sopenharmony_ci	unsigned int ret = 0;
14118c2ecf20Sopenharmony_ci
14128c2ecf20Sopenharmony_ci	rcu_read_lock();
14138c2ecf20Sopenharmony_ci	prog = rcu_dereference(f->bpf_prog);
14148c2ecf20Sopenharmony_ci	if (prog)
14158c2ecf20Sopenharmony_ci		ret = bpf_prog_run_clear_cb(prog, skb) % num;
14168c2ecf20Sopenharmony_ci	rcu_read_unlock();
14178c2ecf20Sopenharmony_ci
14188c2ecf20Sopenharmony_ci	return ret;
14198c2ecf20Sopenharmony_ci}
14208c2ecf20Sopenharmony_ci
14218c2ecf20Sopenharmony_cistatic bool fanout_has_flag(struct packet_fanout *f, u16 flag)
14228c2ecf20Sopenharmony_ci{
14238c2ecf20Sopenharmony_ci	return f->flags & (flag >> 8);
14248c2ecf20Sopenharmony_ci}
14258c2ecf20Sopenharmony_ci
14268c2ecf20Sopenharmony_cistatic int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
14278c2ecf20Sopenharmony_ci			     struct packet_type *pt, struct net_device *orig_dev)
14288c2ecf20Sopenharmony_ci{
14298c2ecf20Sopenharmony_ci	struct packet_fanout *f = pt->af_packet_priv;
14308c2ecf20Sopenharmony_ci	unsigned int num = READ_ONCE(f->num_members);
14318c2ecf20Sopenharmony_ci	struct net *net = read_pnet(&f->net);
14328c2ecf20Sopenharmony_ci	struct packet_sock *po;
14338c2ecf20Sopenharmony_ci	unsigned int idx;
14348c2ecf20Sopenharmony_ci
14358c2ecf20Sopenharmony_ci	if (!net_eq(dev_net(dev), net) || !num) {
14368c2ecf20Sopenharmony_ci		kfree_skb(skb);
14378c2ecf20Sopenharmony_ci		return 0;
14388c2ecf20Sopenharmony_ci	}
14398c2ecf20Sopenharmony_ci
14408c2ecf20Sopenharmony_ci	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) {
14418c2ecf20Sopenharmony_ci		skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET);
14428c2ecf20Sopenharmony_ci		if (!skb)
14438c2ecf20Sopenharmony_ci			return 0;
14448c2ecf20Sopenharmony_ci	}
14458c2ecf20Sopenharmony_ci	switch (f->type) {
14468c2ecf20Sopenharmony_ci	case PACKET_FANOUT_HASH:
14478c2ecf20Sopenharmony_ci	default:
14488c2ecf20Sopenharmony_ci		idx = fanout_demux_hash(f, skb, num);
14498c2ecf20Sopenharmony_ci		break;
14508c2ecf20Sopenharmony_ci	case PACKET_FANOUT_LB:
14518c2ecf20Sopenharmony_ci		idx = fanout_demux_lb(f, skb, num);
14528c2ecf20Sopenharmony_ci		break;
14538c2ecf20Sopenharmony_ci	case PACKET_FANOUT_CPU:
14548c2ecf20Sopenharmony_ci		idx = fanout_demux_cpu(f, skb, num);
14558c2ecf20Sopenharmony_ci		break;
14568c2ecf20Sopenharmony_ci	case PACKET_FANOUT_RND:
14578c2ecf20Sopenharmony_ci		idx = fanout_demux_rnd(f, skb, num);
14588c2ecf20Sopenharmony_ci		break;
14598c2ecf20Sopenharmony_ci	case PACKET_FANOUT_QM:
14608c2ecf20Sopenharmony_ci		idx = fanout_demux_qm(f, skb, num);
14618c2ecf20Sopenharmony_ci		break;
14628c2ecf20Sopenharmony_ci	case PACKET_FANOUT_ROLLOVER:
14638c2ecf20Sopenharmony_ci		idx = fanout_demux_rollover(f, skb, 0, false, num);
14648c2ecf20Sopenharmony_ci		break;
14658c2ecf20Sopenharmony_ci	case PACKET_FANOUT_CBPF:
14668c2ecf20Sopenharmony_ci	case PACKET_FANOUT_EBPF:
14678c2ecf20Sopenharmony_ci		idx = fanout_demux_bpf(f, skb, num);
14688c2ecf20Sopenharmony_ci		break;
14698c2ecf20Sopenharmony_ci	}
14708c2ecf20Sopenharmony_ci
14718c2ecf20Sopenharmony_ci	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
14728c2ecf20Sopenharmony_ci		idx = fanout_demux_rollover(f, skb, idx, true, num);
14738c2ecf20Sopenharmony_ci
14748c2ecf20Sopenharmony_ci	po = pkt_sk(rcu_dereference(f->arr[idx]));
14758c2ecf20Sopenharmony_ci	return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
14768c2ecf20Sopenharmony_ci}
14778c2ecf20Sopenharmony_ci
/*
 * fanout_mutex is taken by fanout_add() and fanout_release() around their
 * accesses to fanout_list and po->fanout.  It is exported (GPL-only).
 */
DEFINE_MUTEX(fanout_mutex);
EXPORT_SYMBOL_GPL(fanout_mutex);
/* All existing fanout groups, linked through packet_fanout::list. */
static LIST_HEAD(fanout_list);
/* Id at which fanout_find_new_id() starts looking for an unused group id. */
static u16 fanout_next_id;
14828c2ecf20Sopenharmony_ci
/*
 * Append @sk to the member array of the fanout group @po belongs to
 * (po->fanout), under the group's spinlock.  When @sk becomes the first
 * member, the group's packet hook is registered with dev_add_pack().
 */
static void __fanout_link(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;

	spin_lock(&f->lock);
	rcu_assign_pointer(f->arr[f->num_members], sk);
	/*
	 * Write barrier between storing the new slot and bumping the count;
	 * presumably so a reader that sees the new num_members also sees the
	 * slot contents - confirm against the reader in packet_rcv_fanout().
	 */
	smp_wmb();
	f->num_members++;
	if (f->num_members == 1)
		dev_add_pack(&f->prot_hook);
	spin_unlock(&f->lock);
}
14958c2ecf20Sopenharmony_ci
/*
 * Remove @sk from the member array of the fanout group @po belongs to
 * (po->fanout), under the group's spinlock.  @sk must be a member: the
 * function BUG()s if it is not found.  When the last member is removed, the
 * group's packet hook is unregistered with __dev_remove_pack().
 */
static void __fanout_unlink(struct sock *sk, struct packet_sock *po)
{
	struct packet_fanout *f = po->fanout;
	int i;

	spin_lock(&f->lock);
	/* Locate the slot currently holding @sk. */
	for (i = 0; i < f->num_members; i++) {
		if (rcu_dereference_protected(f->arr[i],
					      lockdep_is_held(&f->lock)) == sk)
			break;
	}
	BUG_ON(i >= f->num_members);
	/* Fill the vacated slot with the last member, then shrink the array. */
	rcu_assign_pointer(f->arr[i],
			   rcu_dereference_protected(f->arr[f->num_members - 1],
						     lockdep_is_held(&f->lock)));
	f->num_members--;
	if (f->num_members == 0)
		__dev_remove_pack(&f->prot_hook);
	spin_unlock(&f->lock);
}
15168c2ecf20Sopenharmony_ci
15178c2ecf20Sopenharmony_cistatic bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
15188c2ecf20Sopenharmony_ci{
15198c2ecf20Sopenharmony_ci	if (sk->sk_family != PF_PACKET)
15208c2ecf20Sopenharmony_ci		return false;
15218c2ecf20Sopenharmony_ci
15228c2ecf20Sopenharmony_ci	return ptype->af_packet_priv == pkt_sk(sk)->fanout;
15238c2ecf20Sopenharmony_ci}
15248c2ecf20Sopenharmony_ci
15258c2ecf20Sopenharmony_cistatic void fanout_init_data(struct packet_fanout *f)
15268c2ecf20Sopenharmony_ci{
15278c2ecf20Sopenharmony_ci	switch (f->type) {
15288c2ecf20Sopenharmony_ci	case PACKET_FANOUT_LB:
15298c2ecf20Sopenharmony_ci		atomic_set(&f->rr_cur, 0);
15308c2ecf20Sopenharmony_ci		break;
15318c2ecf20Sopenharmony_ci	case PACKET_FANOUT_CBPF:
15328c2ecf20Sopenharmony_ci	case PACKET_FANOUT_EBPF:
15338c2ecf20Sopenharmony_ci		RCU_INIT_POINTER(f->bpf_prog, NULL);
15348c2ecf20Sopenharmony_ci		break;
15358c2ecf20Sopenharmony_ci	}
15368c2ecf20Sopenharmony_ci}
15378c2ecf20Sopenharmony_ci
15388c2ecf20Sopenharmony_cistatic void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
15398c2ecf20Sopenharmony_ci{
15408c2ecf20Sopenharmony_ci	struct bpf_prog *old;
15418c2ecf20Sopenharmony_ci
15428c2ecf20Sopenharmony_ci	spin_lock(&f->lock);
15438c2ecf20Sopenharmony_ci	old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
15448c2ecf20Sopenharmony_ci	rcu_assign_pointer(f->bpf_prog, new);
15458c2ecf20Sopenharmony_ci	spin_unlock(&f->lock);
15468c2ecf20Sopenharmony_ci
15478c2ecf20Sopenharmony_ci	if (old) {
15488c2ecf20Sopenharmony_ci		synchronize_net();
15498c2ecf20Sopenharmony_ci		bpf_prog_destroy(old);
15508c2ecf20Sopenharmony_ci	}
15518c2ecf20Sopenharmony_ci}
15528c2ecf20Sopenharmony_ci
15538c2ecf20Sopenharmony_cistatic int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data,
15548c2ecf20Sopenharmony_ci				unsigned int len)
15558c2ecf20Sopenharmony_ci{
15568c2ecf20Sopenharmony_ci	struct bpf_prog *new;
15578c2ecf20Sopenharmony_ci	struct sock_fprog fprog;
15588c2ecf20Sopenharmony_ci	int ret;
15598c2ecf20Sopenharmony_ci
15608c2ecf20Sopenharmony_ci	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
15618c2ecf20Sopenharmony_ci		return -EPERM;
15628c2ecf20Sopenharmony_ci
15638c2ecf20Sopenharmony_ci	ret = copy_bpf_fprog_from_user(&fprog, data, len);
15648c2ecf20Sopenharmony_ci	if (ret)
15658c2ecf20Sopenharmony_ci		return ret;
15668c2ecf20Sopenharmony_ci
15678c2ecf20Sopenharmony_ci	ret = bpf_prog_create_from_user(&new, &fprog, NULL, false);
15688c2ecf20Sopenharmony_ci	if (ret)
15698c2ecf20Sopenharmony_ci		return ret;
15708c2ecf20Sopenharmony_ci
15718c2ecf20Sopenharmony_ci	__fanout_set_data_bpf(po->fanout, new);
15728c2ecf20Sopenharmony_ci	return 0;
15738c2ecf20Sopenharmony_ci}
15748c2ecf20Sopenharmony_ci
15758c2ecf20Sopenharmony_cistatic int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data,
15768c2ecf20Sopenharmony_ci				unsigned int len)
15778c2ecf20Sopenharmony_ci{
15788c2ecf20Sopenharmony_ci	struct bpf_prog *new;
15798c2ecf20Sopenharmony_ci	u32 fd;
15808c2ecf20Sopenharmony_ci
15818c2ecf20Sopenharmony_ci	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
15828c2ecf20Sopenharmony_ci		return -EPERM;
15838c2ecf20Sopenharmony_ci	if (len != sizeof(fd))
15848c2ecf20Sopenharmony_ci		return -EINVAL;
15858c2ecf20Sopenharmony_ci	if (copy_from_sockptr(&fd, data, len))
15868c2ecf20Sopenharmony_ci		return -EFAULT;
15878c2ecf20Sopenharmony_ci
15888c2ecf20Sopenharmony_ci	new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER);
15898c2ecf20Sopenharmony_ci	if (IS_ERR(new))
15908c2ecf20Sopenharmony_ci		return PTR_ERR(new);
15918c2ecf20Sopenharmony_ci
15928c2ecf20Sopenharmony_ci	__fanout_set_data_bpf(po->fanout, new);
15938c2ecf20Sopenharmony_ci	return 0;
15948c2ecf20Sopenharmony_ci}
15958c2ecf20Sopenharmony_ci
15968c2ecf20Sopenharmony_cistatic int fanout_set_data(struct packet_sock *po, sockptr_t data,
15978c2ecf20Sopenharmony_ci			   unsigned int len)
15988c2ecf20Sopenharmony_ci{
15998c2ecf20Sopenharmony_ci	switch (po->fanout->type) {
16008c2ecf20Sopenharmony_ci	case PACKET_FANOUT_CBPF:
16018c2ecf20Sopenharmony_ci		return fanout_set_data_cbpf(po, data, len);
16028c2ecf20Sopenharmony_ci	case PACKET_FANOUT_EBPF:
16038c2ecf20Sopenharmony_ci		return fanout_set_data_ebpf(po, data, len);
16048c2ecf20Sopenharmony_ci	default:
16058c2ecf20Sopenharmony_ci		return -EINVAL;
16068c2ecf20Sopenharmony_ci	}
16078c2ecf20Sopenharmony_ci}
16088c2ecf20Sopenharmony_ci
16098c2ecf20Sopenharmony_cistatic void fanout_release_data(struct packet_fanout *f)
16108c2ecf20Sopenharmony_ci{
16118c2ecf20Sopenharmony_ci	switch (f->type) {
16128c2ecf20Sopenharmony_ci	case PACKET_FANOUT_CBPF:
16138c2ecf20Sopenharmony_ci	case PACKET_FANOUT_EBPF:
16148c2ecf20Sopenharmony_ci		__fanout_set_data_bpf(f, NULL);
16158c2ecf20Sopenharmony_ci	}
16168c2ecf20Sopenharmony_ci}
16178c2ecf20Sopenharmony_ci
16188c2ecf20Sopenharmony_cistatic bool __fanout_id_is_free(struct sock *sk, u16 candidate_id)
16198c2ecf20Sopenharmony_ci{
16208c2ecf20Sopenharmony_ci	struct packet_fanout *f;
16218c2ecf20Sopenharmony_ci
16228c2ecf20Sopenharmony_ci	list_for_each_entry(f, &fanout_list, list) {
16238c2ecf20Sopenharmony_ci		if (f->id == candidate_id &&
16248c2ecf20Sopenharmony_ci		    read_pnet(&f->net) == sock_net(sk)) {
16258c2ecf20Sopenharmony_ci			return false;
16268c2ecf20Sopenharmony_ci		}
16278c2ecf20Sopenharmony_ci	}
16288c2ecf20Sopenharmony_ci	return true;
16298c2ecf20Sopenharmony_ci}
16308c2ecf20Sopenharmony_ci
16318c2ecf20Sopenharmony_cistatic bool fanout_find_new_id(struct sock *sk, u16 *new_id)
16328c2ecf20Sopenharmony_ci{
16338c2ecf20Sopenharmony_ci	u16 id = fanout_next_id;
16348c2ecf20Sopenharmony_ci
16358c2ecf20Sopenharmony_ci	do {
16368c2ecf20Sopenharmony_ci		if (__fanout_id_is_free(sk, id)) {
16378c2ecf20Sopenharmony_ci			*new_id = id;
16388c2ecf20Sopenharmony_ci			fanout_next_id = id + 1;
16398c2ecf20Sopenharmony_ci			return true;
16408c2ecf20Sopenharmony_ci		}
16418c2ecf20Sopenharmony_ci
16428c2ecf20Sopenharmony_ci		id++;
16438c2ecf20Sopenharmony_ci	} while (id != fanout_next_id);
16448c2ecf20Sopenharmony_ci
16458c2ecf20Sopenharmony_ci	return false;
16468c2ecf20Sopenharmony_ci}
16478c2ecf20Sopenharmony_ci
16488c2ecf20Sopenharmony_cistatic int fanout_add(struct sock *sk, struct fanout_args *args)
16498c2ecf20Sopenharmony_ci{
16508c2ecf20Sopenharmony_ci	struct packet_rollover *rollover = NULL;
16518c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
16528c2ecf20Sopenharmony_ci	u16 type_flags = args->type_flags;
16538c2ecf20Sopenharmony_ci	struct packet_fanout *f, *match;
16548c2ecf20Sopenharmony_ci	u8 type = type_flags & 0xff;
16558c2ecf20Sopenharmony_ci	u8 flags = type_flags >> 8;
16568c2ecf20Sopenharmony_ci	u16 id = args->id;
16578c2ecf20Sopenharmony_ci	int err;
16588c2ecf20Sopenharmony_ci
16598c2ecf20Sopenharmony_ci	switch (type) {
16608c2ecf20Sopenharmony_ci	case PACKET_FANOUT_ROLLOVER:
16618c2ecf20Sopenharmony_ci		if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)
16628c2ecf20Sopenharmony_ci			return -EINVAL;
16638c2ecf20Sopenharmony_ci	case PACKET_FANOUT_HASH:
16648c2ecf20Sopenharmony_ci	case PACKET_FANOUT_LB:
16658c2ecf20Sopenharmony_ci	case PACKET_FANOUT_CPU:
16668c2ecf20Sopenharmony_ci	case PACKET_FANOUT_RND:
16678c2ecf20Sopenharmony_ci	case PACKET_FANOUT_QM:
16688c2ecf20Sopenharmony_ci	case PACKET_FANOUT_CBPF:
16698c2ecf20Sopenharmony_ci	case PACKET_FANOUT_EBPF:
16708c2ecf20Sopenharmony_ci		break;
16718c2ecf20Sopenharmony_ci	default:
16728c2ecf20Sopenharmony_ci		return -EINVAL;
16738c2ecf20Sopenharmony_ci	}
16748c2ecf20Sopenharmony_ci
16758c2ecf20Sopenharmony_ci	mutex_lock(&fanout_mutex);
16768c2ecf20Sopenharmony_ci
16778c2ecf20Sopenharmony_ci	err = -EALREADY;
16788c2ecf20Sopenharmony_ci	if (po->fanout)
16798c2ecf20Sopenharmony_ci		goto out;
16808c2ecf20Sopenharmony_ci
16818c2ecf20Sopenharmony_ci	if (type == PACKET_FANOUT_ROLLOVER ||
16828c2ecf20Sopenharmony_ci	    (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) {
16838c2ecf20Sopenharmony_ci		err = -ENOMEM;
16848c2ecf20Sopenharmony_ci		rollover = kzalloc(sizeof(*rollover), GFP_KERNEL);
16858c2ecf20Sopenharmony_ci		if (!rollover)
16868c2ecf20Sopenharmony_ci			goto out;
16878c2ecf20Sopenharmony_ci		atomic_long_set(&rollover->num, 0);
16888c2ecf20Sopenharmony_ci		atomic_long_set(&rollover->num_huge, 0);
16898c2ecf20Sopenharmony_ci		atomic_long_set(&rollover->num_failed, 0);
16908c2ecf20Sopenharmony_ci	}
16918c2ecf20Sopenharmony_ci
16928c2ecf20Sopenharmony_ci	if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) {
16938c2ecf20Sopenharmony_ci		if (id != 0) {
16948c2ecf20Sopenharmony_ci			err = -EINVAL;
16958c2ecf20Sopenharmony_ci			goto out;
16968c2ecf20Sopenharmony_ci		}
16978c2ecf20Sopenharmony_ci		if (!fanout_find_new_id(sk, &id)) {
16988c2ecf20Sopenharmony_ci			err = -ENOMEM;
16998c2ecf20Sopenharmony_ci			goto out;
17008c2ecf20Sopenharmony_ci		}
17018c2ecf20Sopenharmony_ci		/* ephemeral flag for the first socket in the group: drop it */
17028c2ecf20Sopenharmony_ci		flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8);
17038c2ecf20Sopenharmony_ci	}
17048c2ecf20Sopenharmony_ci
17058c2ecf20Sopenharmony_ci	match = NULL;
17068c2ecf20Sopenharmony_ci	list_for_each_entry(f, &fanout_list, list) {
17078c2ecf20Sopenharmony_ci		if (f->id == id &&
17088c2ecf20Sopenharmony_ci		    read_pnet(&f->net) == sock_net(sk)) {
17098c2ecf20Sopenharmony_ci			match = f;
17108c2ecf20Sopenharmony_ci			break;
17118c2ecf20Sopenharmony_ci		}
17128c2ecf20Sopenharmony_ci	}
17138c2ecf20Sopenharmony_ci	err = -EINVAL;
17148c2ecf20Sopenharmony_ci	if (match) {
17158c2ecf20Sopenharmony_ci		if (match->flags != flags)
17168c2ecf20Sopenharmony_ci			goto out;
17178c2ecf20Sopenharmony_ci		if (args->max_num_members &&
17188c2ecf20Sopenharmony_ci		    args->max_num_members != match->max_num_members)
17198c2ecf20Sopenharmony_ci			goto out;
17208c2ecf20Sopenharmony_ci	} else {
17218c2ecf20Sopenharmony_ci		if (args->max_num_members > PACKET_FANOUT_MAX)
17228c2ecf20Sopenharmony_ci			goto out;
17238c2ecf20Sopenharmony_ci		if (!args->max_num_members)
17248c2ecf20Sopenharmony_ci			/* legacy PACKET_FANOUT_MAX */
17258c2ecf20Sopenharmony_ci			args->max_num_members = 256;
17268c2ecf20Sopenharmony_ci		err = -ENOMEM;
17278c2ecf20Sopenharmony_ci		match = kvzalloc(struct_size(match, arr, args->max_num_members),
17288c2ecf20Sopenharmony_ci				 GFP_KERNEL);
17298c2ecf20Sopenharmony_ci		if (!match)
17308c2ecf20Sopenharmony_ci			goto out;
17318c2ecf20Sopenharmony_ci		write_pnet(&match->net, sock_net(sk));
17328c2ecf20Sopenharmony_ci		match->id = id;
17338c2ecf20Sopenharmony_ci		match->type = type;
17348c2ecf20Sopenharmony_ci		match->flags = flags;
17358c2ecf20Sopenharmony_ci		INIT_LIST_HEAD(&match->list);
17368c2ecf20Sopenharmony_ci		spin_lock_init(&match->lock);
17378c2ecf20Sopenharmony_ci		refcount_set(&match->sk_ref, 0);
17388c2ecf20Sopenharmony_ci		fanout_init_data(match);
17398c2ecf20Sopenharmony_ci		match->prot_hook.type = po->prot_hook.type;
17408c2ecf20Sopenharmony_ci		match->prot_hook.dev = po->prot_hook.dev;
17418c2ecf20Sopenharmony_ci		match->prot_hook.func = packet_rcv_fanout;
17428c2ecf20Sopenharmony_ci		match->prot_hook.af_packet_priv = match;
17438c2ecf20Sopenharmony_ci		match->prot_hook.af_packet_net = read_pnet(&match->net);
17448c2ecf20Sopenharmony_ci		match->prot_hook.id_match = match_fanout_group;
17458c2ecf20Sopenharmony_ci		match->max_num_members = args->max_num_members;
17468c2ecf20Sopenharmony_ci		list_add(&match->list, &fanout_list);
17478c2ecf20Sopenharmony_ci	}
17488c2ecf20Sopenharmony_ci	err = -EINVAL;
17498c2ecf20Sopenharmony_ci
17508c2ecf20Sopenharmony_ci	spin_lock(&po->bind_lock);
17518c2ecf20Sopenharmony_ci	if (po->running &&
17528c2ecf20Sopenharmony_ci	    match->type == type &&
17538c2ecf20Sopenharmony_ci	    match->prot_hook.type == po->prot_hook.type &&
17548c2ecf20Sopenharmony_ci	    match->prot_hook.dev == po->prot_hook.dev) {
17558c2ecf20Sopenharmony_ci		err = -ENOSPC;
17568c2ecf20Sopenharmony_ci		if (refcount_read(&match->sk_ref) < match->max_num_members) {
17578c2ecf20Sopenharmony_ci			__dev_remove_pack(&po->prot_hook);
17588c2ecf20Sopenharmony_ci
17598c2ecf20Sopenharmony_ci			/* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */
17608c2ecf20Sopenharmony_ci			WRITE_ONCE(po->fanout, match);
17618c2ecf20Sopenharmony_ci
17628c2ecf20Sopenharmony_ci			po->rollover = rollover;
17638c2ecf20Sopenharmony_ci			rollover = NULL;
17648c2ecf20Sopenharmony_ci			refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1);
17658c2ecf20Sopenharmony_ci			__fanout_link(sk, po);
17668c2ecf20Sopenharmony_ci			err = 0;
17678c2ecf20Sopenharmony_ci		}
17688c2ecf20Sopenharmony_ci	}
17698c2ecf20Sopenharmony_ci	spin_unlock(&po->bind_lock);
17708c2ecf20Sopenharmony_ci
17718c2ecf20Sopenharmony_ci	if (err && !refcount_read(&match->sk_ref)) {
17728c2ecf20Sopenharmony_ci		list_del(&match->list);
17738c2ecf20Sopenharmony_ci		kvfree(match);
17748c2ecf20Sopenharmony_ci	}
17758c2ecf20Sopenharmony_ci
17768c2ecf20Sopenharmony_ciout:
17778c2ecf20Sopenharmony_ci	kfree(rollover);
17788c2ecf20Sopenharmony_ci	mutex_unlock(&fanout_mutex);
17798c2ecf20Sopenharmony_ci	return err;
17808c2ecf20Sopenharmony_ci}
17818c2ecf20Sopenharmony_ci
17828c2ecf20Sopenharmony_ci/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes
17838c2ecf20Sopenharmony_ci * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout.
17848c2ecf20Sopenharmony_ci * It is the responsibility of the caller to call fanout_release_data() and
17858c2ecf20Sopenharmony_ci * free the returned packet_fanout (after synchronize_net())
17868c2ecf20Sopenharmony_ci */
17878c2ecf20Sopenharmony_cistatic struct packet_fanout *fanout_release(struct sock *sk)
17888c2ecf20Sopenharmony_ci{
17898c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
17908c2ecf20Sopenharmony_ci	struct packet_fanout *f;
17918c2ecf20Sopenharmony_ci
17928c2ecf20Sopenharmony_ci	mutex_lock(&fanout_mutex);
17938c2ecf20Sopenharmony_ci	f = po->fanout;
17948c2ecf20Sopenharmony_ci	if (f) {
17958c2ecf20Sopenharmony_ci		po->fanout = NULL;
17968c2ecf20Sopenharmony_ci
17978c2ecf20Sopenharmony_ci		if (refcount_dec_and_test(&f->sk_ref))
17988c2ecf20Sopenharmony_ci			list_del(&f->list);
17998c2ecf20Sopenharmony_ci		else
18008c2ecf20Sopenharmony_ci			f = NULL;
18018c2ecf20Sopenharmony_ci	}
18028c2ecf20Sopenharmony_ci	mutex_unlock(&fanout_mutex);
18038c2ecf20Sopenharmony_ci
18048c2ecf20Sopenharmony_ci	return f;
18058c2ecf20Sopenharmony_ci}
18068c2ecf20Sopenharmony_ci
18078c2ecf20Sopenharmony_cistatic bool packet_extra_vlan_len_allowed(const struct net_device *dev,
18088c2ecf20Sopenharmony_ci					  struct sk_buff *skb)
18098c2ecf20Sopenharmony_ci{
18108c2ecf20Sopenharmony_ci	/* Earlier code assumed this would be a VLAN pkt, double-check
18118c2ecf20Sopenharmony_ci	 * this now that we have the actual packet in hand. We can only
18128c2ecf20Sopenharmony_ci	 * do this check on Ethernet devices.
18138c2ecf20Sopenharmony_ci	 */
18148c2ecf20Sopenharmony_ci	if (unlikely(dev->type != ARPHRD_ETHER))
18158c2ecf20Sopenharmony_ci		return false;
18168c2ecf20Sopenharmony_ci
18178c2ecf20Sopenharmony_ci	skb_reset_mac_header(skb);
18188c2ecf20Sopenharmony_ci	return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q));
18198c2ecf20Sopenharmony_ci}
18208c2ecf20Sopenharmony_ci
/*
 * Forward declarations of the two proto_ops tables; the initialised
 * definitions are presumably further down in this file - confirm there.
 */
static const struct proto_ops packet_ops;

static const struct proto_ops packet_ops_spkt;
18248c2ecf20Sopenharmony_ci
18258c2ecf20Sopenharmony_cistatic int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
18268c2ecf20Sopenharmony_ci			   struct packet_type *pt, struct net_device *orig_dev)
18278c2ecf20Sopenharmony_ci{
18288c2ecf20Sopenharmony_ci	struct sock *sk;
18298c2ecf20Sopenharmony_ci	struct sockaddr_pkt *spkt;
18308c2ecf20Sopenharmony_ci
18318c2ecf20Sopenharmony_ci	/*
18328c2ecf20Sopenharmony_ci	 *	When we registered the protocol we saved the socket in the data
18338c2ecf20Sopenharmony_ci	 *	field for just this event.
18348c2ecf20Sopenharmony_ci	 */
18358c2ecf20Sopenharmony_ci
18368c2ecf20Sopenharmony_ci	sk = pt->af_packet_priv;
18378c2ecf20Sopenharmony_ci
18388c2ecf20Sopenharmony_ci	/*
18398c2ecf20Sopenharmony_ci	 *	Yank back the headers [hope the device set this
18408c2ecf20Sopenharmony_ci	 *	right or kerboom...]
18418c2ecf20Sopenharmony_ci	 *
18428c2ecf20Sopenharmony_ci	 *	Incoming packets have ll header pulled,
18438c2ecf20Sopenharmony_ci	 *	push it back.
18448c2ecf20Sopenharmony_ci	 *
18458c2ecf20Sopenharmony_ci	 *	For outgoing ones skb->data == skb_mac_header(skb)
18468c2ecf20Sopenharmony_ci	 *	so that this procedure is noop.
18478c2ecf20Sopenharmony_ci	 */
18488c2ecf20Sopenharmony_ci
18498c2ecf20Sopenharmony_ci	if (skb->pkt_type == PACKET_LOOPBACK)
18508c2ecf20Sopenharmony_ci		goto out;
18518c2ecf20Sopenharmony_ci
18528c2ecf20Sopenharmony_ci	if (!net_eq(dev_net(dev), sock_net(sk)))
18538c2ecf20Sopenharmony_ci		goto out;
18548c2ecf20Sopenharmony_ci
18558c2ecf20Sopenharmony_ci	skb = skb_share_check(skb, GFP_ATOMIC);
18568c2ecf20Sopenharmony_ci	if (skb == NULL)
18578c2ecf20Sopenharmony_ci		goto oom;
18588c2ecf20Sopenharmony_ci
18598c2ecf20Sopenharmony_ci	/* drop any routing info */
18608c2ecf20Sopenharmony_ci	skb_dst_drop(skb);
18618c2ecf20Sopenharmony_ci
18628c2ecf20Sopenharmony_ci	/* drop conntrack reference */
18638c2ecf20Sopenharmony_ci	nf_reset_ct(skb);
18648c2ecf20Sopenharmony_ci
18658c2ecf20Sopenharmony_ci	spkt = &PACKET_SKB_CB(skb)->sa.pkt;
18668c2ecf20Sopenharmony_ci
18678c2ecf20Sopenharmony_ci	skb_push(skb, skb->data - skb_mac_header(skb));
18688c2ecf20Sopenharmony_ci
18698c2ecf20Sopenharmony_ci	/*
18708c2ecf20Sopenharmony_ci	 *	The SOCK_PACKET socket receives _all_ frames.
18718c2ecf20Sopenharmony_ci	 */
18728c2ecf20Sopenharmony_ci
18738c2ecf20Sopenharmony_ci	spkt->spkt_family = dev->type;
18748c2ecf20Sopenharmony_ci	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
18758c2ecf20Sopenharmony_ci	spkt->spkt_protocol = skb->protocol;
18768c2ecf20Sopenharmony_ci
18778c2ecf20Sopenharmony_ci	/*
18788c2ecf20Sopenharmony_ci	 *	Charge the memory to the socket. This is done specifically
18798c2ecf20Sopenharmony_ci	 *	to prevent sockets using all the memory up.
18808c2ecf20Sopenharmony_ci	 */
18818c2ecf20Sopenharmony_ci
18828c2ecf20Sopenharmony_ci	if (sock_queue_rcv_skb(sk, skb) == 0)
18838c2ecf20Sopenharmony_ci		return 0;
18848c2ecf20Sopenharmony_ci
18858c2ecf20Sopenharmony_ciout:
18868c2ecf20Sopenharmony_ci	kfree_skb(skb);
18878c2ecf20Sopenharmony_cioom:
18888c2ecf20Sopenharmony_ci	return 0;
18898c2ecf20Sopenharmony_ci}
18908c2ecf20Sopenharmony_ci
18918c2ecf20Sopenharmony_cistatic void packet_parse_headers(struct sk_buff *skb, struct socket *sock)
18928c2ecf20Sopenharmony_ci{
18938c2ecf20Sopenharmony_ci	int depth;
18948c2ecf20Sopenharmony_ci
18958c2ecf20Sopenharmony_ci	if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) &&
18968c2ecf20Sopenharmony_ci	    sock->type == SOCK_RAW) {
18978c2ecf20Sopenharmony_ci		skb_reset_mac_header(skb);
18988c2ecf20Sopenharmony_ci		skb->protocol = dev_parse_header_protocol(skb);
18998c2ecf20Sopenharmony_ci	}
19008c2ecf20Sopenharmony_ci
19018c2ecf20Sopenharmony_ci	/* Move network header to the right position for VLAN tagged packets */
19028c2ecf20Sopenharmony_ci	if (likely(skb->dev->type == ARPHRD_ETHER) &&
19038c2ecf20Sopenharmony_ci	    eth_type_vlan(skb->protocol) &&
19048c2ecf20Sopenharmony_ci	    vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0)
19058c2ecf20Sopenharmony_ci		skb_set_network_header(skb, depth);
19068c2ecf20Sopenharmony_ci
19078c2ecf20Sopenharmony_ci	skb_probe_transport_header(skb);
19088c2ecf20Sopenharmony_ci}
19098c2ecf20Sopenharmony_ci
19108c2ecf20Sopenharmony_ci/*
19118c2ecf20Sopenharmony_ci *	Output a raw packet to a device layer. This bypasses all the other
19128c2ecf20Sopenharmony_ci *	protocol layers and you must therefore supply it with a complete frame
19138c2ecf20Sopenharmony_ci */
19148c2ecf20Sopenharmony_ci
19158c2ecf20Sopenharmony_cistatic int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
19168c2ecf20Sopenharmony_ci			       size_t len)
19178c2ecf20Sopenharmony_ci{
19188c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
19198c2ecf20Sopenharmony_ci	DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
19208c2ecf20Sopenharmony_ci	struct sk_buff *skb = NULL;
19218c2ecf20Sopenharmony_ci	struct net_device *dev;
19228c2ecf20Sopenharmony_ci	struct sockcm_cookie sockc;
19238c2ecf20Sopenharmony_ci	__be16 proto = 0;
19248c2ecf20Sopenharmony_ci	int err;
19258c2ecf20Sopenharmony_ci	int extra_len = 0;
19268c2ecf20Sopenharmony_ci
19278c2ecf20Sopenharmony_ci	/*
19288c2ecf20Sopenharmony_ci	 *	Get and verify the address.
19298c2ecf20Sopenharmony_ci	 */
19308c2ecf20Sopenharmony_ci
19318c2ecf20Sopenharmony_ci	if (saddr) {
19328c2ecf20Sopenharmony_ci		if (msg->msg_namelen < sizeof(struct sockaddr))
19338c2ecf20Sopenharmony_ci			return -EINVAL;
19348c2ecf20Sopenharmony_ci		if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
19358c2ecf20Sopenharmony_ci			proto = saddr->spkt_protocol;
19368c2ecf20Sopenharmony_ci	} else
19378c2ecf20Sopenharmony_ci		return -ENOTCONN;	/* SOCK_PACKET must be sent giving an address */
19388c2ecf20Sopenharmony_ci
19398c2ecf20Sopenharmony_ci	/*
19408c2ecf20Sopenharmony_ci	 *	Find the device first to size check it
19418c2ecf20Sopenharmony_ci	 */
19428c2ecf20Sopenharmony_ci
19438c2ecf20Sopenharmony_ci	saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0;
19448c2ecf20Sopenharmony_ciretry:
19458c2ecf20Sopenharmony_ci	rcu_read_lock();
19468c2ecf20Sopenharmony_ci	dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
19478c2ecf20Sopenharmony_ci	err = -ENODEV;
19488c2ecf20Sopenharmony_ci	if (dev == NULL)
19498c2ecf20Sopenharmony_ci		goto out_unlock;
19508c2ecf20Sopenharmony_ci
19518c2ecf20Sopenharmony_ci	err = -ENETDOWN;
19528c2ecf20Sopenharmony_ci	if (!(dev->flags & IFF_UP))
19538c2ecf20Sopenharmony_ci		goto out_unlock;
19548c2ecf20Sopenharmony_ci
19558c2ecf20Sopenharmony_ci	/*
19568c2ecf20Sopenharmony_ci	 * You may not queue a frame bigger than the mtu. This is the lowest level
19578c2ecf20Sopenharmony_ci	 * raw protocol and you must do your own fragmentation at this level.
19588c2ecf20Sopenharmony_ci	 */
19598c2ecf20Sopenharmony_ci
19608c2ecf20Sopenharmony_ci	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
19618c2ecf20Sopenharmony_ci		if (!netif_supports_nofcs(dev)) {
19628c2ecf20Sopenharmony_ci			err = -EPROTONOSUPPORT;
19638c2ecf20Sopenharmony_ci			goto out_unlock;
19648c2ecf20Sopenharmony_ci		}
19658c2ecf20Sopenharmony_ci		extra_len = 4; /* We're doing our own CRC */
19668c2ecf20Sopenharmony_ci	}
19678c2ecf20Sopenharmony_ci
19688c2ecf20Sopenharmony_ci	err = -EMSGSIZE;
19698c2ecf20Sopenharmony_ci	if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len)
19708c2ecf20Sopenharmony_ci		goto out_unlock;
19718c2ecf20Sopenharmony_ci
19728c2ecf20Sopenharmony_ci	if (!skb) {
19738c2ecf20Sopenharmony_ci		size_t reserved = LL_RESERVED_SPACE(dev);
19748c2ecf20Sopenharmony_ci		int tlen = dev->needed_tailroom;
19758c2ecf20Sopenharmony_ci		unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
19768c2ecf20Sopenharmony_ci
19778c2ecf20Sopenharmony_ci		rcu_read_unlock();
19788c2ecf20Sopenharmony_ci		skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL);
19798c2ecf20Sopenharmony_ci		if (skb == NULL)
19808c2ecf20Sopenharmony_ci			return -ENOBUFS;
19818c2ecf20Sopenharmony_ci		/* FIXME: Save some space for broken drivers that write a hard
19828c2ecf20Sopenharmony_ci		 * header at transmission time by themselves. PPP is the notable
19838c2ecf20Sopenharmony_ci		 * one here. This should really be fixed at the driver level.
19848c2ecf20Sopenharmony_ci		 */
19858c2ecf20Sopenharmony_ci		skb_reserve(skb, reserved);
19868c2ecf20Sopenharmony_ci		skb_reset_network_header(skb);
19878c2ecf20Sopenharmony_ci
19888c2ecf20Sopenharmony_ci		/* Try to align data part correctly */
19898c2ecf20Sopenharmony_ci		if (hhlen) {
19908c2ecf20Sopenharmony_ci			skb->data -= hhlen;
19918c2ecf20Sopenharmony_ci			skb->tail -= hhlen;
19928c2ecf20Sopenharmony_ci			if (len < hhlen)
19938c2ecf20Sopenharmony_ci				skb_reset_network_header(skb);
19948c2ecf20Sopenharmony_ci		}
19958c2ecf20Sopenharmony_ci		err = memcpy_from_msg(skb_put(skb, len), msg, len);
19968c2ecf20Sopenharmony_ci		if (err)
19978c2ecf20Sopenharmony_ci			goto out_free;
19988c2ecf20Sopenharmony_ci		goto retry;
19998c2ecf20Sopenharmony_ci	}
20008c2ecf20Sopenharmony_ci
20018c2ecf20Sopenharmony_ci	if (!dev_validate_header(dev, skb->data, len) || !skb->len) {
20028c2ecf20Sopenharmony_ci		err = -EINVAL;
20038c2ecf20Sopenharmony_ci		goto out_unlock;
20048c2ecf20Sopenharmony_ci	}
20058c2ecf20Sopenharmony_ci	if (len > (dev->mtu + dev->hard_header_len + extra_len) &&
20068c2ecf20Sopenharmony_ci	    !packet_extra_vlan_len_allowed(dev, skb)) {
20078c2ecf20Sopenharmony_ci		err = -EMSGSIZE;
20088c2ecf20Sopenharmony_ci		goto out_unlock;
20098c2ecf20Sopenharmony_ci	}
20108c2ecf20Sopenharmony_ci
20118c2ecf20Sopenharmony_ci	sockcm_init(&sockc, sk);
20128c2ecf20Sopenharmony_ci	if (msg->msg_controllen) {
20138c2ecf20Sopenharmony_ci		err = sock_cmsg_send(sk, msg, &sockc);
20148c2ecf20Sopenharmony_ci		if (unlikely(err))
20158c2ecf20Sopenharmony_ci			goto out_unlock;
20168c2ecf20Sopenharmony_ci	}
20178c2ecf20Sopenharmony_ci
20188c2ecf20Sopenharmony_ci	skb->protocol = proto;
20198c2ecf20Sopenharmony_ci	skb->dev = dev;
20208c2ecf20Sopenharmony_ci	skb->priority = sk->sk_priority;
20218c2ecf20Sopenharmony_ci	skb->mark = sk->sk_mark;
20228c2ecf20Sopenharmony_ci	skb->tstamp = sockc.transmit_time;
20238c2ecf20Sopenharmony_ci
20248c2ecf20Sopenharmony_ci	skb_setup_tx_timestamp(skb, sockc.tsflags);
20258c2ecf20Sopenharmony_ci
20268c2ecf20Sopenharmony_ci	if (unlikely(extra_len == 4))
20278c2ecf20Sopenharmony_ci		skb->no_fcs = 1;
20288c2ecf20Sopenharmony_ci
20298c2ecf20Sopenharmony_ci	packet_parse_headers(skb, sock);
20308c2ecf20Sopenharmony_ci
20318c2ecf20Sopenharmony_ci	dev_queue_xmit(skb);
20328c2ecf20Sopenharmony_ci	rcu_read_unlock();
20338c2ecf20Sopenharmony_ci	return len;
20348c2ecf20Sopenharmony_ci
20358c2ecf20Sopenharmony_ciout_unlock:
20368c2ecf20Sopenharmony_ci	rcu_read_unlock();
20378c2ecf20Sopenharmony_ciout_free:
20388c2ecf20Sopenharmony_ci	kfree_skb(skb);
20398c2ecf20Sopenharmony_ci	return err;
20408c2ecf20Sopenharmony_ci}
20418c2ecf20Sopenharmony_ci
20428c2ecf20Sopenharmony_cistatic unsigned int run_filter(struct sk_buff *skb,
20438c2ecf20Sopenharmony_ci			       const struct sock *sk,
20448c2ecf20Sopenharmony_ci			       unsigned int res)
20458c2ecf20Sopenharmony_ci{
20468c2ecf20Sopenharmony_ci	struct sk_filter *filter;
20478c2ecf20Sopenharmony_ci
20488c2ecf20Sopenharmony_ci	rcu_read_lock();
20498c2ecf20Sopenharmony_ci	filter = rcu_dereference(sk->sk_filter);
20508c2ecf20Sopenharmony_ci	if (filter != NULL)
20518c2ecf20Sopenharmony_ci		res = bpf_prog_run_clear_cb(filter->prog, skb);
20528c2ecf20Sopenharmony_ci	rcu_read_unlock();
20538c2ecf20Sopenharmony_ci
20548c2ecf20Sopenharmony_ci	return res;
20558c2ecf20Sopenharmony_ci}
20568c2ecf20Sopenharmony_ci
20578c2ecf20Sopenharmony_cistatic int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb,
20588c2ecf20Sopenharmony_ci			   size_t *len)
20598c2ecf20Sopenharmony_ci{
20608c2ecf20Sopenharmony_ci	struct virtio_net_hdr vnet_hdr;
20618c2ecf20Sopenharmony_ci
20628c2ecf20Sopenharmony_ci	if (*len < sizeof(vnet_hdr))
20638c2ecf20Sopenharmony_ci		return -EINVAL;
20648c2ecf20Sopenharmony_ci	*len -= sizeof(vnet_hdr);
20658c2ecf20Sopenharmony_ci
20668c2ecf20Sopenharmony_ci	if (virtio_net_hdr_from_skb(skb, &vnet_hdr, vio_le(), true, 0))
20678c2ecf20Sopenharmony_ci		return -EINVAL;
20688c2ecf20Sopenharmony_ci
20698c2ecf20Sopenharmony_ci	return memcpy_to_msg(msg, (void *)&vnet_hdr, sizeof(vnet_hdr));
20708c2ecf20Sopenharmony_ci}
20718c2ecf20Sopenharmony_ci
20728c2ecf20Sopenharmony_ci/*
20738c2ecf20Sopenharmony_ci * This function makes lazy skb cloning in hope that most of packets
20748c2ecf20Sopenharmony_ci * are discarded by BPF.
20758c2ecf20Sopenharmony_ci *
20768c2ecf20Sopenharmony_ci * Note tricky part: we DO mangle shared skb! skb->data, skb->len
20778c2ecf20Sopenharmony_ci * and skb->cb are mangled. It works because (and until) packets
20788c2ecf20Sopenharmony_ci * falling here are owned by current CPU. Output packets are cloned
20798c2ecf20Sopenharmony_ci * by dev_queue_xmit_nit(), input packets are processed by net_bh
20808c2ecf20Sopenharmony_ci * sequencially, so that if we return skb to original state on exit,
20818c2ecf20Sopenharmony_ci * we will not harm anyone.
20828c2ecf20Sopenharmony_ci */
20838c2ecf20Sopenharmony_ci
20848c2ecf20Sopenharmony_cistatic int packet_rcv(struct sk_buff *skb, struct net_device *dev,
20858c2ecf20Sopenharmony_ci		      struct packet_type *pt, struct net_device *orig_dev)
20868c2ecf20Sopenharmony_ci{
20878c2ecf20Sopenharmony_ci	struct sock *sk;
20888c2ecf20Sopenharmony_ci	struct sockaddr_ll *sll;
20898c2ecf20Sopenharmony_ci	struct packet_sock *po;
20908c2ecf20Sopenharmony_ci	u8 *skb_head = skb->data;
20918c2ecf20Sopenharmony_ci	int skb_len = skb->len;
20928c2ecf20Sopenharmony_ci	unsigned int snaplen, res;
20938c2ecf20Sopenharmony_ci	bool is_drop_n_account = false;
20948c2ecf20Sopenharmony_ci
20958c2ecf20Sopenharmony_ci	if (skb->pkt_type == PACKET_LOOPBACK)
20968c2ecf20Sopenharmony_ci		goto drop;
20978c2ecf20Sopenharmony_ci
20988c2ecf20Sopenharmony_ci	sk = pt->af_packet_priv;
20998c2ecf20Sopenharmony_ci	po = pkt_sk(sk);
21008c2ecf20Sopenharmony_ci
21018c2ecf20Sopenharmony_ci	if (!net_eq(dev_net(dev), sock_net(sk)))
21028c2ecf20Sopenharmony_ci		goto drop;
21038c2ecf20Sopenharmony_ci
21048c2ecf20Sopenharmony_ci	skb->dev = dev;
21058c2ecf20Sopenharmony_ci
21068c2ecf20Sopenharmony_ci	if (dev_has_header(dev)) {
21078c2ecf20Sopenharmony_ci		/* The device has an explicit notion of ll header,
21088c2ecf20Sopenharmony_ci		 * exported to higher levels.
21098c2ecf20Sopenharmony_ci		 *
21108c2ecf20Sopenharmony_ci		 * Otherwise, the device hides details of its frame
21118c2ecf20Sopenharmony_ci		 * structure, so that corresponding packet head is
21128c2ecf20Sopenharmony_ci		 * never delivered to user.
21138c2ecf20Sopenharmony_ci		 */
21148c2ecf20Sopenharmony_ci		if (sk->sk_type != SOCK_DGRAM)
21158c2ecf20Sopenharmony_ci			skb_push(skb, skb->data - skb_mac_header(skb));
21168c2ecf20Sopenharmony_ci		else if (skb->pkt_type == PACKET_OUTGOING) {
21178c2ecf20Sopenharmony_ci			/* Special case: outgoing packets have ll header at head */
21188c2ecf20Sopenharmony_ci			skb_pull(skb, skb_network_offset(skb));
21198c2ecf20Sopenharmony_ci		}
21208c2ecf20Sopenharmony_ci	}
21218c2ecf20Sopenharmony_ci
21228c2ecf20Sopenharmony_ci	snaplen = skb->len;
21238c2ecf20Sopenharmony_ci
21248c2ecf20Sopenharmony_ci	res = run_filter(skb, sk, snaplen);
21258c2ecf20Sopenharmony_ci	if (!res)
21268c2ecf20Sopenharmony_ci		goto drop_n_restore;
21278c2ecf20Sopenharmony_ci	if (snaplen > res)
21288c2ecf20Sopenharmony_ci		snaplen = res;
21298c2ecf20Sopenharmony_ci
21308c2ecf20Sopenharmony_ci	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
21318c2ecf20Sopenharmony_ci		goto drop_n_acct;
21328c2ecf20Sopenharmony_ci
21338c2ecf20Sopenharmony_ci	if (skb_shared(skb)) {
21348c2ecf20Sopenharmony_ci		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
21358c2ecf20Sopenharmony_ci		if (nskb == NULL)
21368c2ecf20Sopenharmony_ci			goto drop_n_acct;
21378c2ecf20Sopenharmony_ci
21388c2ecf20Sopenharmony_ci		if (skb_head != skb->data) {
21398c2ecf20Sopenharmony_ci			skb->data = skb_head;
21408c2ecf20Sopenharmony_ci			skb->len = skb_len;
21418c2ecf20Sopenharmony_ci		}
21428c2ecf20Sopenharmony_ci		consume_skb(skb);
21438c2ecf20Sopenharmony_ci		skb = nskb;
21448c2ecf20Sopenharmony_ci	}
21458c2ecf20Sopenharmony_ci
21468c2ecf20Sopenharmony_ci	sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
21478c2ecf20Sopenharmony_ci
21488c2ecf20Sopenharmony_ci	sll = &PACKET_SKB_CB(skb)->sa.ll;
21498c2ecf20Sopenharmony_ci	sll->sll_hatype = dev->type;
21508c2ecf20Sopenharmony_ci	sll->sll_pkttype = skb->pkt_type;
21518c2ecf20Sopenharmony_ci	if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
21528c2ecf20Sopenharmony_ci		sll->sll_ifindex = orig_dev->ifindex;
21538c2ecf20Sopenharmony_ci	else
21548c2ecf20Sopenharmony_ci		sll->sll_ifindex = dev->ifindex;
21558c2ecf20Sopenharmony_ci
21568c2ecf20Sopenharmony_ci	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
21578c2ecf20Sopenharmony_ci
21588c2ecf20Sopenharmony_ci	/* sll->sll_family and sll->sll_protocol are set in packet_recvmsg().
21598c2ecf20Sopenharmony_ci	 * Use their space for storing the original skb length.
21608c2ecf20Sopenharmony_ci	 */
21618c2ecf20Sopenharmony_ci	PACKET_SKB_CB(skb)->sa.origlen = skb->len;
21628c2ecf20Sopenharmony_ci
21638c2ecf20Sopenharmony_ci	if (pskb_trim(skb, snaplen))
21648c2ecf20Sopenharmony_ci		goto drop_n_acct;
21658c2ecf20Sopenharmony_ci
21668c2ecf20Sopenharmony_ci	skb_set_owner_r(skb, sk);
21678c2ecf20Sopenharmony_ci	skb->dev = NULL;
21688c2ecf20Sopenharmony_ci	skb_dst_drop(skb);
21698c2ecf20Sopenharmony_ci
21708c2ecf20Sopenharmony_ci	/* drop conntrack reference */
21718c2ecf20Sopenharmony_ci	nf_reset_ct(skb);
21728c2ecf20Sopenharmony_ci
21738c2ecf20Sopenharmony_ci	spin_lock(&sk->sk_receive_queue.lock);
21748c2ecf20Sopenharmony_ci	po->stats.stats1.tp_packets++;
21758c2ecf20Sopenharmony_ci	sock_skb_set_dropcount(sk, skb);
21768c2ecf20Sopenharmony_ci	__skb_queue_tail(&sk->sk_receive_queue, skb);
21778c2ecf20Sopenharmony_ci	spin_unlock(&sk->sk_receive_queue.lock);
21788c2ecf20Sopenharmony_ci	sk->sk_data_ready(sk);
21798c2ecf20Sopenharmony_ci	return 0;
21808c2ecf20Sopenharmony_ci
21818c2ecf20Sopenharmony_cidrop_n_acct:
21828c2ecf20Sopenharmony_ci	is_drop_n_account = true;
21838c2ecf20Sopenharmony_ci	atomic_inc(&po->tp_drops);
21848c2ecf20Sopenharmony_ci	atomic_inc(&sk->sk_drops);
21858c2ecf20Sopenharmony_ci
21868c2ecf20Sopenharmony_cidrop_n_restore:
21878c2ecf20Sopenharmony_ci	if (skb_head != skb->data && skb_shared(skb)) {
21888c2ecf20Sopenharmony_ci		skb->data = skb_head;
21898c2ecf20Sopenharmony_ci		skb->len = skb_len;
21908c2ecf20Sopenharmony_ci	}
21918c2ecf20Sopenharmony_cidrop:
21928c2ecf20Sopenharmony_ci	if (!is_drop_n_account)
21938c2ecf20Sopenharmony_ci		consume_skb(skb);
21948c2ecf20Sopenharmony_ci	else
21958c2ecf20Sopenharmony_ci		kfree_skb(skb);
21968c2ecf20Sopenharmony_ci	return 0;
21978c2ecf20Sopenharmony_ci}
21988c2ecf20Sopenharmony_ci
21998c2ecf20Sopenharmony_cistatic int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
22008c2ecf20Sopenharmony_ci		       struct packet_type *pt, struct net_device *orig_dev)
22018c2ecf20Sopenharmony_ci{
22028c2ecf20Sopenharmony_ci	struct sock *sk;
22038c2ecf20Sopenharmony_ci	struct packet_sock *po;
22048c2ecf20Sopenharmony_ci	struct sockaddr_ll *sll;
22058c2ecf20Sopenharmony_ci	union tpacket_uhdr h;
22068c2ecf20Sopenharmony_ci	u8 *skb_head = skb->data;
22078c2ecf20Sopenharmony_ci	int skb_len = skb->len;
22088c2ecf20Sopenharmony_ci	unsigned int snaplen, res;
22098c2ecf20Sopenharmony_ci	unsigned long status = TP_STATUS_USER;
22108c2ecf20Sopenharmony_ci	unsigned short macoff, hdrlen;
22118c2ecf20Sopenharmony_ci	unsigned int netoff;
22128c2ecf20Sopenharmony_ci	struct sk_buff *copy_skb = NULL;
22138c2ecf20Sopenharmony_ci	struct timespec64 ts;
22148c2ecf20Sopenharmony_ci	__u32 ts_status;
22158c2ecf20Sopenharmony_ci	bool is_drop_n_account = false;
22168c2ecf20Sopenharmony_ci	unsigned int slot_id = 0;
22178c2ecf20Sopenharmony_ci	bool do_vnet = false;
22188c2ecf20Sopenharmony_ci
22198c2ecf20Sopenharmony_ci	/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
22208c2ecf20Sopenharmony_ci	 * We may add members to them until current aligned size without forcing
22218c2ecf20Sopenharmony_ci	 * userspace to call getsockopt(..., PACKET_HDRLEN, ...).
22228c2ecf20Sopenharmony_ci	 */
22238c2ecf20Sopenharmony_ci	BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32);
22248c2ecf20Sopenharmony_ci	BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48);
22258c2ecf20Sopenharmony_ci
22268c2ecf20Sopenharmony_ci	if (skb->pkt_type == PACKET_LOOPBACK)
22278c2ecf20Sopenharmony_ci		goto drop;
22288c2ecf20Sopenharmony_ci
22298c2ecf20Sopenharmony_ci	sk = pt->af_packet_priv;
22308c2ecf20Sopenharmony_ci	po = pkt_sk(sk);
22318c2ecf20Sopenharmony_ci
22328c2ecf20Sopenharmony_ci	if (!net_eq(dev_net(dev), sock_net(sk)))
22338c2ecf20Sopenharmony_ci		goto drop;
22348c2ecf20Sopenharmony_ci
22358c2ecf20Sopenharmony_ci	if (dev_has_header(dev)) {
22368c2ecf20Sopenharmony_ci		if (sk->sk_type != SOCK_DGRAM)
22378c2ecf20Sopenharmony_ci			skb_push(skb, skb->data - skb_mac_header(skb));
22388c2ecf20Sopenharmony_ci		else if (skb->pkt_type == PACKET_OUTGOING) {
22398c2ecf20Sopenharmony_ci			/* Special case: outgoing packets have ll header at head */
22408c2ecf20Sopenharmony_ci			skb_pull(skb, skb_network_offset(skb));
22418c2ecf20Sopenharmony_ci		}
22428c2ecf20Sopenharmony_ci	}
22438c2ecf20Sopenharmony_ci
22448c2ecf20Sopenharmony_ci	snaplen = skb->len;
22458c2ecf20Sopenharmony_ci
22468c2ecf20Sopenharmony_ci	res = run_filter(skb, sk, snaplen);
22478c2ecf20Sopenharmony_ci	if (!res)
22488c2ecf20Sopenharmony_ci		goto drop_n_restore;
22498c2ecf20Sopenharmony_ci
22508c2ecf20Sopenharmony_ci	/* If we are flooded, just give up */
22518c2ecf20Sopenharmony_ci	if (__packet_rcv_has_room(po, skb) == ROOM_NONE) {
22528c2ecf20Sopenharmony_ci		atomic_inc(&po->tp_drops);
22538c2ecf20Sopenharmony_ci		goto drop_n_restore;
22548c2ecf20Sopenharmony_ci	}
22558c2ecf20Sopenharmony_ci
22568c2ecf20Sopenharmony_ci	if (skb->ip_summed == CHECKSUM_PARTIAL)
22578c2ecf20Sopenharmony_ci		status |= TP_STATUS_CSUMNOTREADY;
22588c2ecf20Sopenharmony_ci	else if (skb->pkt_type != PACKET_OUTGOING &&
22598c2ecf20Sopenharmony_ci		 skb_csum_unnecessary(skb))
22608c2ecf20Sopenharmony_ci		status |= TP_STATUS_CSUM_VALID;
22618c2ecf20Sopenharmony_ci
22628c2ecf20Sopenharmony_ci	if (snaplen > res)
22638c2ecf20Sopenharmony_ci		snaplen = res;
22648c2ecf20Sopenharmony_ci
22658c2ecf20Sopenharmony_ci	if (sk->sk_type == SOCK_DGRAM) {
22668c2ecf20Sopenharmony_ci		macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
22678c2ecf20Sopenharmony_ci				  po->tp_reserve;
22688c2ecf20Sopenharmony_ci	} else {
22698c2ecf20Sopenharmony_ci		unsigned int maclen = skb_network_offset(skb);
22708c2ecf20Sopenharmony_ci		netoff = TPACKET_ALIGN(po->tp_hdrlen +
22718c2ecf20Sopenharmony_ci				       (maclen < 16 ? 16 : maclen)) +
22728c2ecf20Sopenharmony_ci				       po->tp_reserve;
22738c2ecf20Sopenharmony_ci		if (po->has_vnet_hdr) {
22748c2ecf20Sopenharmony_ci			netoff += sizeof(struct virtio_net_hdr);
22758c2ecf20Sopenharmony_ci			do_vnet = true;
22768c2ecf20Sopenharmony_ci		}
22778c2ecf20Sopenharmony_ci		macoff = netoff - maclen;
22788c2ecf20Sopenharmony_ci	}
22798c2ecf20Sopenharmony_ci	if (netoff > USHRT_MAX) {
22808c2ecf20Sopenharmony_ci		atomic_inc(&po->tp_drops);
22818c2ecf20Sopenharmony_ci		goto drop_n_restore;
22828c2ecf20Sopenharmony_ci	}
22838c2ecf20Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
22848c2ecf20Sopenharmony_ci		if (macoff + snaplen > po->rx_ring.frame_size) {
22858c2ecf20Sopenharmony_ci			if (po->copy_thresh &&
22868c2ecf20Sopenharmony_ci			    atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) {
22878c2ecf20Sopenharmony_ci				if (skb_shared(skb)) {
22888c2ecf20Sopenharmony_ci					copy_skb = skb_clone(skb, GFP_ATOMIC);
22898c2ecf20Sopenharmony_ci				} else {
22908c2ecf20Sopenharmony_ci					copy_skb = skb_get(skb);
22918c2ecf20Sopenharmony_ci					skb_head = skb->data;
22928c2ecf20Sopenharmony_ci				}
22938c2ecf20Sopenharmony_ci				if (copy_skb) {
22948c2ecf20Sopenharmony_ci					memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
22958c2ecf20Sopenharmony_ci					       sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
22968c2ecf20Sopenharmony_ci					skb_set_owner_r(copy_skb, sk);
22978c2ecf20Sopenharmony_ci				}
22988c2ecf20Sopenharmony_ci			}
22998c2ecf20Sopenharmony_ci			snaplen = po->rx_ring.frame_size - macoff;
23008c2ecf20Sopenharmony_ci			if ((int)snaplen < 0) {
23018c2ecf20Sopenharmony_ci				snaplen = 0;
23028c2ecf20Sopenharmony_ci				do_vnet = false;
23038c2ecf20Sopenharmony_ci			}
23048c2ecf20Sopenharmony_ci		}
23058c2ecf20Sopenharmony_ci	} else if (unlikely(macoff + snaplen >
23068c2ecf20Sopenharmony_ci			    GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
23078c2ecf20Sopenharmony_ci		u32 nval;
23088c2ecf20Sopenharmony_ci
23098c2ecf20Sopenharmony_ci		nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff;
23108c2ecf20Sopenharmony_ci		pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n",
23118c2ecf20Sopenharmony_ci			    snaplen, nval, macoff);
23128c2ecf20Sopenharmony_ci		snaplen = nval;
23138c2ecf20Sopenharmony_ci		if (unlikely((int)snaplen < 0)) {
23148c2ecf20Sopenharmony_ci			snaplen = 0;
23158c2ecf20Sopenharmony_ci			macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
23168c2ecf20Sopenharmony_ci			do_vnet = false;
23178c2ecf20Sopenharmony_ci		}
23188c2ecf20Sopenharmony_ci	}
23198c2ecf20Sopenharmony_ci	spin_lock(&sk->sk_receive_queue.lock);
23208c2ecf20Sopenharmony_ci	h.raw = packet_current_rx_frame(po, skb,
23218c2ecf20Sopenharmony_ci					TP_STATUS_KERNEL, (macoff+snaplen));
23228c2ecf20Sopenharmony_ci	if (!h.raw)
23238c2ecf20Sopenharmony_ci		goto drop_n_account;
23248c2ecf20Sopenharmony_ci
23258c2ecf20Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
23268c2ecf20Sopenharmony_ci		slot_id = po->rx_ring.head;
23278c2ecf20Sopenharmony_ci		if (test_bit(slot_id, po->rx_ring.rx_owner_map))
23288c2ecf20Sopenharmony_ci			goto drop_n_account;
23298c2ecf20Sopenharmony_ci		__set_bit(slot_id, po->rx_ring.rx_owner_map);
23308c2ecf20Sopenharmony_ci	}
23318c2ecf20Sopenharmony_ci
23328c2ecf20Sopenharmony_ci	if (do_vnet &&
23338c2ecf20Sopenharmony_ci	    virtio_net_hdr_from_skb(skb, h.raw + macoff -
23348c2ecf20Sopenharmony_ci				    sizeof(struct virtio_net_hdr),
23358c2ecf20Sopenharmony_ci				    vio_le(), true, 0)) {
23368c2ecf20Sopenharmony_ci		if (po->tp_version == TPACKET_V3)
23378c2ecf20Sopenharmony_ci			prb_clear_blk_fill_status(&po->rx_ring);
23388c2ecf20Sopenharmony_ci		goto drop_n_account;
23398c2ecf20Sopenharmony_ci	}
23408c2ecf20Sopenharmony_ci
23418c2ecf20Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
23428c2ecf20Sopenharmony_ci		packet_increment_rx_head(po, &po->rx_ring);
23438c2ecf20Sopenharmony_ci	/*
23448c2ecf20Sopenharmony_ci	 * LOSING will be reported till you read the stats,
23458c2ecf20Sopenharmony_ci	 * because it's COR - Clear On Read.
23468c2ecf20Sopenharmony_ci	 * Anyways, moving it for V1/V2 only as V3 doesn't need this
23478c2ecf20Sopenharmony_ci	 * at packet level.
23488c2ecf20Sopenharmony_ci	 */
23498c2ecf20Sopenharmony_ci		if (atomic_read(&po->tp_drops))
23508c2ecf20Sopenharmony_ci			status |= TP_STATUS_LOSING;
23518c2ecf20Sopenharmony_ci	}
23528c2ecf20Sopenharmony_ci
23538c2ecf20Sopenharmony_ci	po->stats.stats1.tp_packets++;
23548c2ecf20Sopenharmony_ci	if (copy_skb) {
23558c2ecf20Sopenharmony_ci		status |= TP_STATUS_COPY;
23568c2ecf20Sopenharmony_ci		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
23578c2ecf20Sopenharmony_ci	}
23588c2ecf20Sopenharmony_ci	spin_unlock(&sk->sk_receive_queue.lock);
23598c2ecf20Sopenharmony_ci
23608c2ecf20Sopenharmony_ci	skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
23618c2ecf20Sopenharmony_ci
23628c2ecf20Sopenharmony_ci	/* Always timestamp; prefer an existing software timestamp taken
23638c2ecf20Sopenharmony_ci	 * closer to the time of capture.
23648c2ecf20Sopenharmony_ci	 */
23658c2ecf20Sopenharmony_ci	ts_status = tpacket_get_timestamp(skb, &ts,
23668c2ecf20Sopenharmony_ci					  po->tp_tstamp | SOF_TIMESTAMPING_SOFTWARE);
23678c2ecf20Sopenharmony_ci	if (!ts_status)
23688c2ecf20Sopenharmony_ci		ktime_get_real_ts64(&ts);
23698c2ecf20Sopenharmony_ci
23708c2ecf20Sopenharmony_ci	status |= ts_status;
23718c2ecf20Sopenharmony_ci
23728c2ecf20Sopenharmony_ci	switch (po->tp_version) {
23738c2ecf20Sopenharmony_ci	case TPACKET_V1:
23748c2ecf20Sopenharmony_ci		h.h1->tp_len = skb->len;
23758c2ecf20Sopenharmony_ci		h.h1->tp_snaplen = snaplen;
23768c2ecf20Sopenharmony_ci		h.h1->tp_mac = macoff;
23778c2ecf20Sopenharmony_ci		h.h1->tp_net = netoff;
23788c2ecf20Sopenharmony_ci		h.h1->tp_sec = ts.tv_sec;
23798c2ecf20Sopenharmony_ci		h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC;
23808c2ecf20Sopenharmony_ci		hdrlen = sizeof(*h.h1);
23818c2ecf20Sopenharmony_ci		break;
23828c2ecf20Sopenharmony_ci	case TPACKET_V2:
23838c2ecf20Sopenharmony_ci		h.h2->tp_len = skb->len;
23848c2ecf20Sopenharmony_ci		h.h2->tp_snaplen = snaplen;
23858c2ecf20Sopenharmony_ci		h.h2->tp_mac = macoff;
23868c2ecf20Sopenharmony_ci		h.h2->tp_net = netoff;
23878c2ecf20Sopenharmony_ci		h.h2->tp_sec = ts.tv_sec;
23888c2ecf20Sopenharmony_ci		h.h2->tp_nsec = ts.tv_nsec;
23898c2ecf20Sopenharmony_ci		if (skb_vlan_tag_present(skb)) {
23908c2ecf20Sopenharmony_ci			h.h2->tp_vlan_tci = skb_vlan_tag_get(skb);
23918c2ecf20Sopenharmony_ci			h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto);
23928c2ecf20Sopenharmony_ci			status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
23938c2ecf20Sopenharmony_ci		} else {
23948c2ecf20Sopenharmony_ci			h.h2->tp_vlan_tci = 0;
23958c2ecf20Sopenharmony_ci			h.h2->tp_vlan_tpid = 0;
23968c2ecf20Sopenharmony_ci		}
23978c2ecf20Sopenharmony_ci		memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding));
23988c2ecf20Sopenharmony_ci		hdrlen = sizeof(*h.h2);
23998c2ecf20Sopenharmony_ci		break;
24008c2ecf20Sopenharmony_ci	case TPACKET_V3:
24018c2ecf20Sopenharmony_ci		/* tp_nxt_offset,vlan are already populated above.
24028c2ecf20Sopenharmony_ci		 * So DONT clear those fields here
24038c2ecf20Sopenharmony_ci		 */
24048c2ecf20Sopenharmony_ci		h.h3->tp_status |= status;
24058c2ecf20Sopenharmony_ci		h.h3->tp_len = skb->len;
24068c2ecf20Sopenharmony_ci		h.h3->tp_snaplen = snaplen;
24078c2ecf20Sopenharmony_ci		h.h3->tp_mac = macoff;
24088c2ecf20Sopenharmony_ci		h.h3->tp_net = netoff;
24098c2ecf20Sopenharmony_ci		h.h3->tp_sec  = ts.tv_sec;
24108c2ecf20Sopenharmony_ci		h.h3->tp_nsec = ts.tv_nsec;
24118c2ecf20Sopenharmony_ci		memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding));
24128c2ecf20Sopenharmony_ci		hdrlen = sizeof(*h.h3);
24138c2ecf20Sopenharmony_ci		break;
24148c2ecf20Sopenharmony_ci	default:
24158c2ecf20Sopenharmony_ci		BUG();
24168c2ecf20Sopenharmony_ci	}
24178c2ecf20Sopenharmony_ci
24188c2ecf20Sopenharmony_ci	sll = h.raw + TPACKET_ALIGN(hdrlen);
24198c2ecf20Sopenharmony_ci	sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
24208c2ecf20Sopenharmony_ci	sll->sll_family = AF_PACKET;
24218c2ecf20Sopenharmony_ci	sll->sll_hatype = dev->type;
24228c2ecf20Sopenharmony_ci	sll->sll_protocol = skb->protocol;
24238c2ecf20Sopenharmony_ci	sll->sll_pkttype = skb->pkt_type;
24248c2ecf20Sopenharmony_ci	if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV)))
24258c2ecf20Sopenharmony_ci		sll->sll_ifindex = orig_dev->ifindex;
24268c2ecf20Sopenharmony_ci	else
24278c2ecf20Sopenharmony_ci		sll->sll_ifindex = dev->ifindex;
24288c2ecf20Sopenharmony_ci
24298c2ecf20Sopenharmony_ci	smp_mb();
24308c2ecf20Sopenharmony_ci
24318c2ecf20Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
24328c2ecf20Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
24338c2ecf20Sopenharmony_ci		u8 *start, *end;
24348c2ecf20Sopenharmony_ci
24358c2ecf20Sopenharmony_ci		end = (u8 *) PAGE_ALIGN((unsigned long) h.raw +
24368c2ecf20Sopenharmony_ci					macoff + snaplen);
24378c2ecf20Sopenharmony_ci
24388c2ecf20Sopenharmony_ci		for (start = h.raw; start < end; start += PAGE_SIZE)
24398c2ecf20Sopenharmony_ci			flush_dcache_page(pgv_to_page(start));
24408c2ecf20Sopenharmony_ci	}
24418c2ecf20Sopenharmony_ci	smp_wmb();
24428c2ecf20Sopenharmony_ci#endif
24438c2ecf20Sopenharmony_ci
24448c2ecf20Sopenharmony_ci	if (po->tp_version <= TPACKET_V2) {
24458c2ecf20Sopenharmony_ci		spin_lock(&sk->sk_receive_queue.lock);
24468c2ecf20Sopenharmony_ci		__packet_set_status(po, h.raw, status);
24478c2ecf20Sopenharmony_ci		__clear_bit(slot_id, po->rx_ring.rx_owner_map);
24488c2ecf20Sopenharmony_ci		spin_unlock(&sk->sk_receive_queue.lock);
24498c2ecf20Sopenharmony_ci		sk->sk_data_ready(sk);
24508c2ecf20Sopenharmony_ci	} else if (po->tp_version == TPACKET_V3) {
24518c2ecf20Sopenharmony_ci		prb_clear_blk_fill_status(&po->rx_ring);
24528c2ecf20Sopenharmony_ci	}
24538c2ecf20Sopenharmony_ci
24548c2ecf20Sopenharmony_cidrop_n_restore:
24558c2ecf20Sopenharmony_ci	if (skb_head != skb->data && skb_shared(skb)) {
24568c2ecf20Sopenharmony_ci		skb->data = skb_head;
24578c2ecf20Sopenharmony_ci		skb->len = skb_len;
24588c2ecf20Sopenharmony_ci	}
24598c2ecf20Sopenharmony_cidrop:
24608c2ecf20Sopenharmony_ci	if (!is_drop_n_account)
24618c2ecf20Sopenharmony_ci		consume_skb(skb);
24628c2ecf20Sopenharmony_ci	else
24638c2ecf20Sopenharmony_ci		kfree_skb(skb);
24648c2ecf20Sopenharmony_ci	return 0;
24658c2ecf20Sopenharmony_ci
24668c2ecf20Sopenharmony_cidrop_n_account:
24678c2ecf20Sopenharmony_ci	spin_unlock(&sk->sk_receive_queue.lock);
24688c2ecf20Sopenharmony_ci	atomic_inc(&po->tp_drops);
24698c2ecf20Sopenharmony_ci	is_drop_n_account = true;
24708c2ecf20Sopenharmony_ci
24718c2ecf20Sopenharmony_ci	sk->sk_data_ready(sk);
24728c2ecf20Sopenharmony_ci	kfree_skb(copy_skb);
24738c2ecf20Sopenharmony_ci	goto drop_n_restore;
24748c2ecf20Sopenharmony_ci}
24758c2ecf20Sopenharmony_ci
24768c2ecf20Sopenharmony_cistatic void tpacket_destruct_skb(struct sk_buff *skb)
24778c2ecf20Sopenharmony_ci{
24788c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(skb->sk);
24798c2ecf20Sopenharmony_ci
24808c2ecf20Sopenharmony_ci	if (likely(po->tx_ring.pg_vec)) {
24818c2ecf20Sopenharmony_ci		void *ph;
24828c2ecf20Sopenharmony_ci		__u32 ts;
24838c2ecf20Sopenharmony_ci
24848c2ecf20Sopenharmony_ci		ph = skb_zcopy_get_nouarg(skb);
24858c2ecf20Sopenharmony_ci		packet_dec_pending(&po->tx_ring);
24868c2ecf20Sopenharmony_ci
24878c2ecf20Sopenharmony_ci		ts = __packet_set_timestamp(po, ph, skb);
24888c2ecf20Sopenharmony_ci		__packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts);
24898c2ecf20Sopenharmony_ci
24908c2ecf20Sopenharmony_ci		if (!packet_read_pending(&po->tx_ring))
24918c2ecf20Sopenharmony_ci			complete(&po->skb_completion);
24928c2ecf20Sopenharmony_ci	}
24938c2ecf20Sopenharmony_ci
24948c2ecf20Sopenharmony_ci	sock_wfree(skb);
24958c2ecf20Sopenharmony_ci}
24968c2ecf20Sopenharmony_ci
24978c2ecf20Sopenharmony_cistatic int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len)
24988c2ecf20Sopenharmony_ci{
24998c2ecf20Sopenharmony_ci	if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
25008c2ecf20Sopenharmony_ci	    (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
25018c2ecf20Sopenharmony_ci	     __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 >
25028c2ecf20Sopenharmony_ci	      __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len)))
25038c2ecf20Sopenharmony_ci		vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(),
25048c2ecf20Sopenharmony_ci			 __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) +
25058c2ecf20Sopenharmony_ci			__virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2);
25068c2ecf20Sopenharmony_ci
25078c2ecf20Sopenharmony_ci	if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len)
25088c2ecf20Sopenharmony_ci		return -EINVAL;
25098c2ecf20Sopenharmony_ci
25108c2ecf20Sopenharmony_ci	return 0;
25118c2ecf20Sopenharmony_ci}
25128c2ecf20Sopenharmony_ci
25138c2ecf20Sopenharmony_cistatic int packet_snd_vnet_parse(struct msghdr *msg, size_t *len,
25148c2ecf20Sopenharmony_ci				 struct virtio_net_hdr *vnet_hdr)
25158c2ecf20Sopenharmony_ci{
25168c2ecf20Sopenharmony_ci	if (*len < sizeof(*vnet_hdr))
25178c2ecf20Sopenharmony_ci		return -EINVAL;
25188c2ecf20Sopenharmony_ci	*len -= sizeof(*vnet_hdr);
25198c2ecf20Sopenharmony_ci
25208c2ecf20Sopenharmony_ci	if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter))
25218c2ecf20Sopenharmony_ci		return -EFAULT;
25228c2ecf20Sopenharmony_ci
25238c2ecf20Sopenharmony_ci	return __packet_snd_vnet_parse(vnet_hdr, *len);
25248c2ecf20Sopenharmony_ci}
25258c2ecf20Sopenharmony_ci
25268c2ecf20Sopenharmony_cistatic int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
25278c2ecf20Sopenharmony_ci		void *frame, struct net_device *dev, void *data, int tp_len,
25288c2ecf20Sopenharmony_ci		__be16 proto, unsigned char *addr, int hlen, int copylen,
25298c2ecf20Sopenharmony_ci		const struct sockcm_cookie *sockc)
25308c2ecf20Sopenharmony_ci{
25318c2ecf20Sopenharmony_ci	union tpacket_uhdr ph;
25328c2ecf20Sopenharmony_ci	int to_write, offset, len, nr_frags, len_max;
25338c2ecf20Sopenharmony_ci	struct socket *sock = po->sk.sk_socket;
25348c2ecf20Sopenharmony_ci	struct page *page;
25358c2ecf20Sopenharmony_ci	int err;
25368c2ecf20Sopenharmony_ci
25378c2ecf20Sopenharmony_ci	ph.raw = frame;
25388c2ecf20Sopenharmony_ci
25398c2ecf20Sopenharmony_ci	skb->protocol = proto;
25408c2ecf20Sopenharmony_ci	skb->dev = dev;
25418c2ecf20Sopenharmony_ci	skb->priority = po->sk.sk_priority;
25428c2ecf20Sopenharmony_ci	skb->mark = po->sk.sk_mark;
25438c2ecf20Sopenharmony_ci	skb->tstamp = sockc->transmit_time;
25448c2ecf20Sopenharmony_ci	skb_setup_tx_timestamp(skb, sockc->tsflags);
25458c2ecf20Sopenharmony_ci	skb_zcopy_set_nouarg(skb, ph.raw);
25468c2ecf20Sopenharmony_ci
25478c2ecf20Sopenharmony_ci	skb_reserve(skb, hlen);
25488c2ecf20Sopenharmony_ci	skb_reset_network_header(skb);
25498c2ecf20Sopenharmony_ci
25508c2ecf20Sopenharmony_ci	to_write = tp_len;
25518c2ecf20Sopenharmony_ci
25528c2ecf20Sopenharmony_ci	if (sock->type == SOCK_DGRAM) {
25538c2ecf20Sopenharmony_ci		err = dev_hard_header(skb, dev, ntohs(proto), addr,
25548c2ecf20Sopenharmony_ci				NULL, tp_len);
25558c2ecf20Sopenharmony_ci		if (unlikely(err < 0))
25568c2ecf20Sopenharmony_ci			return -EINVAL;
25578c2ecf20Sopenharmony_ci	} else if (copylen) {
25588c2ecf20Sopenharmony_ci		int hdrlen = min_t(int, copylen, tp_len);
25598c2ecf20Sopenharmony_ci
25608c2ecf20Sopenharmony_ci		skb_push(skb, dev->hard_header_len);
25618c2ecf20Sopenharmony_ci		skb_put(skb, copylen - dev->hard_header_len);
25628c2ecf20Sopenharmony_ci		err = skb_store_bits(skb, 0, data, hdrlen);
25638c2ecf20Sopenharmony_ci		if (unlikely(err))
25648c2ecf20Sopenharmony_ci			return err;
25658c2ecf20Sopenharmony_ci		if (!dev_validate_header(dev, skb->data, hdrlen))
25668c2ecf20Sopenharmony_ci			return -EINVAL;
25678c2ecf20Sopenharmony_ci
25688c2ecf20Sopenharmony_ci		data += hdrlen;
25698c2ecf20Sopenharmony_ci		to_write -= hdrlen;
25708c2ecf20Sopenharmony_ci	}
25718c2ecf20Sopenharmony_ci
25728c2ecf20Sopenharmony_ci	offset = offset_in_page(data);
25738c2ecf20Sopenharmony_ci	len_max = PAGE_SIZE - offset;
25748c2ecf20Sopenharmony_ci	len = ((to_write > len_max) ? len_max : to_write);
25758c2ecf20Sopenharmony_ci
25768c2ecf20Sopenharmony_ci	skb->data_len = to_write;
25778c2ecf20Sopenharmony_ci	skb->len += to_write;
25788c2ecf20Sopenharmony_ci	skb->truesize += to_write;
25798c2ecf20Sopenharmony_ci	refcount_add(to_write, &po->sk.sk_wmem_alloc);
25808c2ecf20Sopenharmony_ci
25818c2ecf20Sopenharmony_ci	while (likely(to_write)) {
25828c2ecf20Sopenharmony_ci		nr_frags = skb_shinfo(skb)->nr_frags;
25838c2ecf20Sopenharmony_ci
25848c2ecf20Sopenharmony_ci		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
25858c2ecf20Sopenharmony_ci			pr_err("Packet exceed the number of skb frags(%lu)\n",
25868c2ecf20Sopenharmony_ci			       MAX_SKB_FRAGS);
25878c2ecf20Sopenharmony_ci			return -EFAULT;
25888c2ecf20Sopenharmony_ci		}
25898c2ecf20Sopenharmony_ci
25908c2ecf20Sopenharmony_ci		page = pgv_to_page(data);
25918c2ecf20Sopenharmony_ci		data += len;
25928c2ecf20Sopenharmony_ci		flush_dcache_page(page);
25938c2ecf20Sopenharmony_ci		get_page(page);
25948c2ecf20Sopenharmony_ci		skb_fill_page_desc(skb, nr_frags, page, offset, len);
25958c2ecf20Sopenharmony_ci		to_write -= len;
25968c2ecf20Sopenharmony_ci		offset = 0;
25978c2ecf20Sopenharmony_ci		len_max = PAGE_SIZE;
25988c2ecf20Sopenharmony_ci		len = ((to_write > len_max) ? len_max : to_write);
25998c2ecf20Sopenharmony_ci	}
26008c2ecf20Sopenharmony_ci
26018c2ecf20Sopenharmony_ci	packet_parse_headers(skb, sock);
26028c2ecf20Sopenharmony_ci
26038c2ecf20Sopenharmony_ci	return tp_len;
26048c2ecf20Sopenharmony_ci}
26058c2ecf20Sopenharmony_ci
26068c2ecf20Sopenharmony_cistatic int tpacket_parse_header(struct packet_sock *po, void *frame,
26078c2ecf20Sopenharmony_ci				int size_max, void **data)
26088c2ecf20Sopenharmony_ci{
26098c2ecf20Sopenharmony_ci	union tpacket_uhdr ph;
26108c2ecf20Sopenharmony_ci	int tp_len, off;
26118c2ecf20Sopenharmony_ci
26128c2ecf20Sopenharmony_ci	ph.raw = frame;
26138c2ecf20Sopenharmony_ci
26148c2ecf20Sopenharmony_ci	switch (po->tp_version) {
26158c2ecf20Sopenharmony_ci	case TPACKET_V3:
26168c2ecf20Sopenharmony_ci		if (ph.h3->tp_next_offset != 0) {
26178c2ecf20Sopenharmony_ci			pr_warn_once("variable sized slot not supported");
26188c2ecf20Sopenharmony_ci			return -EINVAL;
26198c2ecf20Sopenharmony_ci		}
26208c2ecf20Sopenharmony_ci		tp_len = ph.h3->tp_len;
26218c2ecf20Sopenharmony_ci		break;
26228c2ecf20Sopenharmony_ci	case TPACKET_V2:
26238c2ecf20Sopenharmony_ci		tp_len = ph.h2->tp_len;
26248c2ecf20Sopenharmony_ci		break;
26258c2ecf20Sopenharmony_ci	default:
26268c2ecf20Sopenharmony_ci		tp_len = ph.h1->tp_len;
26278c2ecf20Sopenharmony_ci		break;
26288c2ecf20Sopenharmony_ci	}
26298c2ecf20Sopenharmony_ci	if (unlikely(tp_len > size_max)) {
26308c2ecf20Sopenharmony_ci		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
26318c2ecf20Sopenharmony_ci		return -EMSGSIZE;
26328c2ecf20Sopenharmony_ci	}
26338c2ecf20Sopenharmony_ci
26348c2ecf20Sopenharmony_ci	if (unlikely(po->tp_tx_has_off)) {
26358c2ecf20Sopenharmony_ci		int off_min, off_max;
26368c2ecf20Sopenharmony_ci
26378c2ecf20Sopenharmony_ci		off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll);
26388c2ecf20Sopenharmony_ci		off_max = po->tx_ring.frame_size - tp_len;
26398c2ecf20Sopenharmony_ci		if (po->sk.sk_type == SOCK_DGRAM) {
26408c2ecf20Sopenharmony_ci			switch (po->tp_version) {
26418c2ecf20Sopenharmony_ci			case TPACKET_V3:
26428c2ecf20Sopenharmony_ci				off = ph.h3->tp_net;
26438c2ecf20Sopenharmony_ci				break;
26448c2ecf20Sopenharmony_ci			case TPACKET_V2:
26458c2ecf20Sopenharmony_ci				off = ph.h2->tp_net;
26468c2ecf20Sopenharmony_ci				break;
26478c2ecf20Sopenharmony_ci			default:
26488c2ecf20Sopenharmony_ci				off = ph.h1->tp_net;
26498c2ecf20Sopenharmony_ci				break;
26508c2ecf20Sopenharmony_ci			}
26518c2ecf20Sopenharmony_ci		} else {
26528c2ecf20Sopenharmony_ci			switch (po->tp_version) {
26538c2ecf20Sopenharmony_ci			case TPACKET_V3:
26548c2ecf20Sopenharmony_ci				off = ph.h3->tp_mac;
26558c2ecf20Sopenharmony_ci				break;
26568c2ecf20Sopenharmony_ci			case TPACKET_V2:
26578c2ecf20Sopenharmony_ci				off = ph.h2->tp_mac;
26588c2ecf20Sopenharmony_ci				break;
26598c2ecf20Sopenharmony_ci			default:
26608c2ecf20Sopenharmony_ci				off = ph.h1->tp_mac;
26618c2ecf20Sopenharmony_ci				break;
26628c2ecf20Sopenharmony_ci			}
26638c2ecf20Sopenharmony_ci		}
26648c2ecf20Sopenharmony_ci		if (unlikely((off < off_min) || (off_max < off)))
26658c2ecf20Sopenharmony_ci			return -EINVAL;
26668c2ecf20Sopenharmony_ci	} else {
26678c2ecf20Sopenharmony_ci		off = po->tp_hdrlen - sizeof(struct sockaddr_ll);
26688c2ecf20Sopenharmony_ci	}
26698c2ecf20Sopenharmony_ci
26708c2ecf20Sopenharmony_ci	*data = frame + off;
26718c2ecf20Sopenharmony_ci	return tp_len;
26728c2ecf20Sopenharmony_ci}
26738c2ecf20Sopenharmony_ci
26748c2ecf20Sopenharmony_cistatic int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
26758c2ecf20Sopenharmony_ci{
26768c2ecf20Sopenharmony_ci	struct sk_buff *skb = NULL;
26778c2ecf20Sopenharmony_ci	struct net_device *dev;
26788c2ecf20Sopenharmony_ci	struct virtio_net_hdr *vnet_hdr = NULL;
26798c2ecf20Sopenharmony_ci	struct sockcm_cookie sockc;
26808c2ecf20Sopenharmony_ci	__be16 proto;
26818c2ecf20Sopenharmony_ci	int err, reserve = 0;
26828c2ecf20Sopenharmony_ci	void *ph;
26838c2ecf20Sopenharmony_ci	DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
26848c2ecf20Sopenharmony_ci	bool need_wait = !(msg->msg_flags & MSG_DONTWAIT);
26858c2ecf20Sopenharmony_ci	unsigned char *addr = NULL;
26868c2ecf20Sopenharmony_ci	int tp_len, size_max;
26878c2ecf20Sopenharmony_ci	void *data;
26888c2ecf20Sopenharmony_ci	int len_sum = 0;
26898c2ecf20Sopenharmony_ci	int status = TP_STATUS_AVAILABLE;
26908c2ecf20Sopenharmony_ci	int hlen, tlen, copylen = 0;
26918c2ecf20Sopenharmony_ci	long timeo = 0;
26928c2ecf20Sopenharmony_ci
26938c2ecf20Sopenharmony_ci	mutex_lock(&po->pg_vec_lock);
26948c2ecf20Sopenharmony_ci
26958c2ecf20Sopenharmony_ci	/* packet_sendmsg() check on tx_ring.pg_vec was lockless,
26968c2ecf20Sopenharmony_ci	 * we need to confirm it under protection of pg_vec_lock.
26978c2ecf20Sopenharmony_ci	 */
26988c2ecf20Sopenharmony_ci	if (unlikely(!po->tx_ring.pg_vec)) {
26998c2ecf20Sopenharmony_ci		err = -EBUSY;
27008c2ecf20Sopenharmony_ci		goto out;
27018c2ecf20Sopenharmony_ci	}
27028c2ecf20Sopenharmony_ci	if (likely(saddr == NULL)) {
27038c2ecf20Sopenharmony_ci		dev	= packet_cached_dev_get(po);
27048c2ecf20Sopenharmony_ci		proto	= READ_ONCE(po->num);
27058c2ecf20Sopenharmony_ci	} else {
27068c2ecf20Sopenharmony_ci		err = -EINVAL;
27078c2ecf20Sopenharmony_ci		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
27088c2ecf20Sopenharmony_ci			goto out;
27098c2ecf20Sopenharmony_ci		if (msg->msg_namelen < (saddr->sll_halen
27108c2ecf20Sopenharmony_ci					+ offsetof(struct sockaddr_ll,
27118c2ecf20Sopenharmony_ci						sll_addr)))
27128c2ecf20Sopenharmony_ci			goto out;
27138c2ecf20Sopenharmony_ci		proto	= saddr->sll_protocol;
27148c2ecf20Sopenharmony_ci		dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex);
27158c2ecf20Sopenharmony_ci		if (po->sk.sk_socket->type == SOCK_DGRAM) {
27168c2ecf20Sopenharmony_ci			if (dev && msg->msg_namelen < dev->addr_len +
27178c2ecf20Sopenharmony_ci				   offsetof(struct sockaddr_ll, sll_addr))
27188c2ecf20Sopenharmony_ci				goto out_put;
27198c2ecf20Sopenharmony_ci			addr = saddr->sll_addr;
27208c2ecf20Sopenharmony_ci		}
27218c2ecf20Sopenharmony_ci	}
27228c2ecf20Sopenharmony_ci
27238c2ecf20Sopenharmony_ci	err = -ENXIO;
27248c2ecf20Sopenharmony_ci	if (unlikely(dev == NULL))
27258c2ecf20Sopenharmony_ci		goto out;
27268c2ecf20Sopenharmony_ci	err = -ENETDOWN;
27278c2ecf20Sopenharmony_ci	if (unlikely(!(dev->flags & IFF_UP)))
27288c2ecf20Sopenharmony_ci		goto out_put;
27298c2ecf20Sopenharmony_ci
27308c2ecf20Sopenharmony_ci	sockcm_init(&sockc, &po->sk);
27318c2ecf20Sopenharmony_ci	if (msg->msg_controllen) {
27328c2ecf20Sopenharmony_ci		err = sock_cmsg_send(&po->sk, msg, &sockc);
27338c2ecf20Sopenharmony_ci		if (unlikely(err))
27348c2ecf20Sopenharmony_ci			goto out_put;
27358c2ecf20Sopenharmony_ci	}
27368c2ecf20Sopenharmony_ci
27378c2ecf20Sopenharmony_ci	if (po->sk.sk_socket->type == SOCK_RAW)
27388c2ecf20Sopenharmony_ci		reserve = dev->hard_header_len;
27398c2ecf20Sopenharmony_ci	size_max = po->tx_ring.frame_size
27408c2ecf20Sopenharmony_ci		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));
27418c2ecf20Sopenharmony_ci
27428c2ecf20Sopenharmony_ci	if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !po->has_vnet_hdr)
27438c2ecf20Sopenharmony_ci		size_max = dev->mtu + reserve + VLAN_HLEN;
27448c2ecf20Sopenharmony_ci
27458c2ecf20Sopenharmony_ci	reinit_completion(&po->skb_completion);
27468c2ecf20Sopenharmony_ci
27478c2ecf20Sopenharmony_ci	do {
27488c2ecf20Sopenharmony_ci		ph = packet_current_frame(po, &po->tx_ring,
27498c2ecf20Sopenharmony_ci					  TP_STATUS_SEND_REQUEST);
27508c2ecf20Sopenharmony_ci		if (unlikely(ph == NULL)) {
27518c2ecf20Sopenharmony_ci			if (need_wait && skb) {
27528c2ecf20Sopenharmony_ci				timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT);
27538c2ecf20Sopenharmony_ci				timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo);
27548c2ecf20Sopenharmony_ci				if (timeo <= 0) {
27558c2ecf20Sopenharmony_ci					err = !timeo ? -ETIMEDOUT : -ERESTARTSYS;
27568c2ecf20Sopenharmony_ci					goto out_put;
27578c2ecf20Sopenharmony_ci				}
27588c2ecf20Sopenharmony_ci			}
27598c2ecf20Sopenharmony_ci			/* check for additional frames */
27608c2ecf20Sopenharmony_ci			continue;
27618c2ecf20Sopenharmony_ci		}
27628c2ecf20Sopenharmony_ci
27638c2ecf20Sopenharmony_ci		skb = NULL;
27648c2ecf20Sopenharmony_ci		tp_len = tpacket_parse_header(po, ph, size_max, &data);
27658c2ecf20Sopenharmony_ci		if (tp_len < 0)
27668c2ecf20Sopenharmony_ci			goto tpacket_error;
27678c2ecf20Sopenharmony_ci
27688c2ecf20Sopenharmony_ci		status = TP_STATUS_SEND_REQUEST;
27698c2ecf20Sopenharmony_ci		hlen = LL_RESERVED_SPACE(dev);
27708c2ecf20Sopenharmony_ci		tlen = dev->needed_tailroom;
27718c2ecf20Sopenharmony_ci		if (po->has_vnet_hdr) {
27728c2ecf20Sopenharmony_ci			vnet_hdr = data;
27738c2ecf20Sopenharmony_ci			data += sizeof(*vnet_hdr);
27748c2ecf20Sopenharmony_ci			tp_len -= sizeof(*vnet_hdr);
27758c2ecf20Sopenharmony_ci			if (tp_len < 0 ||
27768c2ecf20Sopenharmony_ci			    __packet_snd_vnet_parse(vnet_hdr, tp_len)) {
27778c2ecf20Sopenharmony_ci				tp_len = -EINVAL;
27788c2ecf20Sopenharmony_ci				goto tpacket_error;
27798c2ecf20Sopenharmony_ci			}
27808c2ecf20Sopenharmony_ci			copylen = __virtio16_to_cpu(vio_le(),
27818c2ecf20Sopenharmony_ci						    vnet_hdr->hdr_len);
27828c2ecf20Sopenharmony_ci		}
27838c2ecf20Sopenharmony_ci		copylen = max_t(int, copylen, dev->hard_header_len);
27848c2ecf20Sopenharmony_ci		skb = sock_alloc_send_skb(&po->sk,
27858c2ecf20Sopenharmony_ci				hlen + tlen + sizeof(struct sockaddr_ll) +
27868c2ecf20Sopenharmony_ci				(copylen - dev->hard_header_len),
27878c2ecf20Sopenharmony_ci				!need_wait, &err);
27888c2ecf20Sopenharmony_ci
27898c2ecf20Sopenharmony_ci		if (unlikely(skb == NULL)) {
27908c2ecf20Sopenharmony_ci			/* we assume the socket was initially writeable ... */
27918c2ecf20Sopenharmony_ci			if (likely(len_sum > 0))
27928c2ecf20Sopenharmony_ci				err = len_sum;
27938c2ecf20Sopenharmony_ci			goto out_status;
27948c2ecf20Sopenharmony_ci		}
27958c2ecf20Sopenharmony_ci		tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto,
27968c2ecf20Sopenharmony_ci					  addr, hlen, copylen, &sockc);
27978c2ecf20Sopenharmony_ci		if (likely(tp_len >= 0) &&
27988c2ecf20Sopenharmony_ci		    tp_len > dev->mtu + reserve &&
27998c2ecf20Sopenharmony_ci		    !po->has_vnet_hdr &&
28008c2ecf20Sopenharmony_ci		    !packet_extra_vlan_len_allowed(dev, skb))
28018c2ecf20Sopenharmony_ci			tp_len = -EMSGSIZE;
28028c2ecf20Sopenharmony_ci
28038c2ecf20Sopenharmony_ci		if (unlikely(tp_len < 0)) {
28048c2ecf20Sopenharmony_citpacket_error:
28058c2ecf20Sopenharmony_ci			if (po->tp_loss) {
28068c2ecf20Sopenharmony_ci				__packet_set_status(po, ph,
28078c2ecf20Sopenharmony_ci						TP_STATUS_AVAILABLE);
28088c2ecf20Sopenharmony_ci				packet_increment_head(&po->tx_ring);
28098c2ecf20Sopenharmony_ci				kfree_skb(skb);
28108c2ecf20Sopenharmony_ci				continue;
28118c2ecf20Sopenharmony_ci			} else {
28128c2ecf20Sopenharmony_ci				status = TP_STATUS_WRONG_FORMAT;
28138c2ecf20Sopenharmony_ci				err = tp_len;
28148c2ecf20Sopenharmony_ci				goto out_status;
28158c2ecf20Sopenharmony_ci			}
28168c2ecf20Sopenharmony_ci		}
28178c2ecf20Sopenharmony_ci
28188c2ecf20Sopenharmony_ci		if (po->has_vnet_hdr) {
28198c2ecf20Sopenharmony_ci			if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) {
28208c2ecf20Sopenharmony_ci				tp_len = -EINVAL;
28218c2ecf20Sopenharmony_ci				goto tpacket_error;
28228c2ecf20Sopenharmony_ci			}
28238c2ecf20Sopenharmony_ci			virtio_net_hdr_set_proto(skb, vnet_hdr);
28248c2ecf20Sopenharmony_ci		}
28258c2ecf20Sopenharmony_ci
28268c2ecf20Sopenharmony_ci		skb->destructor = tpacket_destruct_skb;
28278c2ecf20Sopenharmony_ci		__packet_set_status(po, ph, TP_STATUS_SENDING);
28288c2ecf20Sopenharmony_ci		packet_inc_pending(&po->tx_ring);
28298c2ecf20Sopenharmony_ci
28308c2ecf20Sopenharmony_ci		status = TP_STATUS_SEND_REQUEST;
28318c2ecf20Sopenharmony_ci		/* Paired with WRITE_ONCE() in packet_setsockopt() */
28328c2ecf20Sopenharmony_ci		err = READ_ONCE(po->xmit)(skb);
28338c2ecf20Sopenharmony_ci		if (unlikely(err != 0)) {
28348c2ecf20Sopenharmony_ci			if (err > 0)
28358c2ecf20Sopenharmony_ci				err = net_xmit_errno(err);
28368c2ecf20Sopenharmony_ci			if (err && __packet_get_status(po, ph) ==
28378c2ecf20Sopenharmony_ci				   TP_STATUS_AVAILABLE) {
28388c2ecf20Sopenharmony_ci				/* skb was destructed already */
28398c2ecf20Sopenharmony_ci				skb = NULL;
28408c2ecf20Sopenharmony_ci				goto out_status;
28418c2ecf20Sopenharmony_ci			}
28428c2ecf20Sopenharmony_ci			/*
28438c2ecf20Sopenharmony_ci			 * skb was dropped but not destructed yet;
28448c2ecf20Sopenharmony_ci			 * let's treat it like congestion or err < 0
28458c2ecf20Sopenharmony_ci			 */
28468c2ecf20Sopenharmony_ci			err = 0;
28478c2ecf20Sopenharmony_ci		}
28488c2ecf20Sopenharmony_ci		packet_increment_head(&po->tx_ring);
28498c2ecf20Sopenharmony_ci		len_sum += tp_len;
28508c2ecf20Sopenharmony_ci	} while (likely((ph != NULL) ||
28518c2ecf20Sopenharmony_ci		/* Note: packet_read_pending() might be slow if we have
28528c2ecf20Sopenharmony_ci		 * to call it as it's per_cpu variable, but in fast-path
28538c2ecf20Sopenharmony_ci		 * we already short-circuit the loop with the first
28548c2ecf20Sopenharmony_ci		 * condition, and luckily don't have to go that path
28558c2ecf20Sopenharmony_ci		 * anyway.
28568c2ecf20Sopenharmony_ci		 */
28578c2ecf20Sopenharmony_ci		 (need_wait && packet_read_pending(&po->tx_ring))));
28588c2ecf20Sopenharmony_ci
28598c2ecf20Sopenharmony_ci	err = len_sum;
28608c2ecf20Sopenharmony_ci	goto out_put;
28618c2ecf20Sopenharmony_ci
28628c2ecf20Sopenharmony_ciout_status:
28638c2ecf20Sopenharmony_ci	__packet_set_status(po, ph, status);
28648c2ecf20Sopenharmony_ci	kfree_skb(skb);
28658c2ecf20Sopenharmony_ciout_put:
28668c2ecf20Sopenharmony_ci	dev_put(dev);
28678c2ecf20Sopenharmony_ciout:
28688c2ecf20Sopenharmony_ci	mutex_unlock(&po->pg_vec_lock);
28698c2ecf20Sopenharmony_ci	return err;
28708c2ecf20Sopenharmony_ci}
28718c2ecf20Sopenharmony_ci
28728c2ecf20Sopenharmony_cistatic struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
28738c2ecf20Sopenharmony_ci				        size_t reserve, size_t len,
28748c2ecf20Sopenharmony_ci				        size_t linear, int noblock,
28758c2ecf20Sopenharmony_ci				        int *err)
28768c2ecf20Sopenharmony_ci{
28778c2ecf20Sopenharmony_ci	struct sk_buff *skb;
28788c2ecf20Sopenharmony_ci
28798c2ecf20Sopenharmony_ci	/* Under a page?  Don't bother with paged skb. */
28808c2ecf20Sopenharmony_ci	if (prepad + len < PAGE_SIZE || !linear)
28818c2ecf20Sopenharmony_ci		linear = len;
28828c2ecf20Sopenharmony_ci
28838c2ecf20Sopenharmony_ci	skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
28848c2ecf20Sopenharmony_ci				   err, 0);
28858c2ecf20Sopenharmony_ci	if (!skb)
28868c2ecf20Sopenharmony_ci		return NULL;
28878c2ecf20Sopenharmony_ci
28888c2ecf20Sopenharmony_ci	skb_reserve(skb, reserve);
28898c2ecf20Sopenharmony_ci	skb_put(skb, linear);
28908c2ecf20Sopenharmony_ci	skb->data_len = len - linear;
28918c2ecf20Sopenharmony_ci	skb->len += len - linear;
28928c2ecf20Sopenharmony_ci
28938c2ecf20Sopenharmony_ci	return skb;
28948c2ecf20Sopenharmony_ci}
28958c2ecf20Sopenharmony_ci
28968c2ecf20Sopenharmony_cistatic int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
28978c2ecf20Sopenharmony_ci{
28988c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
28998c2ecf20Sopenharmony_ci	DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name);
29008c2ecf20Sopenharmony_ci	struct sk_buff *skb;
29018c2ecf20Sopenharmony_ci	struct net_device *dev;
29028c2ecf20Sopenharmony_ci	__be16 proto;
29038c2ecf20Sopenharmony_ci	unsigned char *addr = NULL;
29048c2ecf20Sopenharmony_ci	int err, reserve = 0;
29058c2ecf20Sopenharmony_ci	struct sockcm_cookie sockc;
29068c2ecf20Sopenharmony_ci	struct virtio_net_hdr vnet_hdr = { 0 };
29078c2ecf20Sopenharmony_ci	int offset = 0;
29088c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
29098c2ecf20Sopenharmony_ci	bool has_vnet_hdr = false;
29108c2ecf20Sopenharmony_ci	int hlen, tlen, linear;
29118c2ecf20Sopenharmony_ci	int extra_len = 0;
29128c2ecf20Sopenharmony_ci
29138c2ecf20Sopenharmony_ci	/*
29148c2ecf20Sopenharmony_ci	 *	Get and verify the address.
29158c2ecf20Sopenharmony_ci	 */
29168c2ecf20Sopenharmony_ci
29178c2ecf20Sopenharmony_ci	if (likely(saddr == NULL)) {
29188c2ecf20Sopenharmony_ci		dev	= packet_cached_dev_get(po);
29198c2ecf20Sopenharmony_ci		proto	= READ_ONCE(po->num);
29208c2ecf20Sopenharmony_ci	} else {
29218c2ecf20Sopenharmony_ci		err = -EINVAL;
29228c2ecf20Sopenharmony_ci		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
29238c2ecf20Sopenharmony_ci			goto out;
29248c2ecf20Sopenharmony_ci		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
29258c2ecf20Sopenharmony_ci			goto out;
29268c2ecf20Sopenharmony_ci		proto	= saddr->sll_protocol;
29278c2ecf20Sopenharmony_ci		dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex);
29288c2ecf20Sopenharmony_ci		if (sock->type == SOCK_DGRAM) {
29298c2ecf20Sopenharmony_ci			if (dev && msg->msg_namelen < dev->addr_len +
29308c2ecf20Sopenharmony_ci				   offsetof(struct sockaddr_ll, sll_addr))
29318c2ecf20Sopenharmony_ci				goto out_unlock;
29328c2ecf20Sopenharmony_ci			addr = saddr->sll_addr;
29338c2ecf20Sopenharmony_ci		}
29348c2ecf20Sopenharmony_ci	}
29358c2ecf20Sopenharmony_ci
29368c2ecf20Sopenharmony_ci	err = -ENXIO;
29378c2ecf20Sopenharmony_ci	if (unlikely(dev == NULL))
29388c2ecf20Sopenharmony_ci		goto out_unlock;
29398c2ecf20Sopenharmony_ci	err = -ENETDOWN;
29408c2ecf20Sopenharmony_ci	if (unlikely(!(dev->flags & IFF_UP)))
29418c2ecf20Sopenharmony_ci		goto out_unlock;
29428c2ecf20Sopenharmony_ci
29438c2ecf20Sopenharmony_ci	sockcm_init(&sockc, sk);
29448c2ecf20Sopenharmony_ci	sockc.mark = sk->sk_mark;
29458c2ecf20Sopenharmony_ci	if (msg->msg_controllen) {
29468c2ecf20Sopenharmony_ci		err = sock_cmsg_send(sk, msg, &sockc);
29478c2ecf20Sopenharmony_ci		if (unlikely(err))
29488c2ecf20Sopenharmony_ci			goto out_unlock;
29498c2ecf20Sopenharmony_ci	}
29508c2ecf20Sopenharmony_ci
29518c2ecf20Sopenharmony_ci	if (sock->type == SOCK_RAW)
29528c2ecf20Sopenharmony_ci		reserve = dev->hard_header_len;
29538c2ecf20Sopenharmony_ci	if (po->has_vnet_hdr) {
29548c2ecf20Sopenharmony_ci		err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
29558c2ecf20Sopenharmony_ci		if (err)
29568c2ecf20Sopenharmony_ci			goto out_unlock;
29578c2ecf20Sopenharmony_ci		has_vnet_hdr = true;
29588c2ecf20Sopenharmony_ci	}
29598c2ecf20Sopenharmony_ci
29608c2ecf20Sopenharmony_ci	if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
29618c2ecf20Sopenharmony_ci		if (!netif_supports_nofcs(dev)) {
29628c2ecf20Sopenharmony_ci			err = -EPROTONOSUPPORT;
29638c2ecf20Sopenharmony_ci			goto out_unlock;
29648c2ecf20Sopenharmony_ci		}
29658c2ecf20Sopenharmony_ci		extra_len = 4; /* We're doing our own CRC */
29668c2ecf20Sopenharmony_ci	}
29678c2ecf20Sopenharmony_ci
29688c2ecf20Sopenharmony_ci	err = -EMSGSIZE;
29698c2ecf20Sopenharmony_ci	if (!vnet_hdr.gso_type &&
29708c2ecf20Sopenharmony_ci	    (len > dev->mtu + reserve + VLAN_HLEN + extra_len))
29718c2ecf20Sopenharmony_ci		goto out_unlock;
29728c2ecf20Sopenharmony_ci
29738c2ecf20Sopenharmony_ci	err = -ENOBUFS;
29748c2ecf20Sopenharmony_ci	hlen = LL_RESERVED_SPACE(dev);
29758c2ecf20Sopenharmony_ci	tlen = dev->needed_tailroom;
29768c2ecf20Sopenharmony_ci	linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len);
29778c2ecf20Sopenharmony_ci	linear = max(linear, min_t(int, len, dev->hard_header_len));
29788c2ecf20Sopenharmony_ci	skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear,
29798c2ecf20Sopenharmony_ci			       msg->msg_flags & MSG_DONTWAIT, &err);
29808c2ecf20Sopenharmony_ci	if (skb == NULL)
29818c2ecf20Sopenharmony_ci		goto out_unlock;
29828c2ecf20Sopenharmony_ci
29838c2ecf20Sopenharmony_ci	skb_reset_network_header(skb);
29848c2ecf20Sopenharmony_ci
29858c2ecf20Sopenharmony_ci	err = -EINVAL;
29868c2ecf20Sopenharmony_ci	if (sock->type == SOCK_DGRAM) {
29878c2ecf20Sopenharmony_ci		offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len);
29888c2ecf20Sopenharmony_ci		if (unlikely(offset < 0))
29898c2ecf20Sopenharmony_ci			goto out_free;
29908c2ecf20Sopenharmony_ci	} else if (reserve) {
29918c2ecf20Sopenharmony_ci		skb_reserve(skb, -reserve);
29928c2ecf20Sopenharmony_ci		if (len < reserve + sizeof(struct ipv6hdr) &&
29938c2ecf20Sopenharmony_ci		    dev->min_header_len != dev->hard_header_len)
29948c2ecf20Sopenharmony_ci			skb_reset_network_header(skb);
29958c2ecf20Sopenharmony_ci	}
29968c2ecf20Sopenharmony_ci
29978c2ecf20Sopenharmony_ci	/* Returns -EFAULT on error */
29988c2ecf20Sopenharmony_ci	err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len);
29998c2ecf20Sopenharmony_ci	if (err)
30008c2ecf20Sopenharmony_ci		goto out_free;
30018c2ecf20Sopenharmony_ci
30028c2ecf20Sopenharmony_ci	if ((sock->type == SOCK_RAW &&
30038c2ecf20Sopenharmony_ci	     !dev_validate_header(dev, skb->data, len)) || !skb->len) {
30048c2ecf20Sopenharmony_ci		err = -EINVAL;
30058c2ecf20Sopenharmony_ci		goto out_free;
30068c2ecf20Sopenharmony_ci	}
30078c2ecf20Sopenharmony_ci
30088c2ecf20Sopenharmony_ci	skb_setup_tx_timestamp(skb, sockc.tsflags);
30098c2ecf20Sopenharmony_ci
30108c2ecf20Sopenharmony_ci	if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) &&
30118c2ecf20Sopenharmony_ci	    !packet_extra_vlan_len_allowed(dev, skb)) {
30128c2ecf20Sopenharmony_ci		err = -EMSGSIZE;
30138c2ecf20Sopenharmony_ci		goto out_free;
30148c2ecf20Sopenharmony_ci	}
30158c2ecf20Sopenharmony_ci
30168c2ecf20Sopenharmony_ci	skb->protocol = proto;
30178c2ecf20Sopenharmony_ci	skb->dev = dev;
30188c2ecf20Sopenharmony_ci	skb->priority = sk->sk_priority;
30198c2ecf20Sopenharmony_ci	skb->mark = sockc.mark;
30208c2ecf20Sopenharmony_ci	skb->tstamp = sockc.transmit_time;
30218c2ecf20Sopenharmony_ci
30228c2ecf20Sopenharmony_ci	if (unlikely(extra_len == 4))
30238c2ecf20Sopenharmony_ci		skb->no_fcs = 1;
30248c2ecf20Sopenharmony_ci
30258c2ecf20Sopenharmony_ci	packet_parse_headers(skb, sock);
30268c2ecf20Sopenharmony_ci
30278c2ecf20Sopenharmony_ci	if (has_vnet_hdr) {
30288c2ecf20Sopenharmony_ci		err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le());
30298c2ecf20Sopenharmony_ci		if (err)
30308c2ecf20Sopenharmony_ci			goto out_free;
30318c2ecf20Sopenharmony_ci		len += sizeof(vnet_hdr);
30328c2ecf20Sopenharmony_ci		virtio_net_hdr_set_proto(skb, &vnet_hdr);
30338c2ecf20Sopenharmony_ci	}
30348c2ecf20Sopenharmony_ci
30358c2ecf20Sopenharmony_ci	/* Paired with WRITE_ONCE() in packet_setsockopt() */
30368c2ecf20Sopenharmony_ci	err = READ_ONCE(po->xmit)(skb);
30378c2ecf20Sopenharmony_ci	if (unlikely(err != 0)) {
30388c2ecf20Sopenharmony_ci		if (err > 0)
30398c2ecf20Sopenharmony_ci			err = net_xmit_errno(err);
30408c2ecf20Sopenharmony_ci		if (err)
30418c2ecf20Sopenharmony_ci			goto out_unlock;
30428c2ecf20Sopenharmony_ci	}
30438c2ecf20Sopenharmony_ci
30448c2ecf20Sopenharmony_ci	dev_put(dev);
30458c2ecf20Sopenharmony_ci
30468c2ecf20Sopenharmony_ci	return len;
30478c2ecf20Sopenharmony_ci
30488c2ecf20Sopenharmony_ciout_free:
30498c2ecf20Sopenharmony_ci	kfree_skb(skb);
30508c2ecf20Sopenharmony_ciout_unlock:
30518c2ecf20Sopenharmony_ci	if (dev)
30528c2ecf20Sopenharmony_ci		dev_put(dev);
30538c2ecf20Sopenharmony_ciout:
30548c2ecf20Sopenharmony_ci	return err;
30558c2ecf20Sopenharmony_ci}
30568c2ecf20Sopenharmony_ci
30578c2ecf20Sopenharmony_cistatic int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
30588c2ecf20Sopenharmony_ci{
30598c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
30608c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
30618c2ecf20Sopenharmony_ci
30628c2ecf20Sopenharmony_ci	/* Reading tx_ring.pg_vec without holding pg_vec_lock is racy.
30638c2ecf20Sopenharmony_ci	 * tpacket_snd() will redo the check safely.
30648c2ecf20Sopenharmony_ci	 */
30658c2ecf20Sopenharmony_ci	if (data_race(po->tx_ring.pg_vec))
30668c2ecf20Sopenharmony_ci		return tpacket_snd(po, msg);
30678c2ecf20Sopenharmony_ci
30688c2ecf20Sopenharmony_ci	return packet_snd(sock, msg, len);
30698c2ecf20Sopenharmony_ci}
30708c2ecf20Sopenharmony_ci
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 *
 *	The steps below run in a fixed order: unlist the socket, unhook it
 *	from packet delivery, drop multicast memberships and rings, wait
 *	with synchronize_net(), and only then free the rollover/fanout
 *	data and purge what is still queued.  Always returns 0.
 */

static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct packet_fanout *f;
	struct net *net;
	union tpacket_req_u req_u;

	/* No sock attached to this socket: nothing to tear down. */
	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	/* Undo the sk_add_node_tail_rcu() done by packet_create(). */
	mutex_lock(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	mutex_unlock(&net->packet.sklist_lock);

	/* Undo the +1 in-use accounting done by packet_create(). */
	preempt_disable();
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	preempt_enable();

	/* With bind_lock held: unregister the hook, reset the cached
	 * device and drop the reference kept in prot_hook.dev (if any).
	 */
	spin_lock(&po->bind_lock);
	unregister_prot_hook(sk, false);
	packet_cached_dev_reset(po);

	if (po->prot_hook.dev) {
		dev_put(po->prot_hook.dev);
		po->prot_hook.dev = NULL;
	}
	spin_unlock(&po->bind_lock);

	/* Drop every entry on po->mclist. */
	packet_flush_mclist(sk);

	/* Tear down whichever rings exist by submitting an all-zero
	 * request; the third argument (1) is passed only from this path.
	 * NOTE(review): presumably it marks the "closing" case - confirm
	 * against packet_set_ring().
	 */
	lock_sock(sk);
	if (po->rx_ring.pg_vec) {
		memset(&req_u, 0, sizeof(req_u));
		packet_set_ring(sk, &req_u, 1, 0);
	}

	if (po->tx_ring.pg_vec) {
		memset(&req_u, 0, sizeof(req_u));
		packet_set_ring(sk, &req_u, 1, 1);
	}
	release_sock(sk);

	/* A non-NULL return is a fanout object that this function must
	 * free; that is done only after synchronize_net() below.
	 */
	f = fanout_release(sk);

	synchronize_net();

	/* Freed only after the synchronize_net() above. */
	kfree(po->rollover);
	if (f) {
		fanout_release_data(f);
		kvfree(f);
	}
	/*
	 *	Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	packet_free_pending(po);
	sk_refcnt_debug_release(sk);

	/* Drop this function's reference on the sock. */
	sock_put(sk);
	return 0;
}
31468c2ecf20Sopenharmony_ci
/*
 *	Attach a packet hook.
 *
 *	The device is selected by @name when it is non-NULL, otherwise by
 *	@ifindex when that is non-zero; with neither, no device is bound.
 *	A zero @proto is replaced by the socket's current po->num.
 *
 *	Returns 0 on success, -EINVAL if the socket has a fanout attached
 *	and -ENODEV if the requested device cannot be found.  A device that
 *	is down (or vanished while rebinding) still yields 0, but ENETDOWN
 *	is stored in sk->sk_err and reported on the socket.
 */

static int packet_do_bind(struct sock *sk, const char *name, int ifindex,
			  __be16 proto)
{
	struct packet_sock *po = pkt_sk(sk);
	struct net_device *dev_curr;
	__be16 proto_curr;
	bool need_rehook;
	struct net_device *dev = NULL;
	int ret = 0;
	bool unlisted = false;

	/* Lock order used here: socket lock, then bind_lock, then RCU. */
	lock_sock(sk);
	spin_lock(&po->bind_lock);
	if (!proto)
		proto = po->num;

	rcu_read_lock();

	/* Rebinding is refused while a fanout is attached. */
	if (po->fanout) {
		ret = -EINVAL;
		goto out_unlock;
	}

	/* Look the device up under RCU; no reference is held yet. */
	if (name) {
		dev = dev_get_by_name_rcu(sock_net(sk), name);
		if (!dev) {
			ret = -ENODEV;
			goto out_unlock;
		}
	} else if (ifindex) {
		dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
		if (!dev) {
			ret = -ENODEV;
			goto out_unlock;
		}
	}

	/* Pin the device before the RCU section may be left below. */
	if (dev)
		dev_hold(dev);

	proto_curr = po->prot_hook.type;
	dev_curr = po->prot_hook.dev;

	/* Only touch the hook when protocol or device actually change. */
	need_rehook = proto_curr != proto || dev_curr != dev;

	if (need_rehook) {
		if (po->running) {
			rcu_read_unlock();
			/* prevents packet_notifier() from calling
			 * register_prot_hook()
			 */
			WRITE_ONCE(po->num, 0);
			__unregister_prot_hook(sk, true);
			rcu_read_lock();
			/* The RCU section was left above, so re-read the
			 * bound device and check that the new one can still
			 * be found by its ifindex.
			 */
			dev_curr = po->prot_hook.dev;
			if (dev)
				unlisted = !dev_get_by_index_rcu(sock_net(sk),
								 dev->ifindex);
		}

		BUG_ON(po->running);
		WRITE_ONCE(po->num, proto);
		po->prot_hook.type = proto;

		if (unlikely(unlisted)) {
			/* New device is gone: give back its reference and
			 * leave the socket unbound with ifindex -1.
			 */
			dev_put(dev);
			po->prot_hook.dev = NULL;
			WRITE_ONCE(po->ifindex, -1);
			packet_cached_dev_reset(po);
		} else {
			/* The dev_hold() reference now lives in prot_hook. */
			po->prot_hook.dev = dev;
			WRITE_ONCE(po->ifindex, dev ? dev->ifindex : 0);
			packet_cached_dev_assign(po, dev);
		}
	}
	/* Drop one reference on dev_curr.  If nothing changed, dev_curr is
	 * the same device as dev and this balances the dev_hold() above;
	 * otherwise it releases the reference that was held for the
	 * previous prot_hook.dev.
	 */
	if (dev_curr)
		dev_put(dev_curr);

	/* Nothing to register for protocol 0 or an unchanged binding. */
	if (proto == 0 || !need_rehook)
		goto out_unlock;

	/* Register when unbound or bound to an up device; otherwise flag
	 * ENETDOWN on the socket.
	 */
	if (!unlisted && (!dev || (dev->flags & IFF_UP))) {
		register_prot_hook(sk);
	} else {
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	rcu_read_unlock();
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return ret;
}
32468c2ecf20Sopenharmony_ci
32478c2ecf20Sopenharmony_ci/*
32488c2ecf20Sopenharmony_ci *	Bind a packet socket to a device
32498c2ecf20Sopenharmony_ci */
32508c2ecf20Sopenharmony_ci
32518c2ecf20Sopenharmony_cistatic int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
32528c2ecf20Sopenharmony_ci			    int addr_len)
32538c2ecf20Sopenharmony_ci{
32548c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
32558c2ecf20Sopenharmony_ci	char name[sizeof(uaddr->sa_data_min) + 1];
32568c2ecf20Sopenharmony_ci
32578c2ecf20Sopenharmony_ci	/*
32588c2ecf20Sopenharmony_ci	 *	Check legality
32598c2ecf20Sopenharmony_ci	 */
32608c2ecf20Sopenharmony_ci
32618c2ecf20Sopenharmony_ci	if (addr_len != sizeof(struct sockaddr))
32628c2ecf20Sopenharmony_ci		return -EINVAL;
32638c2ecf20Sopenharmony_ci	/* uaddr->sa_data comes from the userspace, it's not guaranteed to be
32648c2ecf20Sopenharmony_ci	 * zero-terminated.
32658c2ecf20Sopenharmony_ci	 */
32668c2ecf20Sopenharmony_ci	memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min));
32678c2ecf20Sopenharmony_ci	name[sizeof(uaddr->sa_data_min)] = 0;
32688c2ecf20Sopenharmony_ci
32698c2ecf20Sopenharmony_ci	return packet_do_bind(sk, name, 0, 0);
32708c2ecf20Sopenharmony_ci}
32718c2ecf20Sopenharmony_ci
32728c2ecf20Sopenharmony_cistatic int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
32738c2ecf20Sopenharmony_ci{
32748c2ecf20Sopenharmony_ci	struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
32758c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
32768c2ecf20Sopenharmony_ci
32778c2ecf20Sopenharmony_ci	/*
32788c2ecf20Sopenharmony_ci	 *	Check legality
32798c2ecf20Sopenharmony_ci	 */
32808c2ecf20Sopenharmony_ci
32818c2ecf20Sopenharmony_ci	if (addr_len < sizeof(struct sockaddr_ll))
32828c2ecf20Sopenharmony_ci		return -EINVAL;
32838c2ecf20Sopenharmony_ci	if (sll->sll_family != AF_PACKET)
32848c2ecf20Sopenharmony_ci		return -EINVAL;
32858c2ecf20Sopenharmony_ci
32868c2ecf20Sopenharmony_ci	return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol);
32878c2ecf20Sopenharmony_ci}
32888c2ecf20Sopenharmony_ci
32898c2ecf20Sopenharmony_cistatic struct proto packet_proto = {
32908c2ecf20Sopenharmony_ci	.name	  = "PACKET",
32918c2ecf20Sopenharmony_ci	.owner	  = THIS_MODULE,
32928c2ecf20Sopenharmony_ci	.obj_size = sizeof(struct packet_sock),
32938c2ecf20Sopenharmony_ci};
32948c2ecf20Sopenharmony_ci
/*
 *	Create a packet socket (SOCK_DGRAM, SOCK_RAW or SOCK_PACKET).
 *
 *	The caller needs CAP_NET_RAW in the user namespace of @net, else
 *	-EPERM is returned; any other socket type gives -ESOCKTNOSUPPORT.
 *	Returns 0 on success, -ENOBUFS if the sock cannot be allocated, or
 *	the error reported by packet_alloc_pending().
 */

static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!ns_capable(net->user_ns, CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
	if (sk == NULL)
		goto out;

	/* SOCK_PACKET sockets get their own operations table. */
	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	init_completion(&po->skb_completion);
	sk->sk_family = PF_PACKET;
	po->num = proto;
	/* Transmit function installed at creation time. */
	po->xmit = dev_queue_xmit;

	/* On failure the freshly allocated sock is released at out2. */
	err = packet_alloc_pending(po);
	if (err)
		goto out2;

	packet_cached_dev_reset(po);

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->rollover = NULL;
	/* Receive handler: packet_rcv, or packet_rcv_spkt for SOCK_PACKET. */
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;
	po->prot_hook.af_packet_net = sock_net(sk);

	/* A non-zero protocol hooks the socket up right away; with zero
	 * the hook is left unregistered here.
	 */
	if (proto) {
		po->prot_hook.type = proto;
		__register_prot_hook(sk);
	}

	/* Publish the socket on the per-netns packet socket list. */
	mutex_lock(&net->packet.sklist_lock);
	sk_add_node_tail_rcu(sk, &net->packet.sklist);
	mutex_unlock(&net->packet.sklist_lock);

	/* Account one more in-use socket for this protocol. */
	preempt_disable();
	sock_prot_inuse_add(net, &packet_proto, 1);
	preempt_enable();

	return 0;
out2:
	sk_free(sk);
out:
	return err;
}
33758c2ecf20Sopenharmony_ci
/*
 *	Pull a packet from our receive queue and hand it to the user.
 *	If necessary we block.
 *
 *	On success the return value is the number of payload bytes copied
 *	(or the full skb length when MSG_TRUNC was requested) plus the size
 *	of the virtio-net header when one was delivered.  Flags outside the
 *	supported set give -EINVAL; with MSG_ERRQUEUE the result of
 *	sock_recv_errqueue() is returned instead.
 */

static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
			  int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	int vnet_hdr_len = 0;
	unsigned int origlen = 0;

	/* Reject any flag this handler does not understand. */
	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/* Error-queue reads are served separately and never touch the
	 * receive queue.
	 */
	if (flags & MSG_ERRQUEUE) {
		err = sock_recv_errqueue(sk, msg, len,
					 SOL_PACKET, PACKET_TX_TIMESTAMP);
		goto out;
	}

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	It will now return ENETDOWN if the device has just gone
	 *	down, but after that it will block.
	 */

	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	packet_rcv_try_clear_pressure(pkt_sk(sk));

	/* len is passed by pointer, so packet_rcv_vnet() may adjust it.
	 * NOTE(review): presumably it is reduced by the header size -
	 * confirm in packet_rcv_vnet().
	 */
	if (pkt_sk(sk)->has_vnet_hdr) {
		err = packet_rcv_vnet(msg, skb, &len);
		if (err)
			goto out_free;
		vnet_hdr_len = sizeof(struct virtio_net_hdr);
	}

	/* You lose any data beyond the buffer you gave. If it worries
	 * a user program they can ask the device for its MTU
	 * anyway.
	 */
	copied = skb->len;
	if (copied > len) {
		copied = len;
		msg->msg_flags |= MSG_TRUNC;
	}

	err = skb_copy_datagram_msg(skb, 0, msg, copied);
	if (err)
		goto out_free;

	/* For everything but SOCK_PACKET, complete the sockaddr_ll kept in
	 * the skb control block; origlen is read out first because it is
	 * stored in the same area.
	 */
	if (sock->type != SOCK_PACKET) {
		struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;

		/* Original length was stored in sockaddr_ll fields */
		origlen = PACKET_SKB_CB(skb)->sa.origlen;
		sll->sll_family = AF_PACKET;
		sll->sll_protocol = skb->protocol;
	}

	sock_recv_ts_and_drops(msg, sk, skb);

	if (msg->msg_name) {
		/* Never copy more than the control block or a
		 * sockaddr_storage can hold.
		 */
		const size_t max_len = min(sizeof(skb->cb),
					   sizeof(struct sockaddr_storage));
		int copy_len;

		/* If the address length field is there to be filled
		 * in, we fill it in now.
		 */
		if (sock->type == SOCK_PACKET) {
			__sockaddr_check_size(sizeof(struct sockaddr_pkt));
			msg->msg_namelen = sizeof(struct sockaddr_pkt);
			copy_len = msg->msg_namelen;
		} else {
			struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;

			msg->msg_namelen = sll->sll_halen +
				offsetof(struct sockaddr_ll, sll_addr);
			copy_len = msg->msg_namelen;
			/* Short hardware address: zero the whole sll_addr
			 * area first and report a full sockaddr_ll, while
			 * copy_len stays at the real address length.
			 */
			if (msg->msg_namelen < sizeof(struct sockaddr_ll)) {
				memset(msg->msg_name +
				       offsetof(struct sockaddr_ll, sll_addr),
				       0, sizeof(sll->sll_addr));
				msg->msg_namelen = sizeof(struct sockaddr_ll);
			}
		}
		/* Not expected to trigger; clamp instead of over-reading. */
		if (WARN_ON_ONCE(copy_len > max_len)) {
			copy_len = max_len;
			msg->msg_namelen = copy_len;
		}
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len);
	}

	/* Optionally attach a tpacket_auxdata control message. */
	if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		else if (skb->pkt_type != PACKET_OUTGOING &&
			 skb_csum_unnecessary(skb))
			aux.tp_status |= TP_STATUS_CSUM_VALID;

		/* origlen stays 0 for SOCK_PACKET sockets (see above). */
		aux.tp_len = origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb_network_offset(skb);
		if (skb_vlan_tag_present(skb)) {
			aux.tp_vlan_tci = skb_vlan_tag_get(skb);
			aux.tp_vlan_tpid = ntohs(skb->vlan_proto);
			aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID;
		} else {
			aux.tp_vlan_tci = 0;
			aux.tp_vlan_tpid = 0;
		}
		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	/* With MSG_TRUNC the full packet length is reported even when
	 * fewer bytes were copied.
	 */
	err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
35288c2ecf20Sopenharmony_ci
35298c2ecf20Sopenharmony_cistatic int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
35308c2ecf20Sopenharmony_ci			       int peer)
35318c2ecf20Sopenharmony_ci{
35328c2ecf20Sopenharmony_ci	struct net_device *dev;
35338c2ecf20Sopenharmony_ci	struct sock *sk	= sock->sk;
35348c2ecf20Sopenharmony_ci
35358c2ecf20Sopenharmony_ci	if (peer)
35368c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
35378c2ecf20Sopenharmony_ci
35388c2ecf20Sopenharmony_ci	uaddr->sa_family = AF_PACKET;
35398c2ecf20Sopenharmony_ci	memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min));
35408c2ecf20Sopenharmony_ci	rcu_read_lock();
35418c2ecf20Sopenharmony_ci	dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex));
35428c2ecf20Sopenharmony_ci	if (dev)
35438c2ecf20Sopenharmony_ci		strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min));
35448c2ecf20Sopenharmony_ci	rcu_read_unlock();
35458c2ecf20Sopenharmony_ci
35468c2ecf20Sopenharmony_ci	return sizeof(*uaddr);
35478c2ecf20Sopenharmony_ci}
35488c2ecf20Sopenharmony_ci
35498c2ecf20Sopenharmony_cistatic int packet_getname(struct socket *sock, struct sockaddr *uaddr,
35508c2ecf20Sopenharmony_ci			  int peer)
35518c2ecf20Sopenharmony_ci{
35528c2ecf20Sopenharmony_ci	struct net_device *dev;
35538c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
35548c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
35558c2ecf20Sopenharmony_ci	DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
35568c2ecf20Sopenharmony_ci	int ifindex;
35578c2ecf20Sopenharmony_ci
35588c2ecf20Sopenharmony_ci	if (peer)
35598c2ecf20Sopenharmony_ci		return -EOPNOTSUPP;
35608c2ecf20Sopenharmony_ci
35618c2ecf20Sopenharmony_ci	ifindex = READ_ONCE(po->ifindex);
35628c2ecf20Sopenharmony_ci	sll->sll_family = AF_PACKET;
35638c2ecf20Sopenharmony_ci	sll->sll_ifindex = ifindex;
35648c2ecf20Sopenharmony_ci	sll->sll_protocol = READ_ONCE(po->num);
35658c2ecf20Sopenharmony_ci	sll->sll_pkttype = 0;
35668c2ecf20Sopenharmony_ci	rcu_read_lock();
35678c2ecf20Sopenharmony_ci	dev = dev_get_by_index_rcu(sock_net(sk), ifindex);
35688c2ecf20Sopenharmony_ci	if (dev) {
35698c2ecf20Sopenharmony_ci		sll->sll_hatype = dev->type;
35708c2ecf20Sopenharmony_ci		sll->sll_halen = dev->addr_len;
35718c2ecf20Sopenharmony_ci		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
35728c2ecf20Sopenharmony_ci	} else {
35738c2ecf20Sopenharmony_ci		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
35748c2ecf20Sopenharmony_ci		sll->sll_halen = 0;
35758c2ecf20Sopenharmony_ci	}
35768c2ecf20Sopenharmony_ci	rcu_read_unlock();
35778c2ecf20Sopenharmony_ci
35788c2ecf20Sopenharmony_ci	return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
35798c2ecf20Sopenharmony_ci}
35808c2ecf20Sopenharmony_ci
35818c2ecf20Sopenharmony_cistatic int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
35828c2ecf20Sopenharmony_ci			 int what)
35838c2ecf20Sopenharmony_ci{
35848c2ecf20Sopenharmony_ci	switch (i->type) {
35858c2ecf20Sopenharmony_ci	case PACKET_MR_MULTICAST:
35868c2ecf20Sopenharmony_ci		if (i->alen != dev->addr_len)
35878c2ecf20Sopenharmony_ci			return -EINVAL;
35888c2ecf20Sopenharmony_ci		if (what > 0)
35898c2ecf20Sopenharmony_ci			return dev_mc_add(dev, i->addr);
35908c2ecf20Sopenharmony_ci		else
35918c2ecf20Sopenharmony_ci			return dev_mc_del(dev, i->addr);
35928c2ecf20Sopenharmony_ci		break;
35938c2ecf20Sopenharmony_ci	case PACKET_MR_PROMISC:
35948c2ecf20Sopenharmony_ci		return dev_set_promiscuity(dev, what);
35958c2ecf20Sopenharmony_ci	case PACKET_MR_ALLMULTI:
35968c2ecf20Sopenharmony_ci		return dev_set_allmulti(dev, what);
35978c2ecf20Sopenharmony_ci	case PACKET_MR_UNICAST:
35988c2ecf20Sopenharmony_ci		if (i->alen != dev->addr_len)
35998c2ecf20Sopenharmony_ci			return -EINVAL;
36008c2ecf20Sopenharmony_ci		if (what > 0)
36018c2ecf20Sopenharmony_ci			return dev_uc_add(dev, i->addr);
36028c2ecf20Sopenharmony_ci		else
36038c2ecf20Sopenharmony_ci			return dev_uc_del(dev, i->addr);
36048c2ecf20Sopenharmony_ci		break;
36058c2ecf20Sopenharmony_ci	default:
36068c2ecf20Sopenharmony_ci		break;
36078c2ecf20Sopenharmony_ci	}
36088c2ecf20Sopenharmony_ci	return 0;
36098c2ecf20Sopenharmony_ci}
36108c2ecf20Sopenharmony_ci
36118c2ecf20Sopenharmony_cistatic void packet_dev_mclist_delete(struct net_device *dev,
36128c2ecf20Sopenharmony_ci				     struct packet_mclist **mlp)
36138c2ecf20Sopenharmony_ci{
36148c2ecf20Sopenharmony_ci	struct packet_mclist *ml;
36158c2ecf20Sopenharmony_ci
36168c2ecf20Sopenharmony_ci	while ((ml = *mlp) != NULL) {
36178c2ecf20Sopenharmony_ci		if (ml->ifindex == dev->ifindex) {
36188c2ecf20Sopenharmony_ci			packet_dev_mc(dev, ml, -1);
36198c2ecf20Sopenharmony_ci			*mlp = ml->next;
36208c2ecf20Sopenharmony_ci			kfree(ml);
36218c2ecf20Sopenharmony_ci		} else
36228c2ecf20Sopenharmony_ci			mlp = &ml->next;
36238c2ecf20Sopenharmony_ci	}
36248c2ecf20Sopenharmony_ci}
36258c2ecf20Sopenharmony_ci
36268c2ecf20Sopenharmony_cistatic int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
36278c2ecf20Sopenharmony_ci{
36288c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
36298c2ecf20Sopenharmony_ci	struct packet_mclist *ml, *i;
36308c2ecf20Sopenharmony_ci	struct net_device *dev;
36318c2ecf20Sopenharmony_ci	int err;
36328c2ecf20Sopenharmony_ci
36338c2ecf20Sopenharmony_ci	rtnl_lock();
36348c2ecf20Sopenharmony_ci
36358c2ecf20Sopenharmony_ci	err = -ENODEV;
36368c2ecf20Sopenharmony_ci	dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
36378c2ecf20Sopenharmony_ci	if (!dev)
36388c2ecf20Sopenharmony_ci		goto done;
36398c2ecf20Sopenharmony_ci
36408c2ecf20Sopenharmony_ci	err = -EINVAL;
36418c2ecf20Sopenharmony_ci	if (mreq->mr_alen > dev->addr_len)
36428c2ecf20Sopenharmony_ci		goto done;
36438c2ecf20Sopenharmony_ci
36448c2ecf20Sopenharmony_ci	err = -ENOBUFS;
36458c2ecf20Sopenharmony_ci	i = kmalloc(sizeof(*i), GFP_KERNEL);
36468c2ecf20Sopenharmony_ci	if (i == NULL)
36478c2ecf20Sopenharmony_ci		goto done;
36488c2ecf20Sopenharmony_ci
36498c2ecf20Sopenharmony_ci	err = 0;
36508c2ecf20Sopenharmony_ci	for (ml = po->mclist; ml; ml = ml->next) {
36518c2ecf20Sopenharmony_ci		if (ml->ifindex == mreq->mr_ifindex &&
36528c2ecf20Sopenharmony_ci		    ml->type == mreq->mr_type &&
36538c2ecf20Sopenharmony_ci		    ml->alen == mreq->mr_alen &&
36548c2ecf20Sopenharmony_ci		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
36558c2ecf20Sopenharmony_ci			ml->count++;
36568c2ecf20Sopenharmony_ci			/* Free the new element ... */
36578c2ecf20Sopenharmony_ci			kfree(i);
36588c2ecf20Sopenharmony_ci			goto done;
36598c2ecf20Sopenharmony_ci		}
36608c2ecf20Sopenharmony_ci	}
36618c2ecf20Sopenharmony_ci
36628c2ecf20Sopenharmony_ci	i->type = mreq->mr_type;
36638c2ecf20Sopenharmony_ci	i->ifindex = mreq->mr_ifindex;
36648c2ecf20Sopenharmony_ci	i->alen = mreq->mr_alen;
36658c2ecf20Sopenharmony_ci	memcpy(i->addr, mreq->mr_address, i->alen);
36668c2ecf20Sopenharmony_ci	memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen);
36678c2ecf20Sopenharmony_ci	i->count = 1;
36688c2ecf20Sopenharmony_ci	i->next = po->mclist;
36698c2ecf20Sopenharmony_ci	po->mclist = i;
36708c2ecf20Sopenharmony_ci	err = packet_dev_mc(dev, i, 1);
36718c2ecf20Sopenharmony_ci	if (err) {
36728c2ecf20Sopenharmony_ci		po->mclist = i->next;
36738c2ecf20Sopenharmony_ci		kfree(i);
36748c2ecf20Sopenharmony_ci	}
36758c2ecf20Sopenharmony_ci
36768c2ecf20Sopenharmony_cidone:
36778c2ecf20Sopenharmony_ci	rtnl_unlock();
36788c2ecf20Sopenharmony_ci	return err;
36798c2ecf20Sopenharmony_ci}
36808c2ecf20Sopenharmony_ci
36818c2ecf20Sopenharmony_cistatic int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
36828c2ecf20Sopenharmony_ci{
36838c2ecf20Sopenharmony_ci	struct packet_mclist *ml, **mlp;
36848c2ecf20Sopenharmony_ci
36858c2ecf20Sopenharmony_ci	rtnl_lock();
36868c2ecf20Sopenharmony_ci
36878c2ecf20Sopenharmony_ci	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
36888c2ecf20Sopenharmony_ci		if (ml->ifindex == mreq->mr_ifindex &&
36898c2ecf20Sopenharmony_ci		    ml->type == mreq->mr_type &&
36908c2ecf20Sopenharmony_ci		    ml->alen == mreq->mr_alen &&
36918c2ecf20Sopenharmony_ci		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
36928c2ecf20Sopenharmony_ci			if (--ml->count == 0) {
36938c2ecf20Sopenharmony_ci				struct net_device *dev;
36948c2ecf20Sopenharmony_ci				*mlp = ml->next;
36958c2ecf20Sopenharmony_ci				dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
36968c2ecf20Sopenharmony_ci				if (dev)
36978c2ecf20Sopenharmony_ci					packet_dev_mc(dev, ml, -1);
36988c2ecf20Sopenharmony_ci				kfree(ml);
36998c2ecf20Sopenharmony_ci			}
37008c2ecf20Sopenharmony_ci			break;
37018c2ecf20Sopenharmony_ci		}
37028c2ecf20Sopenharmony_ci	}
37038c2ecf20Sopenharmony_ci	rtnl_unlock();
37048c2ecf20Sopenharmony_ci	return 0;
37058c2ecf20Sopenharmony_ci}
37068c2ecf20Sopenharmony_ci
37078c2ecf20Sopenharmony_cistatic void packet_flush_mclist(struct sock *sk)
37088c2ecf20Sopenharmony_ci{
37098c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
37108c2ecf20Sopenharmony_ci	struct packet_mclist *ml;
37118c2ecf20Sopenharmony_ci
37128c2ecf20Sopenharmony_ci	if (!po->mclist)
37138c2ecf20Sopenharmony_ci		return;
37148c2ecf20Sopenharmony_ci
37158c2ecf20Sopenharmony_ci	rtnl_lock();
37168c2ecf20Sopenharmony_ci	while ((ml = po->mclist) != NULL) {
37178c2ecf20Sopenharmony_ci		struct net_device *dev;
37188c2ecf20Sopenharmony_ci
37198c2ecf20Sopenharmony_ci		po->mclist = ml->next;
37208c2ecf20Sopenharmony_ci		dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
37218c2ecf20Sopenharmony_ci		if (dev != NULL)
37228c2ecf20Sopenharmony_ci			packet_dev_mc(dev, ml, -1);
37238c2ecf20Sopenharmony_ci		kfree(ml);
37248c2ecf20Sopenharmony_ci	}
37258c2ecf20Sopenharmony_ci	rtnl_unlock();
37268c2ecf20Sopenharmony_ci}
37278c2ecf20Sopenharmony_ci
37288c2ecf20Sopenharmony_cistatic int
37298c2ecf20Sopenharmony_cipacket_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval,
37308c2ecf20Sopenharmony_ci		  unsigned int optlen)
37318c2ecf20Sopenharmony_ci{
37328c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
37338c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
37348c2ecf20Sopenharmony_ci	int ret;
37358c2ecf20Sopenharmony_ci
37368c2ecf20Sopenharmony_ci	if (level != SOL_PACKET)
37378c2ecf20Sopenharmony_ci		return -ENOPROTOOPT;
37388c2ecf20Sopenharmony_ci
37398c2ecf20Sopenharmony_ci	switch (optname) {
37408c2ecf20Sopenharmony_ci	case PACKET_ADD_MEMBERSHIP:
37418c2ecf20Sopenharmony_ci	case PACKET_DROP_MEMBERSHIP:
37428c2ecf20Sopenharmony_ci	{
37438c2ecf20Sopenharmony_ci		struct packet_mreq_max mreq;
37448c2ecf20Sopenharmony_ci		int len = optlen;
37458c2ecf20Sopenharmony_ci		memset(&mreq, 0, sizeof(mreq));
37468c2ecf20Sopenharmony_ci		if (len < sizeof(struct packet_mreq))
37478c2ecf20Sopenharmony_ci			return -EINVAL;
37488c2ecf20Sopenharmony_ci		if (len > sizeof(mreq))
37498c2ecf20Sopenharmony_ci			len = sizeof(mreq);
37508c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&mreq, optval, len))
37518c2ecf20Sopenharmony_ci			return -EFAULT;
37528c2ecf20Sopenharmony_ci		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
37538c2ecf20Sopenharmony_ci			return -EINVAL;
37548c2ecf20Sopenharmony_ci		if (optname == PACKET_ADD_MEMBERSHIP)
37558c2ecf20Sopenharmony_ci			ret = packet_mc_add(sk, &mreq);
37568c2ecf20Sopenharmony_ci		else
37578c2ecf20Sopenharmony_ci			ret = packet_mc_drop(sk, &mreq);
37588c2ecf20Sopenharmony_ci		return ret;
37598c2ecf20Sopenharmony_ci	}
37608c2ecf20Sopenharmony_ci
37618c2ecf20Sopenharmony_ci	case PACKET_RX_RING:
37628c2ecf20Sopenharmony_ci	case PACKET_TX_RING:
37638c2ecf20Sopenharmony_ci	{
37648c2ecf20Sopenharmony_ci		union tpacket_req_u req_u;
37658c2ecf20Sopenharmony_ci		int len;
37668c2ecf20Sopenharmony_ci
37678c2ecf20Sopenharmony_ci		lock_sock(sk);
37688c2ecf20Sopenharmony_ci		switch (po->tp_version) {
37698c2ecf20Sopenharmony_ci		case TPACKET_V1:
37708c2ecf20Sopenharmony_ci		case TPACKET_V2:
37718c2ecf20Sopenharmony_ci			len = sizeof(req_u.req);
37728c2ecf20Sopenharmony_ci			break;
37738c2ecf20Sopenharmony_ci		case TPACKET_V3:
37748c2ecf20Sopenharmony_ci		default:
37758c2ecf20Sopenharmony_ci			len = sizeof(req_u.req3);
37768c2ecf20Sopenharmony_ci			break;
37778c2ecf20Sopenharmony_ci		}
37788c2ecf20Sopenharmony_ci		if (optlen < len) {
37798c2ecf20Sopenharmony_ci			ret = -EINVAL;
37808c2ecf20Sopenharmony_ci		} else {
37818c2ecf20Sopenharmony_ci			if (copy_from_sockptr(&req_u.req, optval, len))
37828c2ecf20Sopenharmony_ci				ret = -EFAULT;
37838c2ecf20Sopenharmony_ci			else
37848c2ecf20Sopenharmony_ci				ret = packet_set_ring(sk, &req_u, 0,
37858c2ecf20Sopenharmony_ci						    optname == PACKET_TX_RING);
37868c2ecf20Sopenharmony_ci		}
37878c2ecf20Sopenharmony_ci		release_sock(sk);
37888c2ecf20Sopenharmony_ci		return ret;
37898c2ecf20Sopenharmony_ci	}
37908c2ecf20Sopenharmony_ci	case PACKET_COPY_THRESH:
37918c2ecf20Sopenharmony_ci	{
37928c2ecf20Sopenharmony_ci		int val;
37938c2ecf20Sopenharmony_ci
37948c2ecf20Sopenharmony_ci		if (optlen != sizeof(val))
37958c2ecf20Sopenharmony_ci			return -EINVAL;
37968c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
37978c2ecf20Sopenharmony_ci			return -EFAULT;
37988c2ecf20Sopenharmony_ci
37998c2ecf20Sopenharmony_ci		pkt_sk(sk)->copy_thresh = val;
38008c2ecf20Sopenharmony_ci		return 0;
38018c2ecf20Sopenharmony_ci	}
38028c2ecf20Sopenharmony_ci	case PACKET_VERSION:
38038c2ecf20Sopenharmony_ci	{
38048c2ecf20Sopenharmony_ci		int val;
38058c2ecf20Sopenharmony_ci
38068c2ecf20Sopenharmony_ci		if (optlen != sizeof(val))
38078c2ecf20Sopenharmony_ci			return -EINVAL;
38088c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
38098c2ecf20Sopenharmony_ci			return -EFAULT;
38108c2ecf20Sopenharmony_ci		switch (val) {
38118c2ecf20Sopenharmony_ci		case TPACKET_V1:
38128c2ecf20Sopenharmony_ci		case TPACKET_V2:
38138c2ecf20Sopenharmony_ci		case TPACKET_V3:
38148c2ecf20Sopenharmony_ci			break;
38158c2ecf20Sopenharmony_ci		default:
38168c2ecf20Sopenharmony_ci			return -EINVAL;
38178c2ecf20Sopenharmony_ci		}
38188c2ecf20Sopenharmony_ci		lock_sock(sk);
38198c2ecf20Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
38208c2ecf20Sopenharmony_ci			ret = -EBUSY;
38218c2ecf20Sopenharmony_ci		} else {
38228c2ecf20Sopenharmony_ci			po->tp_version = val;
38238c2ecf20Sopenharmony_ci			ret = 0;
38248c2ecf20Sopenharmony_ci		}
38258c2ecf20Sopenharmony_ci		release_sock(sk);
38268c2ecf20Sopenharmony_ci		return ret;
38278c2ecf20Sopenharmony_ci	}
38288c2ecf20Sopenharmony_ci	case PACKET_RESERVE:
38298c2ecf20Sopenharmony_ci	{
38308c2ecf20Sopenharmony_ci		unsigned int val;
38318c2ecf20Sopenharmony_ci
38328c2ecf20Sopenharmony_ci		if (optlen != sizeof(val))
38338c2ecf20Sopenharmony_ci			return -EINVAL;
38348c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
38358c2ecf20Sopenharmony_ci			return -EFAULT;
38368c2ecf20Sopenharmony_ci		if (val > INT_MAX)
38378c2ecf20Sopenharmony_ci			return -EINVAL;
38388c2ecf20Sopenharmony_ci		lock_sock(sk);
38398c2ecf20Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
38408c2ecf20Sopenharmony_ci			ret = -EBUSY;
38418c2ecf20Sopenharmony_ci		} else {
38428c2ecf20Sopenharmony_ci			po->tp_reserve = val;
38438c2ecf20Sopenharmony_ci			ret = 0;
38448c2ecf20Sopenharmony_ci		}
38458c2ecf20Sopenharmony_ci		release_sock(sk);
38468c2ecf20Sopenharmony_ci		return ret;
38478c2ecf20Sopenharmony_ci	}
38488c2ecf20Sopenharmony_ci	case PACKET_LOSS:
38498c2ecf20Sopenharmony_ci	{
38508c2ecf20Sopenharmony_ci		unsigned int val;
38518c2ecf20Sopenharmony_ci
38528c2ecf20Sopenharmony_ci		if (optlen != sizeof(val))
38538c2ecf20Sopenharmony_ci			return -EINVAL;
38548c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
38558c2ecf20Sopenharmony_ci			return -EFAULT;
38568c2ecf20Sopenharmony_ci
38578c2ecf20Sopenharmony_ci		lock_sock(sk);
38588c2ecf20Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
38598c2ecf20Sopenharmony_ci			ret = -EBUSY;
38608c2ecf20Sopenharmony_ci		} else {
38618c2ecf20Sopenharmony_ci			po->tp_loss = !!val;
38628c2ecf20Sopenharmony_ci			ret = 0;
38638c2ecf20Sopenharmony_ci		}
38648c2ecf20Sopenharmony_ci		release_sock(sk);
38658c2ecf20Sopenharmony_ci		return ret;
38668c2ecf20Sopenharmony_ci	}
38678c2ecf20Sopenharmony_ci	case PACKET_AUXDATA:
38688c2ecf20Sopenharmony_ci	{
38698c2ecf20Sopenharmony_ci		int val;
38708c2ecf20Sopenharmony_ci
38718c2ecf20Sopenharmony_ci		if (optlen < sizeof(val))
38728c2ecf20Sopenharmony_ci			return -EINVAL;
38738c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
38748c2ecf20Sopenharmony_ci			return -EFAULT;
38758c2ecf20Sopenharmony_ci
38768c2ecf20Sopenharmony_ci		packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val);
38778c2ecf20Sopenharmony_ci		return 0;
38788c2ecf20Sopenharmony_ci	}
38798c2ecf20Sopenharmony_ci	case PACKET_ORIGDEV:
38808c2ecf20Sopenharmony_ci	{
38818c2ecf20Sopenharmony_ci		int val;
38828c2ecf20Sopenharmony_ci
38838c2ecf20Sopenharmony_ci		if (optlen < sizeof(val))
38848c2ecf20Sopenharmony_ci			return -EINVAL;
38858c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
38868c2ecf20Sopenharmony_ci			return -EFAULT;
38878c2ecf20Sopenharmony_ci
38888c2ecf20Sopenharmony_ci		packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val);
38898c2ecf20Sopenharmony_ci		return 0;
38908c2ecf20Sopenharmony_ci	}
38918c2ecf20Sopenharmony_ci	case PACKET_VNET_HDR:
38928c2ecf20Sopenharmony_ci	{
38938c2ecf20Sopenharmony_ci		int val;
38948c2ecf20Sopenharmony_ci
38958c2ecf20Sopenharmony_ci		if (sock->type != SOCK_RAW)
38968c2ecf20Sopenharmony_ci			return -EINVAL;
38978c2ecf20Sopenharmony_ci		if (optlen < sizeof(val))
38988c2ecf20Sopenharmony_ci			return -EINVAL;
38998c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
39008c2ecf20Sopenharmony_ci			return -EFAULT;
39018c2ecf20Sopenharmony_ci
39028c2ecf20Sopenharmony_ci		lock_sock(sk);
39038c2ecf20Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
39048c2ecf20Sopenharmony_ci			ret = -EBUSY;
39058c2ecf20Sopenharmony_ci		} else {
39068c2ecf20Sopenharmony_ci			po->has_vnet_hdr = !!val;
39078c2ecf20Sopenharmony_ci			ret = 0;
39088c2ecf20Sopenharmony_ci		}
39098c2ecf20Sopenharmony_ci		release_sock(sk);
39108c2ecf20Sopenharmony_ci		return ret;
39118c2ecf20Sopenharmony_ci	}
39128c2ecf20Sopenharmony_ci	case PACKET_TIMESTAMP:
39138c2ecf20Sopenharmony_ci	{
39148c2ecf20Sopenharmony_ci		int val;
39158c2ecf20Sopenharmony_ci
39168c2ecf20Sopenharmony_ci		if (optlen != sizeof(val))
39178c2ecf20Sopenharmony_ci			return -EINVAL;
39188c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
39198c2ecf20Sopenharmony_ci			return -EFAULT;
39208c2ecf20Sopenharmony_ci
39218c2ecf20Sopenharmony_ci		po->tp_tstamp = val;
39228c2ecf20Sopenharmony_ci		return 0;
39238c2ecf20Sopenharmony_ci	}
39248c2ecf20Sopenharmony_ci	case PACKET_FANOUT:
39258c2ecf20Sopenharmony_ci	{
39268c2ecf20Sopenharmony_ci		struct fanout_args args = { 0 };
39278c2ecf20Sopenharmony_ci
39288c2ecf20Sopenharmony_ci		if (optlen != sizeof(int) && optlen != sizeof(args))
39298c2ecf20Sopenharmony_ci			return -EINVAL;
39308c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&args, optval, optlen))
39318c2ecf20Sopenharmony_ci			return -EFAULT;
39328c2ecf20Sopenharmony_ci
39338c2ecf20Sopenharmony_ci		return fanout_add(sk, &args);
39348c2ecf20Sopenharmony_ci	}
39358c2ecf20Sopenharmony_ci	case PACKET_FANOUT_DATA:
39368c2ecf20Sopenharmony_ci	{
39378c2ecf20Sopenharmony_ci		/* Paired with the WRITE_ONCE() in fanout_add() */
39388c2ecf20Sopenharmony_ci		if (!READ_ONCE(po->fanout))
39398c2ecf20Sopenharmony_ci			return -EINVAL;
39408c2ecf20Sopenharmony_ci
39418c2ecf20Sopenharmony_ci		return fanout_set_data(po, optval, optlen);
39428c2ecf20Sopenharmony_ci	}
39438c2ecf20Sopenharmony_ci	case PACKET_IGNORE_OUTGOING:
39448c2ecf20Sopenharmony_ci	{
39458c2ecf20Sopenharmony_ci		int val;
39468c2ecf20Sopenharmony_ci
39478c2ecf20Sopenharmony_ci		if (optlen != sizeof(val))
39488c2ecf20Sopenharmony_ci			return -EINVAL;
39498c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
39508c2ecf20Sopenharmony_ci			return -EFAULT;
39518c2ecf20Sopenharmony_ci		if (val < 0 || val > 1)
39528c2ecf20Sopenharmony_ci			return -EINVAL;
39538c2ecf20Sopenharmony_ci
39548c2ecf20Sopenharmony_ci		WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val);
39558c2ecf20Sopenharmony_ci		return 0;
39568c2ecf20Sopenharmony_ci	}
39578c2ecf20Sopenharmony_ci	case PACKET_TX_HAS_OFF:
39588c2ecf20Sopenharmony_ci	{
39598c2ecf20Sopenharmony_ci		unsigned int val;
39608c2ecf20Sopenharmony_ci
39618c2ecf20Sopenharmony_ci		if (optlen != sizeof(val))
39628c2ecf20Sopenharmony_ci			return -EINVAL;
39638c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
39648c2ecf20Sopenharmony_ci			return -EFAULT;
39658c2ecf20Sopenharmony_ci
39668c2ecf20Sopenharmony_ci		lock_sock(sk);
39678c2ecf20Sopenharmony_ci		if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
39688c2ecf20Sopenharmony_ci			ret = -EBUSY;
39698c2ecf20Sopenharmony_ci		} else {
39708c2ecf20Sopenharmony_ci			po->tp_tx_has_off = !!val;
39718c2ecf20Sopenharmony_ci			ret = 0;
39728c2ecf20Sopenharmony_ci		}
39738c2ecf20Sopenharmony_ci		release_sock(sk);
39748c2ecf20Sopenharmony_ci		return 0;
39758c2ecf20Sopenharmony_ci	}
39768c2ecf20Sopenharmony_ci	case PACKET_QDISC_BYPASS:
39778c2ecf20Sopenharmony_ci	{
39788c2ecf20Sopenharmony_ci		int val;
39798c2ecf20Sopenharmony_ci
39808c2ecf20Sopenharmony_ci		if (optlen != sizeof(val))
39818c2ecf20Sopenharmony_ci			return -EINVAL;
39828c2ecf20Sopenharmony_ci		if (copy_from_sockptr(&val, optval, sizeof(val)))
39838c2ecf20Sopenharmony_ci			return -EFAULT;
39848c2ecf20Sopenharmony_ci
39858c2ecf20Sopenharmony_ci		/* Paired with all lockless reads of po->xmit */
39868c2ecf20Sopenharmony_ci		WRITE_ONCE(po->xmit, val ? packet_direct_xmit : dev_queue_xmit);
39878c2ecf20Sopenharmony_ci		return 0;
39888c2ecf20Sopenharmony_ci	}
39898c2ecf20Sopenharmony_ci	default:
39908c2ecf20Sopenharmony_ci		return -ENOPROTOOPT;
39918c2ecf20Sopenharmony_ci	}
39928c2ecf20Sopenharmony_ci}
39938c2ecf20Sopenharmony_ci
39948c2ecf20Sopenharmony_cistatic int packet_getsockopt(struct socket *sock, int level, int optname,
39958c2ecf20Sopenharmony_ci			     char __user *optval, int __user *optlen)
39968c2ecf20Sopenharmony_ci{
39978c2ecf20Sopenharmony_ci	int len;
39988c2ecf20Sopenharmony_ci	int val, lv = sizeof(val);
39998c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
40008c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
40018c2ecf20Sopenharmony_ci	void *data = &val;
40028c2ecf20Sopenharmony_ci	union tpacket_stats_u st;
40038c2ecf20Sopenharmony_ci	struct tpacket_rollover_stats rstats;
40048c2ecf20Sopenharmony_ci	int drops;
40058c2ecf20Sopenharmony_ci
40068c2ecf20Sopenharmony_ci	if (level != SOL_PACKET)
40078c2ecf20Sopenharmony_ci		return -ENOPROTOOPT;
40088c2ecf20Sopenharmony_ci
40098c2ecf20Sopenharmony_ci	if (get_user(len, optlen))
40108c2ecf20Sopenharmony_ci		return -EFAULT;
40118c2ecf20Sopenharmony_ci
40128c2ecf20Sopenharmony_ci	if (len < 0)
40138c2ecf20Sopenharmony_ci		return -EINVAL;
40148c2ecf20Sopenharmony_ci
40158c2ecf20Sopenharmony_ci	switch (optname) {
40168c2ecf20Sopenharmony_ci	case PACKET_STATISTICS:
40178c2ecf20Sopenharmony_ci		spin_lock_bh(&sk->sk_receive_queue.lock);
40188c2ecf20Sopenharmony_ci		memcpy(&st, &po->stats, sizeof(st));
40198c2ecf20Sopenharmony_ci		memset(&po->stats, 0, sizeof(po->stats));
40208c2ecf20Sopenharmony_ci		spin_unlock_bh(&sk->sk_receive_queue.lock);
40218c2ecf20Sopenharmony_ci		drops = atomic_xchg(&po->tp_drops, 0);
40228c2ecf20Sopenharmony_ci
40238c2ecf20Sopenharmony_ci		if (po->tp_version == TPACKET_V3) {
40248c2ecf20Sopenharmony_ci			lv = sizeof(struct tpacket_stats_v3);
40258c2ecf20Sopenharmony_ci			st.stats3.tp_drops = drops;
40268c2ecf20Sopenharmony_ci			st.stats3.tp_packets += drops;
40278c2ecf20Sopenharmony_ci			data = &st.stats3;
40288c2ecf20Sopenharmony_ci		} else {
40298c2ecf20Sopenharmony_ci			lv = sizeof(struct tpacket_stats);
40308c2ecf20Sopenharmony_ci			st.stats1.tp_drops = drops;
40318c2ecf20Sopenharmony_ci			st.stats1.tp_packets += drops;
40328c2ecf20Sopenharmony_ci			data = &st.stats1;
40338c2ecf20Sopenharmony_ci		}
40348c2ecf20Sopenharmony_ci
40358c2ecf20Sopenharmony_ci		break;
40368c2ecf20Sopenharmony_ci	case PACKET_AUXDATA:
40378c2ecf20Sopenharmony_ci		val = packet_sock_flag(po, PACKET_SOCK_AUXDATA);
40388c2ecf20Sopenharmony_ci		break;
40398c2ecf20Sopenharmony_ci	case PACKET_ORIGDEV:
40408c2ecf20Sopenharmony_ci		val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV);
40418c2ecf20Sopenharmony_ci		break;
40428c2ecf20Sopenharmony_ci	case PACKET_VNET_HDR:
40438c2ecf20Sopenharmony_ci		val = po->has_vnet_hdr;
40448c2ecf20Sopenharmony_ci		break;
40458c2ecf20Sopenharmony_ci	case PACKET_VERSION:
40468c2ecf20Sopenharmony_ci		val = po->tp_version;
40478c2ecf20Sopenharmony_ci		break;
40488c2ecf20Sopenharmony_ci	case PACKET_HDRLEN:
40498c2ecf20Sopenharmony_ci		if (len > sizeof(int))
40508c2ecf20Sopenharmony_ci			len = sizeof(int);
40518c2ecf20Sopenharmony_ci		if (len < sizeof(int))
40528c2ecf20Sopenharmony_ci			return -EINVAL;
40538c2ecf20Sopenharmony_ci		if (copy_from_user(&val, optval, len))
40548c2ecf20Sopenharmony_ci			return -EFAULT;
40558c2ecf20Sopenharmony_ci		switch (val) {
40568c2ecf20Sopenharmony_ci		case TPACKET_V1:
40578c2ecf20Sopenharmony_ci			val = sizeof(struct tpacket_hdr);
40588c2ecf20Sopenharmony_ci			break;
40598c2ecf20Sopenharmony_ci		case TPACKET_V2:
40608c2ecf20Sopenharmony_ci			val = sizeof(struct tpacket2_hdr);
40618c2ecf20Sopenharmony_ci			break;
40628c2ecf20Sopenharmony_ci		case TPACKET_V3:
40638c2ecf20Sopenharmony_ci			val = sizeof(struct tpacket3_hdr);
40648c2ecf20Sopenharmony_ci			break;
40658c2ecf20Sopenharmony_ci		default:
40668c2ecf20Sopenharmony_ci			return -EINVAL;
40678c2ecf20Sopenharmony_ci		}
40688c2ecf20Sopenharmony_ci		break;
40698c2ecf20Sopenharmony_ci	case PACKET_RESERVE:
40708c2ecf20Sopenharmony_ci		val = po->tp_reserve;
40718c2ecf20Sopenharmony_ci		break;
40728c2ecf20Sopenharmony_ci	case PACKET_LOSS:
40738c2ecf20Sopenharmony_ci		val = po->tp_loss;
40748c2ecf20Sopenharmony_ci		break;
40758c2ecf20Sopenharmony_ci	case PACKET_TIMESTAMP:
40768c2ecf20Sopenharmony_ci		val = po->tp_tstamp;
40778c2ecf20Sopenharmony_ci		break;
40788c2ecf20Sopenharmony_ci	case PACKET_FANOUT:
40798c2ecf20Sopenharmony_ci		val = (po->fanout ?
40808c2ecf20Sopenharmony_ci		       ((u32)po->fanout->id |
40818c2ecf20Sopenharmony_ci			((u32)po->fanout->type << 16) |
40828c2ecf20Sopenharmony_ci			((u32)po->fanout->flags << 24)) :
40838c2ecf20Sopenharmony_ci		       0);
40848c2ecf20Sopenharmony_ci		break;
40858c2ecf20Sopenharmony_ci	case PACKET_IGNORE_OUTGOING:
40868c2ecf20Sopenharmony_ci		val = READ_ONCE(po->prot_hook.ignore_outgoing);
40878c2ecf20Sopenharmony_ci		break;
40888c2ecf20Sopenharmony_ci	case PACKET_ROLLOVER_STATS:
40898c2ecf20Sopenharmony_ci		if (!po->rollover)
40908c2ecf20Sopenharmony_ci			return -EINVAL;
40918c2ecf20Sopenharmony_ci		rstats.tp_all = atomic_long_read(&po->rollover->num);
40928c2ecf20Sopenharmony_ci		rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
40938c2ecf20Sopenharmony_ci		rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
40948c2ecf20Sopenharmony_ci		data = &rstats;
40958c2ecf20Sopenharmony_ci		lv = sizeof(rstats);
40968c2ecf20Sopenharmony_ci		break;
40978c2ecf20Sopenharmony_ci	case PACKET_TX_HAS_OFF:
40988c2ecf20Sopenharmony_ci		val = po->tp_tx_has_off;
40998c2ecf20Sopenharmony_ci		break;
41008c2ecf20Sopenharmony_ci	case PACKET_QDISC_BYPASS:
41018c2ecf20Sopenharmony_ci		val = packet_use_direct_xmit(po);
41028c2ecf20Sopenharmony_ci		break;
41038c2ecf20Sopenharmony_ci	default:
41048c2ecf20Sopenharmony_ci		return -ENOPROTOOPT;
41058c2ecf20Sopenharmony_ci	}
41068c2ecf20Sopenharmony_ci
41078c2ecf20Sopenharmony_ci	if (len > lv)
41088c2ecf20Sopenharmony_ci		len = lv;
41098c2ecf20Sopenharmony_ci	if (put_user(len, optlen))
41108c2ecf20Sopenharmony_ci		return -EFAULT;
41118c2ecf20Sopenharmony_ci	if (copy_to_user(optval, data, len))
41128c2ecf20Sopenharmony_ci		return -EFAULT;
41138c2ecf20Sopenharmony_ci	return 0;
41148c2ecf20Sopenharmony_ci}
41158c2ecf20Sopenharmony_ci
41168c2ecf20Sopenharmony_cistatic int packet_notifier(struct notifier_block *this,
41178c2ecf20Sopenharmony_ci			   unsigned long msg, void *ptr)
41188c2ecf20Sopenharmony_ci{
41198c2ecf20Sopenharmony_ci	struct sock *sk;
41208c2ecf20Sopenharmony_ci	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
41218c2ecf20Sopenharmony_ci	struct net *net = dev_net(dev);
41228c2ecf20Sopenharmony_ci
41238c2ecf20Sopenharmony_ci	rcu_read_lock();
41248c2ecf20Sopenharmony_ci	sk_for_each_rcu(sk, &net->packet.sklist) {
41258c2ecf20Sopenharmony_ci		struct packet_sock *po = pkt_sk(sk);
41268c2ecf20Sopenharmony_ci
41278c2ecf20Sopenharmony_ci		switch (msg) {
41288c2ecf20Sopenharmony_ci		case NETDEV_UNREGISTER:
41298c2ecf20Sopenharmony_ci			if (po->mclist)
41308c2ecf20Sopenharmony_ci				packet_dev_mclist_delete(dev, &po->mclist);
41318c2ecf20Sopenharmony_ci			fallthrough;
41328c2ecf20Sopenharmony_ci
41338c2ecf20Sopenharmony_ci		case NETDEV_DOWN:
41348c2ecf20Sopenharmony_ci			if (dev->ifindex == po->ifindex) {
41358c2ecf20Sopenharmony_ci				spin_lock(&po->bind_lock);
41368c2ecf20Sopenharmony_ci				if (po->running) {
41378c2ecf20Sopenharmony_ci					__unregister_prot_hook(sk, false);
41388c2ecf20Sopenharmony_ci					sk->sk_err = ENETDOWN;
41398c2ecf20Sopenharmony_ci					if (!sock_flag(sk, SOCK_DEAD))
41408c2ecf20Sopenharmony_ci						sk->sk_error_report(sk);
41418c2ecf20Sopenharmony_ci				}
41428c2ecf20Sopenharmony_ci				if (msg == NETDEV_UNREGISTER) {
41438c2ecf20Sopenharmony_ci					packet_cached_dev_reset(po);
41448c2ecf20Sopenharmony_ci					WRITE_ONCE(po->ifindex, -1);
41458c2ecf20Sopenharmony_ci					if (po->prot_hook.dev)
41468c2ecf20Sopenharmony_ci						dev_put(po->prot_hook.dev);
41478c2ecf20Sopenharmony_ci					po->prot_hook.dev = NULL;
41488c2ecf20Sopenharmony_ci				}
41498c2ecf20Sopenharmony_ci				spin_unlock(&po->bind_lock);
41508c2ecf20Sopenharmony_ci			}
41518c2ecf20Sopenharmony_ci			break;
41528c2ecf20Sopenharmony_ci		case NETDEV_UP:
41538c2ecf20Sopenharmony_ci			if (dev->ifindex == po->ifindex) {
41548c2ecf20Sopenharmony_ci				spin_lock(&po->bind_lock);
41558c2ecf20Sopenharmony_ci				if (po->num)
41568c2ecf20Sopenharmony_ci					register_prot_hook(sk);
41578c2ecf20Sopenharmony_ci				spin_unlock(&po->bind_lock);
41588c2ecf20Sopenharmony_ci			}
41598c2ecf20Sopenharmony_ci			break;
41608c2ecf20Sopenharmony_ci		}
41618c2ecf20Sopenharmony_ci	}
41628c2ecf20Sopenharmony_ci	rcu_read_unlock();
41638c2ecf20Sopenharmony_ci	return NOTIFY_DONE;
41648c2ecf20Sopenharmony_ci}
41658c2ecf20Sopenharmony_ci
41668c2ecf20Sopenharmony_ci
41678c2ecf20Sopenharmony_cistatic int packet_ioctl(struct socket *sock, unsigned int cmd,
41688c2ecf20Sopenharmony_ci			unsigned long arg)
41698c2ecf20Sopenharmony_ci{
41708c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
41718c2ecf20Sopenharmony_ci
41728c2ecf20Sopenharmony_ci	switch (cmd) {
41738c2ecf20Sopenharmony_ci	case SIOCOUTQ:
41748c2ecf20Sopenharmony_ci	{
41758c2ecf20Sopenharmony_ci		int amount = sk_wmem_alloc_get(sk);
41768c2ecf20Sopenharmony_ci
41778c2ecf20Sopenharmony_ci		return put_user(amount, (int __user *)arg);
41788c2ecf20Sopenharmony_ci	}
41798c2ecf20Sopenharmony_ci	case SIOCINQ:
41808c2ecf20Sopenharmony_ci	{
41818c2ecf20Sopenharmony_ci		struct sk_buff *skb;
41828c2ecf20Sopenharmony_ci		int amount = 0;
41838c2ecf20Sopenharmony_ci
41848c2ecf20Sopenharmony_ci		spin_lock_bh(&sk->sk_receive_queue.lock);
41858c2ecf20Sopenharmony_ci		skb = skb_peek(&sk->sk_receive_queue);
41868c2ecf20Sopenharmony_ci		if (skb)
41878c2ecf20Sopenharmony_ci			amount = skb->len;
41888c2ecf20Sopenharmony_ci		spin_unlock_bh(&sk->sk_receive_queue.lock);
41898c2ecf20Sopenharmony_ci		return put_user(amount, (int __user *)arg);
41908c2ecf20Sopenharmony_ci	}
41918c2ecf20Sopenharmony_ci#ifdef CONFIG_INET
41928c2ecf20Sopenharmony_ci	case SIOCADDRT:
41938c2ecf20Sopenharmony_ci	case SIOCDELRT:
41948c2ecf20Sopenharmony_ci	case SIOCDARP:
41958c2ecf20Sopenharmony_ci	case SIOCGARP:
41968c2ecf20Sopenharmony_ci	case SIOCSARP:
41978c2ecf20Sopenharmony_ci	case SIOCGIFADDR:
41988c2ecf20Sopenharmony_ci	case SIOCSIFADDR:
41998c2ecf20Sopenharmony_ci	case SIOCGIFBRDADDR:
42008c2ecf20Sopenharmony_ci	case SIOCSIFBRDADDR:
42018c2ecf20Sopenharmony_ci	case SIOCGIFNETMASK:
42028c2ecf20Sopenharmony_ci	case SIOCSIFNETMASK:
42038c2ecf20Sopenharmony_ci	case SIOCGIFDSTADDR:
42048c2ecf20Sopenharmony_ci	case SIOCSIFDSTADDR:
42058c2ecf20Sopenharmony_ci	case SIOCSIFFLAGS:
42068c2ecf20Sopenharmony_ci		return inet_dgram_ops.ioctl(sock, cmd, arg);
42078c2ecf20Sopenharmony_ci#endif
42088c2ecf20Sopenharmony_ci
42098c2ecf20Sopenharmony_ci	default:
42108c2ecf20Sopenharmony_ci		return -ENOIOCTLCMD;
42118c2ecf20Sopenharmony_ci	}
42128c2ecf20Sopenharmony_ci	return 0;
42138c2ecf20Sopenharmony_ci}
42148c2ecf20Sopenharmony_ci
42158c2ecf20Sopenharmony_cistatic __poll_t packet_poll(struct file *file, struct socket *sock,
42168c2ecf20Sopenharmony_ci				poll_table *wait)
42178c2ecf20Sopenharmony_ci{
42188c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
42198c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
42208c2ecf20Sopenharmony_ci	__poll_t mask = datagram_poll(file, sock, wait);
42218c2ecf20Sopenharmony_ci
42228c2ecf20Sopenharmony_ci	spin_lock_bh(&sk->sk_receive_queue.lock);
42238c2ecf20Sopenharmony_ci	if (po->rx_ring.pg_vec) {
42248c2ecf20Sopenharmony_ci		if (!packet_previous_rx_frame(po, &po->rx_ring,
42258c2ecf20Sopenharmony_ci			TP_STATUS_KERNEL))
42268c2ecf20Sopenharmony_ci			mask |= EPOLLIN | EPOLLRDNORM;
42278c2ecf20Sopenharmony_ci	}
42288c2ecf20Sopenharmony_ci	packet_rcv_try_clear_pressure(po);
42298c2ecf20Sopenharmony_ci	spin_unlock_bh(&sk->sk_receive_queue.lock);
42308c2ecf20Sopenharmony_ci	spin_lock_bh(&sk->sk_write_queue.lock);
42318c2ecf20Sopenharmony_ci	if (po->tx_ring.pg_vec) {
42328c2ecf20Sopenharmony_ci		if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
42338c2ecf20Sopenharmony_ci			mask |= EPOLLOUT | EPOLLWRNORM;
42348c2ecf20Sopenharmony_ci	}
42358c2ecf20Sopenharmony_ci	spin_unlock_bh(&sk->sk_write_queue.lock);
42368c2ecf20Sopenharmony_ci	return mask;
42378c2ecf20Sopenharmony_ci}
42388c2ecf20Sopenharmony_ci
42398c2ecf20Sopenharmony_ci
/* Dirty? Well, I still have not found a better way to account
 * for user mmaps.
 */
42438c2ecf20Sopenharmony_ci
42448c2ecf20Sopenharmony_cistatic void packet_mm_open(struct vm_area_struct *vma)
42458c2ecf20Sopenharmony_ci{
42468c2ecf20Sopenharmony_ci	struct file *file = vma->vm_file;
42478c2ecf20Sopenharmony_ci	struct socket *sock = file->private_data;
42488c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
42498c2ecf20Sopenharmony_ci
42508c2ecf20Sopenharmony_ci	if (sk)
42518c2ecf20Sopenharmony_ci		atomic_long_inc(&pkt_sk(sk)->mapped);
42528c2ecf20Sopenharmony_ci}
42538c2ecf20Sopenharmony_ci
42548c2ecf20Sopenharmony_cistatic void packet_mm_close(struct vm_area_struct *vma)
42558c2ecf20Sopenharmony_ci{
42568c2ecf20Sopenharmony_ci	struct file *file = vma->vm_file;
42578c2ecf20Sopenharmony_ci	struct socket *sock = file->private_data;
42588c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
42598c2ecf20Sopenharmony_ci
42608c2ecf20Sopenharmony_ci	if (sk)
42618c2ecf20Sopenharmony_ci		atomic_long_dec(&pkt_sk(sk)->mapped);
42628c2ecf20Sopenharmony_ci}
42638c2ecf20Sopenharmony_ci
42648c2ecf20Sopenharmony_cistatic const struct vm_operations_struct packet_mmap_ops = {
42658c2ecf20Sopenharmony_ci	.open	=	packet_mm_open,
42668c2ecf20Sopenharmony_ci	.close	=	packet_mm_close,
42678c2ecf20Sopenharmony_ci};
42688c2ecf20Sopenharmony_ci
42698c2ecf20Sopenharmony_cistatic void free_pg_vec(struct pgv *pg_vec, unsigned int order,
42708c2ecf20Sopenharmony_ci			unsigned int len)
42718c2ecf20Sopenharmony_ci{
42728c2ecf20Sopenharmony_ci	int i;
42738c2ecf20Sopenharmony_ci
42748c2ecf20Sopenharmony_ci	for (i = 0; i < len; i++) {
42758c2ecf20Sopenharmony_ci		if (likely(pg_vec[i].buffer)) {
42768c2ecf20Sopenharmony_ci			if (is_vmalloc_addr(pg_vec[i].buffer))
42778c2ecf20Sopenharmony_ci				vfree(pg_vec[i].buffer);
42788c2ecf20Sopenharmony_ci			else
42798c2ecf20Sopenharmony_ci				free_pages((unsigned long)pg_vec[i].buffer,
42808c2ecf20Sopenharmony_ci					   order);
42818c2ecf20Sopenharmony_ci			pg_vec[i].buffer = NULL;
42828c2ecf20Sopenharmony_ci		}
42838c2ecf20Sopenharmony_ci	}
42848c2ecf20Sopenharmony_ci	kfree(pg_vec);
42858c2ecf20Sopenharmony_ci}
42868c2ecf20Sopenharmony_ci
42878c2ecf20Sopenharmony_cistatic char *alloc_one_pg_vec_page(unsigned long order)
42888c2ecf20Sopenharmony_ci{
42898c2ecf20Sopenharmony_ci	char *buffer;
42908c2ecf20Sopenharmony_ci	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
42918c2ecf20Sopenharmony_ci			  __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
42928c2ecf20Sopenharmony_ci
42938c2ecf20Sopenharmony_ci	buffer = (char *) __get_free_pages(gfp_flags, order);
42948c2ecf20Sopenharmony_ci	if (buffer)
42958c2ecf20Sopenharmony_ci		return buffer;
42968c2ecf20Sopenharmony_ci
42978c2ecf20Sopenharmony_ci	/* __get_free_pages failed, fall back to vmalloc */
42988c2ecf20Sopenharmony_ci	buffer = vzalloc(array_size((1 << order), PAGE_SIZE));
42998c2ecf20Sopenharmony_ci	if (buffer)
43008c2ecf20Sopenharmony_ci		return buffer;
43018c2ecf20Sopenharmony_ci
43028c2ecf20Sopenharmony_ci	/* vmalloc failed, lets dig into swap here */
43038c2ecf20Sopenharmony_ci	gfp_flags &= ~__GFP_NORETRY;
43048c2ecf20Sopenharmony_ci	buffer = (char *) __get_free_pages(gfp_flags, order);
43058c2ecf20Sopenharmony_ci	if (buffer)
43068c2ecf20Sopenharmony_ci		return buffer;
43078c2ecf20Sopenharmony_ci
43088c2ecf20Sopenharmony_ci	/* complete and utter failure */
43098c2ecf20Sopenharmony_ci	return NULL;
43108c2ecf20Sopenharmony_ci}
43118c2ecf20Sopenharmony_ci
43128c2ecf20Sopenharmony_cistatic struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
43138c2ecf20Sopenharmony_ci{
43148c2ecf20Sopenharmony_ci	unsigned int block_nr = req->tp_block_nr;
43158c2ecf20Sopenharmony_ci	struct pgv *pg_vec;
43168c2ecf20Sopenharmony_ci	int i;
43178c2ecf20Sopenharmony_ci
43188c2ecf20Sopenharmony_ci	pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN);
43198c2ecf20Sopenharmony_ci	if (unlikely(!pg_vec))
43208c2ecf20Sopenharmony_ci		goto out;
43218c2ecf20Sopenharmony_ci
43228c2ecf20Sopenharmony_ci	for (i = 0; i < block_nr; i++) {
43238c2ecf20Sopenharmony_ci		pg_vec[i].buffer = alloc_one_pg_vec_page(order);
43248c2ecf20Sopenharmony_ci		if (unlikely(!pg_vec[i].buffer))
43258c2ecf20Sopenharmony_ci			goto out_free_pgvec;
43268c2ecf20Sopenharmony_ci	}
43278c2ecf20Sopenharmony_ci
43288c2ecf20Sopenharmony_ciout:
43298c2ecf20Sopenharmony_ci	return pg_vec;
43308c2ecf20Sopenharmony_ci
43318c2ecf20Sopenharmony_ciout_free_pgvec:
43328c2ecf20Sopenharmony_ci	free_pg_vec(pg_vec, order, block_nr);
43338c2ecf20Sopenharmony_ci	pg_vec = NULL;
43348c2ecf20Sopenharmony_ci	goto out;
43358c2ecf20Sopenharmony_ci}
43368c2ecf20Sopenharmony_ci
43378c2ecf20Sopenharmony_cistatic int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
43388c2ecf20Sopenharmony_ci		int closing, int tx_ring)
43398c2ecf20Sopenharmony_ci{
43408c2ecf20Sopenharmony_ci	struct pgv *pg_vec = NULL;
43418c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
43428c2ecf20Sopenharmony_ci	unsigned long *rx_owner_map = NULL;
43438c2ecf20Sopenharmony_ci	int was_running, order = 0;
43448c2ecf20Sopenharmony_ci	struct packet_ring_buffer *rb;
43458c2ecf20Sopenharmony_ci	struct sk_buff_head *rb_queue;
43468c2ecf20Sopenharmony_ci	__be16 num;
43478c2ecf20Sopenharmony_ci	int err;
43488c2ecf20Sopenharmony_ci	/* Added to avoid minimal code churn */
43498c2ecf20Sopenharmony_ci	struct tpacket_req *req = &req_u->req;
43508c2ecf20Sopenharmony_ci
43518c2ecf20Sopenharmony_ci	rb = tx_ring ? &po->tx_ring : &po->rx_ring;
43528c2ecf20Sopenharmony_ci	rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
43538c2ecf20Sopenharmony_ci
43548c2ecf20Sopenharmony_ci	err = -EBUSY;
43558c2ecf20Sopenharmony_ci	if (!closing) {
43568c2ecf20Sopenharmony_ci		if (atomic_long_read(&po->mapped))
43578c2ecf20Sopenharmony_ci			goto out;
43588c2ecf20Sopenharmony_ci		if (packet_read_pending(rb))
43598c2ecf20Sopenharmony_ci			goto out;
43608c2ecf20Sopenharmony_ci	}
43618c2ecf20Sopenharmony_ci
43628c2ecf20Sopenharmony_ci	if (req->tp_block_nr) {
43638c2ecf20Sopenharmony_ci		unsigned int min_frame_size;
43648c2ecf20Sopenharmony_ci
43658c2ecf20Sopenharmony_ci		/* Sanity tests and some calculations */
43668c2ecf20Sopenharmony_ci		err = -EBUSY;
43678c2ecf20Sopenharmony_ci		if (unlikely(rb->pg_vec))
43688c2ecf20Sopenharmony_ci			goto out;
43698c2ecf20Sopenharmony_ci
43708c2ecf20Sopenharmony_ci		switch (po->tp_version) {
43718c2ecf20Sopenharmony_ci		case TPACKET_V1:
43728c2ecf20Sopenharmony_ci			po->tp_hdrlen = TPACKET_HDRLEN;
43738c2ecf20Sopenharmony_ci			break;
43748c2ecf20Sopenharmony_ci		case TPACKET_V2:
43758c2ecf20Sopenharmony_ci			po->tp_hdrlen = TPACKET2_HDRLEN;
43768c2ecf20Sopenharmony_ci			break;
43778c2ecf20Sopenharmony_ci		case TPACKET_V3:
43788c2ecf20Sopenharmony_ci			po->tp_hdrlen = TPACKET3_HDRLEN;
43798c2ecf20Sopenharmony_ci			break;
43808c2ecf20Sopenharmony_ci		}
43818c2ecf20Sopenharmony_ci
43828c2ecf20Sopenharmony_ci		err = -EINVAL;
43838c2ecf20Sopenharmony_ci		if (unlikely((int)req->tp_block_size <= 0))
43848c2ecf20Sopenharmony_ci			goto out;
43858c2ecf20Sopenharmony_ci		if (unlikely(!PAGE_ALIGNED(req->tp_block_size)))
43868c2ecf20Sopenharmony_ci			goto out;
43878c2ecf20Sopenharmony_ci		min_frame_size = po->tp_hdrlen + po->tp_reserve;
43888c2ecf20Sopenharmony_ci		if (po->tp_version >= TPACKET_V3 &&
43898c2ecf20Sopenharmony_ci		    req->tp_block_size <
43908c2ecf20Sopenharmony_ci		    BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size)
43918c2ecf20Sopenharmony_ci			goto out;
43928c2ecf20Sopenharmony_ci		if (unlikely(req->tp_frame_size < min_frame_size))
43938c2ecf20Sopenharmony_ci			goto out;
43948c2ecf20Sopenharmony_ci		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
43958c2ecf20Sopenharmony_ci			goto out;
43968c2ecf20Sopenharmony_ci
43978c2ecf20Sopenharmony_ci		rb->frames_per_block = req->tp_block_size / req->tp_frame_size;
43988c2ecf20Sopenharmony_ci		if (unlikely(rb->frames_per_block == 0))
43998c2ecf20Sopenharmony_ci			goto out;
44008c2ecf20Sopenharmony_ci		if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr))
44018c2ecf20Sopenharmony_ci			goto out;
44028c2ecf20Sopenharmony_ci		if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
44038c2ecf20Sopenharmony_ci					req->tp_frame_nr))
44048c2ecf20Sopenharmony_ci			goto out;
44058c2ecf20Sopenharmony_ci
44068c2ecf20Sopenharmony_ci		err = -ENOMEM;
44078c2ecf20Sopenharmony_ci		order = get_order(req->tp_block_size);
44088c2ecf20Sopenharmony_ci		pg_vec = alloc_pg_vec(req, order);
44098c2ecf20Sopenharmony_ci		if (unlikely(!pg_vec))
44108c2ecf20Sopenharmony_ci			goto out;
44118c2ecf20Sopenharmony_ci		switch (po->tp_version) {
44128c2ecf20Sopenharmony_ci		case TPACKET_V3:
44138c2ecf20Sopenharmony_ci			/* Block transmit is not supported yet */
44148c2ecf20Sopenharmony_ci			if (!tx_ring) {
44158c2ecf20Sopenharmony_ci				init_prb_bdqc(po, rb, pg_vec, req_u);
44168c2ecf20Sopenharmony_ci			} else {
44178c2ecf20Sopenharmony_ci				struct tpacket_req3 *req3 = &req_u->req3;
44188c2ecf20Sopenharmony_ci
44198c2ecf20Sopenharmony_ci				if (req3->tp_retire_blk_tov ||
44208c2ecf20Sopenharmony_ci				    req3->tp_sizeof_priv ||
44218c2ecf20Sopenharmony_ci				    req3->tp_feature_req_word) {
44228c2ecf20Sopenharmony_ci					err = -EINVAL;
44238c2ecf20Sopenharmony_ci					goto out_free_pg_vec;
44248c2ecf20Sopenharmony_ci				}
44258c2ecf20Sopenharmony_ci			}
44268c2ecf20Sopenharmony_ci			break;
44278c2ecf20Sopenharmony_ci		default:
44288c2ecf20Sopenharmony_ci			if (!tx_ring) {
44298c2ecf20Sopenharmony_ci				rx_owner_map = bitmap_alloc(req->tp_frame_nr,
44308c2ecf20Sopenharmony_ci					GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
44318c2ecf20Sopenharmony_ci				if (!rx_owner_map)
44328c2ecf20Sopenharmony_ci					goto out_free_pg_vec;
44338c2ecf20Sopenharmony_ci			}
44348c2ecf20Sopenharmony_ci			break;
44358c2ecf20Sopenharmony_ci		}
44368c2ecf20Sopenharmony_ci	}
44378c2ecf20Sopenharmony_ci	/* Done */
44388c2ecf20Sopenharmony_ci	else {
44398c2ecf20Sopenharmony_ci		err = -EINVAL;
44408c2ecf20Sopenharmony_ci		if (unlikely(req->tp_frame_nr))
44418c2ecf20Sopenharmony_ci			goto out;
44428c2ecf20Sopenharmony_ci	}
44438c2ecf20Sopenharmony_ci
44448c2ecf20Sopenharmony_ci
44458c2ecf20Sopenharmony_ci	/* Detach socket from network */
44468c2ecf20Sopenharmony_ci	spin_lock(&po->bind_lock);
44478c2ecf20Sopenharmony_ci	was_running = po->running;
44488c2ecf20Sopenharmony_ci	num = po->num;
44498c2ecf20Sopenharmony_ci	if (was_running) {
44508c2ecf20Sopenharmony_ci		WRITE_ONCE(po->num, 0);
44518c2ecf20Sopenharmony_ci		__unregister_prot_hook(sk, false);
44528c2ecf20Sopenharmony_ci	}
44538c2ecf20Sopenharmony_ci	spin_unlock(&po->bind_lock);
44548c2ecf20Sopenharmony_ci
44558c2ecf20Sopenharmony_ci	synchronize_net();
44568c2ecf20Sopenharmony_ci
44578c2ecf20Sopenharmony_ci	err = -EBUSY;
44588c2ecf20Sopenharmony_ci	mutex_lock(&po->pg_vec_lock);
44598c2ecf20Sopenharmony_ci	if (closing || atomic_long_read(&po->mapped) == 0) {
44608c2ecf20Sopenharmony_ci		err = 0;
44618c2ecf20Sopenharmony_ci		spin_lock_bh(&rb_queue->lock);
44628c2ecf20Sopenharmony_ci		swap(rb->pg_vec, pg_vec);
44638c2ecf20Sopenharmony_ci		if (po->tp_version <= TPACKET_V2)
44648c2ecf20Sopenharmony_ci			swap(rb->rx_owner_map, rx_owner_map);
44658c2ecf20Sopenharmony_ci		rb->frame_max = (req->tp_frame_nr - 1);
44668c2ecf20Sopenharmony_ci		rb->head = 0;
44678c2ecf20Sopenharmony_ci		rb->frame_size = req->tp_frame_size;
44688c2ecf20Sopenharmony_ci		spin_unlock_bh(&rb_queue->lock);
44698c2ecf20Sopenharmony_ci
44708c2ecf20Sopenharmony_ci		swap(rb->pg_vec_order, order);
44718c2ecf20Sopenharmony_ci		swap(rb->pg_vec_len, req->tp_block_nr);
44728c2ecf20Sopenharmony_ci
44738c2ecf20Sopenharmony_ci		rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
44748c2ecf20Sopenharmony_ci		po->prot_hook.func = (po->rx_ring.pg_vec) ?
44758c2ecf20Sopenharmony_ci						tpacket_rcv : packet_rcv;
44768c2ecf20Sopenharmony_ci		skb_queue_purge(rb_queue);
44778c2ecf20Sopenharmony_ci		if (atomic_long_read(&po->mapped))
44788c2ecf20Sopenharmony_ci			pr_err("packet_mmap: vma is busy: %ld\n",
44798c2ecf20Sopenharmony_ci			       atomic_long_read(&po->mapped));
44808c2ecf20Sopenharmony_ci	}
44818c2ecf20Sopenharmony_ci	mutex_unlock(&po->pg_vec_lock);
44828c2ecf20Sopenharmony_ci
44838c2ecf20Sopenharmony_ci	spin_lock(&po->bind_lock);
44848c2ecf20Sopenharmony_ci	if (was_running) {
44858c2ecf20Sopenharmony_ci		WRITE_ONCE(po->num, num);
44868c2ecf20Sopenharmony_ci		register_prot_hook(sk);
44878c2ecf20Sopenharmony_ci	}
44888c2ecf20Sopenharmony_ci	spin_unlock(&po->bind_lock);
44898c2ecf20Sopenharmony_ci	if (pg_vec && (po->tp_version > TPACKET_V2)) {
44908c2ecf20Sopenharmony_ci		/* Because we don't support block-based V3 on tx-ring */
44918c2ecf20Sopenharmony_ci		if (!tx_ring)
44928c2ecf20Sopenharmony_ci			prb_shutdown_retire_blk_timer(po, rb_queue);
44938c2ecf20Sopenharmony_ci	}
44948c2ecf20Sopenharmony_ci
44958c2ecf20Sopenharmony_ciout_free_pg_vec:
44968c2ecf20Sopenharmony_ci	if (pg_vec) {
44978c2ecf20Sopenharmony_ci		bitmap_free(rx_owner_map);
44988c2ecf20Sopenharmony_ci		free_pg_vec(pg_vec, order, req->tp_block_nr);
44998c2ecf20Sopenharmony_ci	}
45008c2ecf20Sopenharmony_ciout:
45018c2ecf20Sopenharmony_ci	return err;
45028c2ecf20Sopenharmony_ci}
45038c2ecf20Sopenharmony_ci
45048c2ecf20Sopenharmony_cistatic int packet_mmap(struct file *file, struct socket *sock,
45058c2ecf20Sopenharmony_ci		struct vm_area_struct *vma)
45068c2ecf20Sopenharmony_ci{
45078c2ecf20Sopenharmony_ci	struct sock *sk = sock->sk;
45088c2ecf20Sopenharmony_ci	struct packet_sock *po = pkt_sk(sk);
45098c2ecf20Sopenharmony_ci	unsigned long size, expected_size;
45108c2ecf20Sopenharmony_ci	struct packet_ring_buffer *rb;
45118c2ecf20Sopenharmony_ci	unsigned long start;
45128c2ecf20Sopenharmony_ci	int err = -EINVAL;
45138c2ecf20Sopenharmony_ci	int i;
45148c2ecf20Sopenharmony_ci
45158c2ecf20Sopenharmony_ci	if (vma->vm_pgoff)
45168c2ecf20Sopenharmony_ci		return -EINVAL;
45178c2ecf20Sopenharmony_ci
45188c2ecf20Sopenharmony_ci	mutex_lock(&po->pg_vec_lock);
45198c2ecf20Sopenharmony_ci
45208c2ecf20Sopenharmony_ci	expected_size = 0;
45218c2ecf20Sopenharmony_ci	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
45228c2ecf20Sopenharmony_ci		if (rb->pg_vec) {
45238c2ecf20Sopenharmony_ci			expected_size += rb->pg_vec_len
45248c2ecf20Sopenharmony_ci						* rb->pg_vec_pages
45258c2ecf20Sopenharmony_ci						* PAGE_SIZE;
45268c2ecf20Sopenharmony_ci		}
45278c2ecf20Sopenharmony_ci	}
45288c2ecf20Sopenharmony_ci
45298c2ecf20Sopenharmony_ci	if (expected_size == 0)
45308c2ecf20Sopenharmony_ci		goto out;
45318c2ecf20Sopenharmony_ci
45328c2ecf20Sopenharmony_ci	size = vma->vm_end - vma->vm_start;
45338c2ecf20Sopenharmony_ci	if (size != expected_size)
45348c2ecf20Sopenharmony_ci		goto out;
45358c2ecf20Sopenharmony_ci
45368c2ecf20Sopenharmony_ci	start = vma->vm_start;
45378c2ecf20Sopenharmony_ci	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
45388c2ecf20Sopenharmony_ci		if (rb->pg_vec == NULL)
45398c2ecf20Sopenharmony_ci			continue;
45408c2ecf20Sopenharmony_ci
45418c2ecf20Sopenharmony_ci		for (i = 0; i < rb->pg_vec_len; i++) {
45428c2ecf20Sopenharmony_ci			struct page *page;
45438c2ecf20Sopenharmony_ci			void *kaddr = rb->pg_vec[i].buffer;
45448c2ecf20Sopenharmony_ci			int pg_num;
45458c2ecf20Sopenharmony_ci
45468c2ecf20Sopenharmony_ci			for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) {
45478c2ecf20Sopenharmony_ci				page = pgv_to_page(kaddr);
45488c2ecf20Sopenharmony_ci				err = vm_insert_page(vma, start, page);
45498c2ecf20Sopenharmony_ci				if (unlikely(err))
45508c2ecf20Sopenharmony_ci					goto out;
45518c2ecf20Sopenharmony_ci				start += PAGE_SIZE;
45528c2ecf20Sopenharmony_ci				kaddr += PAGE_SIZE;
45538c2ecf20Sopenharmony_ci			}
45548c2ecf20Sopenharmony_ci		}
45558c2ecf20Sopenharmony_ci	}
45568c2ecf20Sopenharmony_ci
45578c2ecf20Sopenharmony_ci	atomic_long_inc(&po->mapped);
45588c2ecf20Sopenharmony_ci	vma->vm_ops = &packet_mmap_ops;
45598c2ecf20Sopenharmony_ci	err = 0;
45608c2ecf20Sopenharmony_ci
45618c2ecf20Sopenharmony_ciout:
45628c2ecf20Sopenharmony_ci	mutex_unlock(&po->pg_vec_lock);
45638c2ecf20Sopenharmony_ci	return err;
45648c2ecf20Sopenharmony_ci}
45658c2ecf20Sopenharmony_ci
45668c2ecf20Sopenharmony_cistatic const struct proto_ops packet_ops_spkt = {
45678c2ecf20Sopenharmony_ci	.family =	PF_PACKET,
45688c2ecf20Sopenharmony_ci	.owner =	THIS_MODULE,
45698c2ecf20Sopenharmony_ci	.release =	packet_release,
45708c2ecf20Sopenharmony_ci	.bind =		packet_bind_spkt,
45718c2ecf20Sopenharmony_ci	.connect =	sock_no_connect,
45728c2ecf20Sopenharmony_ci	.socketpair =	sock_no_socketpair,
45738c2ecf20Sopenharmony_ci	.accept =	sock_no_accept,
45748c2ecf20Sopenharmony_ci	.getname =	packet_getname_spkt,
45758c2ecf20Sopenharmony_ci	.poll =		datagram_poll,
45768c2ecf20Sopenharmony_ci	.ioctl =	packet_ioctl,
45778c2ecf20Sopenharmony_ci	.gettstamp =	sock_gettstamp,
45788c2ecf20Sopenharmony_ci	.listen =	sock_no_listen,
45798c2ecf20Sopenharmony_ci	.shutdown =	sock_no_shutdown,
45808c2ecf20Sopenharmony_ci	.sendmsg =	packet_sendmsg_spkt,
45818c2ecf20Sopenharmony_ci	.recvmsg =	packet_recvmsg,
45828c2ecf20Sopenharmony_ci	.mmap =		sock_no_mmap,
45838c2ecf20Sopenharmony_ci	.sendpage =	sock_no_sendpage,
45848c2ecf20Sopenharmony_ci};
45858c2ecf20Sopenharmony_ci
45868c2ecf20Sopenharmony_cistatic const struct proto_ops packet_ops = {
45878c2ecf20Sopenharmony_ci	.family =	PF_PACKET,
45888c2ecf20Sopenharmony_ci	.owner =	THIS_MODULE,
45898c2ecf20Sopenharmony_ci	.release =	packet_release,
45908c2ecf20Sopenharmony_ci	.bind =		packet_bind,
45918c2ecf20Sopenharmony_ci	.connect =	sock_no_connect,
45928c2ecf20Sopenharmony_ci	.socketpair =	sock_no_socketpair,
45938c2ecf20Sopenharmony_ci	.accept =	sock_no_accept,
45948c2ecf20Sopenharmony_ci	.getname =	packet_getname,
45958c2ecf20Sopenharmony_ci	.poll =		packet_poll,
45968c2ecf20Sopenharmony_ci	.ioctl =	packet_ioctl,
45978c2ecf20Sopenharmony_ci	.gettstamp =	sock_gettstamp,
45988c2ecf20Sopenharmony_ci	.listen =	sock_no_listen,
45998c2ecf20Sopenharmony_ci	.shutdown =	sock_no_shutdown,
46008c2ecf20Sopenharmony_ci	.setsockopt =	packet_setsockopt,
46018c2ecf20Sopenharmony_ci	.getsockopt =	packet_getsockopt,
46028c2ecf20Sopenharmony_ci	.sendmsg =	packet_sendmsg,
46038c2ecf20Sopenharmony_ci	.recvmsg =	packet_recvmsg,
46048c2ecf20Sopenharmony_ci	.mmap =		packet_mmap,
46058c2ecf20Sopenharmony_ci	.sendpage =	sock_no_sendpage,
46068c2ecf20Sopenharmony_ci};
46078c2ecf20Sopenharmony_ci
46088c2ecf20Sopenharmony_cistatic const struct net_proto_family packet_family_ops = {
46098c2ecf20Sopenharmony_ci	.family =	PF_PACKET,
46108c2ecf20Sopenharmony_ci	.create =	packet_create,
46118c2ecf20Sopenharmony_ci	.owner	=	THIS_MODULE,
46128c2ecf20Sopenharmony_ci};
46138c2ecf20Sopenharmony_ci
46148c2ecf20Sopenharmony_cistatic struct notifier_block packet_netdev_notifier = {
46158c2ecf20Sopenharmony_ci	.notifier_call =	packet_notifier,
46168c2ecf20Sopenharmony_ci};
46178c2ecf20Sopenharmony_ci
46188c2ecf20Sopenharmony_ci#ifdef CONFIG_PROC_FS
46198c2ecf20Sopenharmony_ci
46208c2ecf20Sopenharmony_cistatic void *packet_seq_start(struct seq_file *seq, loff_t *pos)
46218c2ecf20Sopenharmony_ci	__acquires(RCU)
46228c2ecf20Sopenharmony_ci{
46238c2ecf20Sopenharmony_ci	struct net *net = seq_file_net(seq);
46248c2ecf20Sopenharmony_ci
46258c2ecf20Sopenharmony_ci	rcu_read_lock();
46268c2ecf20Sopenharmony_ci	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
46278c2ecf20Sopenharmony_ci}
46288c2ecf20Sopenharmony_ci
46298c2ecf20Sopenharmony_cistatic void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
46308c2ecf20Sopenharmony_ci{
46318c2ecf20Sopenharmony_ci	struct net *net = seq_file_net(seq);
46328c2ecf20Sopenharmony_ci	return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
46338c2ecf20Sopenharmony_ci}
46348c2ecf20Sopenharmony_ci
46358c2ecf20Sopenharmony_cistatic void packet_seq_stop(struct seq_file *seq, void *v)
46368c2ecf20Sopenharmony_ci	__releases(RCU)
46378c2ecf20Sopenharmony_ci{
46388c2ecf20Sopenharmony_ci	rcu_read_unlock();
46398c2ecf20Sopenharmony_ci}
46408c2ecf20Sopenharmony_ci
46418c2ecf20Sopenharmony_cistatic int packet_seq_show(struct seq_file *seq, void *v)
46428c2ecf20Sopenharmony_ci{
46438c2ecf20Sopenharmony_ci	if (v == SEQ_START_TOKEN)
46448c2ecf20Sopenharmony_ci		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
46458c2ecf20Sopenharmony_ci	else {
46468c2ecf20Sopenharmony_ci		struct sock *s = sk_entry(v);
46478c2ecf20Sopenharmony_ci		const struct packet_sock *po = pkt_sk(s);
46488c2ecf20Sopenharmony_ci
46498c2ecf20Sopenharmony_ci		seq_printf(seq,
46508c2ecf20Sopenharmony_ci			   "%pK %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
46518c2ecf20Sopenharmony_ci			   s,
46528c2ecf20Sopenharmony_ci			   refcount_read(&s->sk_refcnt),
46538c2ecf20Sopenharmony_ci			   s->sk_type,
46548c2ecf20Sopenharmony_ci			   ntohs(READ_ONCE(po->num)),
46558c2ecf20Sopenharmony_ci			   READ_ONCE(po->ifindex),
46568c2ecf20Sopenharmony_ci			   po->running,
46578c2ecf20Sopenharmony_ci			   atomic_read(&s->sk_rmem_alloc),
46588c2ecf20Sopenharmony_ci			   from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)),
46598c2ecf20Sopenharmony_ci			   sock_i_ino(s));
46608c2ecf20Sopenharmony_ci	}
46618c2ecf20Sopenharmony_ci
46628c2ecf20Sopenharmony_ci	return 0;
46638c2ecf20Sopenharmony_ci}
46648c2ecf20Sopenharmony_ci
46658c2ecf20Sopenharmony_cistatic const struct seq_operations packet_seq_ops = {
46668c2ecf20Sopenharmony_ci	.start	= packet_seq_start,
46678c2ecf20Sopenharmony_ci	.next	= packet_seq_next,
46688c2ecf20Sopenharmony_ci	.stop	= packet_seq_stop,
46698c2ecf20Sopenharmony_ci	.show	= packet_seq_show,
46708c2ecf20Sopenharmony_ci};
46718c2ecf20Sopenharmony_ci#endif
46728c2ecf20Sopenharmony_ci
46738c2ecf20Sopenharmony_cistatic int __net_init packet_net_init(struct net *net)
46748c2ecf20Sopenharmony_ci{
46758c2ecf20Sopenharmony_ci	mutex_init(&net->packet.sklist_lock);
46768c2ecf20Sopenharmony_ci	INIT_HLIST_HEAD(&net->packet.sklist);
46778c2ecf20Sopenharmony_ci
46788c2ecf20Sopenharmony_ci#ifdef CONFIG_PROC_FS
46798c2ecf20Sopenharmony_ci	if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops,
46808c2ecf20Sopenharmony_ci			sizeof(struct seq_net_private)))
46818c2ecf20Sopenharmony_ci		return -ENOMEM;
46828c2ecf20Sopenharmony_ci#endif /* CONFIG_PROC_FS */
46838c2ecf20Sopenharmony_ci
46848c2ecf20Sopenharmony_ci	return 0;
46858c2ecf20Sopenharmony_ci}
46868c2ecf20Sopenharmony_ci
46878c2ecf20Sopenharmony_cistatic void __net_exit packet_net_exit(struct net *net)
46888c2ecf20Sopenharmony_ci{
46898c2ecf20Sopenharmony_ci	remove_proc_entry("packet", net->proc_net);
46908c2ecf20Sopenharmony_ci	WARN_ON_ONCE(!hlist_empty(&net->packet.sklist));
46918c2ecf20Sopenharmony_ci}
46928c2ecf20Sopenharmony_ci
46938c2ecf20Sopenharmony_cistatic struct pernet_operations packet_net_ops = {
46948c2ecf20Sopenharmony_ci	.init = packet_net_init,
46958c2ecf20Sopenharmony_ci	.exit = packet_net_exit,
46968c2ecf20Sopenharmony_ci};
46978c2ecf20Sopenharmony_ci
46988c2ecf20Sopenharmony_ci
46998c2ecf20Sopenharmony_cistatic void __exit packet_exit(void)
47008c2ecf20Sopenharmony_ci{
47018c2ecf20Sopenharmony_ci	unregister_netdevice_notifier(&packet_netdev_notifier);
47028c2ecf20Sopenharmony_ci	unregister_pernet_subsys(&packet_net_ops);
47038c2ecf20Sopenharmony_ci	sock_unregister(PF_PACKET);
47048c2ecf20Sopenharmony_ci	proto_unregister(&packet_proto);
47058c2ecf20Sopenharmony_ci}
47068c2ecf20Sopenharmony_ci
47078c2ecf20Sopenharmony_cistatic int __init packet_init(void)
47088c2ecf20Sopenharmony_ci{
47098c2ecf20Sopenharmony_ci	int rc;
47108c2ecf20Sopenharmony_ci
47118c2ecf20Sopenharmony_ci	rc = proto_register(&packet_proto, 0);
47128c2ecf20Sopenharmony_ci	if (rc)
47138c2ecf20Sopenharmony_ci		goto out;
47148c2ecf20Sopenharmony_ci	rc = sock_register(&packet_family_ops);
47158c2ecf20Sopenharmony_ci	if (rc)
47168c2ecf20Sopenharmony_ci		goto out_proto;
47178c2ecf20Sopenharmony_ci	rc = register_pernet_subsys(&packet_net_ops);
47188c2ecf20Sopenharmony_ci	if (rc)
47198c2ecf20Sopenharmony_ci		goto out_sock;
47208c2ecf20Sopenharmony_ci	rc = register_netdevice_notifier(&packet_netdev_notifier);
47218c2ecf20Sopenharmony_ci	if (rc)
47228c2ecf20Sopenharmony_ci		goto out_pernet;
47238c2ecf20Sopenharmony_ci
47248c2ecf20Sopenharmony_ci	return 0;
47258c2ecf20Sopenharmony_ci
47268c2ecf20Sopenharmony_ciout_pernet:
47278c2ecf20Sopenharmony_ci	unregister_pernet_subsys(&packet_net_ops);
47288c2ecf20Sopenharmony_ciout_sock:
47298c2ecf20Sopenharmony_ci	sock_unregister(PF_PACKET);
47308c2ecf20Sopenharmony_ciout_proto:
47318c2ecf20Sopenharmony_ci	proto_unregister(&packet_proto);
47328c2ecf20Sopenharmony_ciout:
47338c2ecf20Sopenharmony_ci	return rc;
47348c2ecf20Sopenharmony_ci}
47358c2ecf20Sopenharmony_ci
47368c2ecf20Sopenharmony_cimodule_init(packet_init);
47378c2ecf20Sopenharmony_cimodule_exit(packet_exit);
47388c2ecf20Sopenharmony_ciMODULE_LICENSE("GPL");
47398c2ecf20Sopenharmony_ciMODULE_ALIAS_NETPROTO(PF_PACKET);
4740