// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		PACKET - implements raw packet sockets.
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Alan Cox, <gw4pts@gw4pts.ampr.org>
 *
 * Fixes:
 *		Alan Cox	:	verify_area() now used correctly
 *		Alan Cox	:	new skbuff lists, look ma no backlogs!
 *		Alan Cox	:	tidied skbuff lists.
 *		Alan Cox	:	Now uses generic datagram routines I
 *					added. Also fixed the peek/read crash
 *					from all old Linux datagram code.
 *		Alan Cox	:	Uses the improved datagram code.
 *		Alan Cox	:	Added NULL's for socket options.
 *		Alan Cox	:	Re-commented the code.
 *		Alan Cox	:	Use new kernel side addressing
 *		Rob Janssen	:	Correct MTU usage.
 *		Dave Platt	:	Counter leaks caused by incorrect
 *					interrupt locking and some slightly
 *					dubious gcc output. Can you read
 *					compiler: it said _VOLATILE_
 *	Richard Kooijman	:	Timestamp fixes.
 *		Alan Cox	:	New buffers. Use sk->mac.raw.
 *		Alan Cox	:	sendmsg/recvmsg support.
3262306a36Sopenharmony_ci * Alan Cox : Protocol setting support 3362306a36Sopenharmony_ci * Alexey Kuznetsov : Untied from IPv4 stack. 3462306a36Sopenharmony_ci * Cyrus Durgin : Fixed kerneld for kmod. 3562306a36Sopenharmony_ci * Michal Ostrowski : Module initialization cleanup. 3662306a36Sopenharmony_ci * Ulises Alonso : Frame number limit removal and 3762306a36Sopenharmony_ci * packet_set_ring memory leak. 3862306a36Sopenharmony_ci * Eric Biederman : Allow for > 8 byte hardware addresses. 3962306a36Sopenharmony_ci * The convention is that longer addresses 4062306a36Sopenharmony_ci * will simply extend the hardware address 4162306a36Sopenharmony_ci * byte arrays at the end of sockaddr_ll 4262306a36Sopenharmony_ci * and packet_mreq. 4362306a36Sopenharmony_ci * Johann Baudy : Added TX RING. 4462306a36Sopenharmony_ci * Chetan Loke : Implemented TPACKET_V3 block abstraction 4562306a36Sopenharmony_ci * layer. 4662306a36Sopenharmony_ci * Copyright (C) 2011, <lokec@ccs.neu.edu> 4762306a36Sopenharmony_ci */ 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_ci#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci#include <linux/ethtool.h> 5262306a36Sopenharmony_ci#include <linux/filter.h> 5362306a36Sopenharmony_ci#include <linux/types.h> 5462306a36Sopenharmony_ci#include <linux/mm.h> 5562306a36Sopenharmony_ci#include <linux/capability.h> 5662306a36Sopenharmony_ci#include <linux/fcntl.h> 5762306a36Sopenharmony_ci#include <linux/socket.h> 5862306a36Sopenharmony_ci#include <linux/in.h> 5962306a36Sopenharmony_ci#include <linux/inet.h> 6062306a36Sopenharmony_ci#include <linux/netdevice.h> 6162306a36Sopenharmony_ci#include <linux/if_packet.h> 6262306a36Sopenharmony_ci#include <linux/wireless.h> 6362306a36Sopenharmony_ci#include <linux/kernel.h> 6462306a36Sopenharmony_ci#include <linux/kmod.h> 6562306a36Sopenharmony_ci#include <linux/slab.h> 6662306a36Sopenharmony_ci#include <linux/vmalloc.h> 6762306a36Sopenharmony_ci#include 
<net/net_namespace.h> 6862306a36Sopenharmony_ci#include <net/ip.h> 6962306a36Sopenharmony_ci#include <net/protocol.h> 7062306a36Sopenharmony_ci#include <linux/skbuff.h> 7162306a36Sopenharmony_ci#include <net/sock.h> 7262306a36Sopenharmony_ci#include <linux/errno.h> 7362306a36Sopenharmony_ci#include <linux/timer.h> 7462306a36Sopenharmony_ci#include <linux/uaccess.h> 7562306a36Sopenharmony_ci#include <asm/ioctls.h> 7662306a36Sopenharmony_ci#include <asm/page.h> 7762306a36Sopenharmony_ci#include <asm/cacheflush.h> 7862306a36Sopenharmony_ci#include <asm/io.h> 7962306a36Sopenharmony_ci#include <linux/proc_fs.h> 8062306a36Sopenharmony_ci#include <linux/seq_file.h> 8162306a36Sopenharmony_ci#include <linux/poll.h> 8262306a36Sopenharmony_ci#include <linux/module.h> 8362306a36Sopenharmony_ci#include <linux/init.h> 8462306a36Sopenharmony_ci#include <linux/mutex.h> 8562306a36Sopenharmony_ci#include <linux/if_vlan.h> 8662306a36Sopenharmony_ci#include <linux/virtio_net.h> 8762306a36Sopenharmony_ci#include <linux/errqueue.h> 8862306a36Sopenharmony_ci#include <linux/net_tstamp.h> 8962306a36Sopenharmony_ci#include <linux/percpu.h> 9062306a36Sopenharmony_ci#ifdef CONFIG_INET 9162306a36Sopenharmony_ci#include <net/inet_common.h> 9262306a36Sopenharmony_ci#endif 9362306a36Sopenharmony_ci#include <linux/bpf.h> 9462306a36Sopenharmony_ci#include <net/compat.h> 9562306a36Sopenharmony_ci#include <linux/netfilter_netdev.h> 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci#include "internal.h" 9862306a36Sopenharmony_ci 9962306a36Sopenharmony_ci/* 10062306a36Sopenharmony_ci Assumptions: 10162306a36Sopenharmony_ci - If the device has no dev->header_ops->create, there is no LL header 10262306a36Sopenharmony_ci visible above the device. In this case, its hard_header_len should be 0. 10362306a36Sopenharmony_ci The device may prepend its own header internally. 
In this case, its
     needed_headroom should be set to the space needed for it to add its
     internal header.
     For example, a WiFi driver pretending to be an Ethernet driver should
     set its hard_header_len to be the Ethernet header length, and set its
     needed_headroom to be (the real WiFi header length - the fake Ethernet
     header length).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev_has_header(dev) == true
   mac_header -> ll header
   data       -> data

Outgoing, dev_has_header(dev) == true
   mac_header -> ll header
   data       -> ll header

Incoming, dev_has_header(dev) == false
   mac_header -> data
     However drivers often make it point to the ll header.
     This is incorrect because the ll header should be invisible to us.
   data       -> data

Outgoing, dev_has_header(dev) == false
   mac_header -> data. ll header is invisible to us.
   data       -> data

Summary:
  If dev_has_header(dev) == false we are unable to restore the ll header,
    because it is invisible to us.
13762306a36Sopenharmony_ci 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ciOn transmit: 14062306a36Sopenharmony_ci------------ 14162306a36Sopenharmony_ci 14262306a36Sopenharmony_cidev_has_header(dev) == true 14362306a36Sopenharmony_ci mac_header -> ll header 14462306a36Sopenharmony_ci data -> ll header 14562306a36Sopenharmony_ci 14662306a36Sopenharmony_cidev_has_header(dev) == false (ll header is invisible to us) 14762306a36Sopenharmony_ci mac_header -> data 14862306a36Sopenharmony_ci data -> data 14962306a36Sopenharmony_ci 15062306a36Sopenharmony_ci We should set network_header on output to the correct position, 15162306a36Sopenharmony_ci packet classifier depends on it. 15262306a36Sopenharmony_ci */ 15362306a36Sopenharmony_ci 15462306a36Sopenharmony_ci/* Private packet socket structures. */ 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_ci/* identical to struct packet_mreq except it has 15762306a36Sopenharmony_ci * a longer address field. 15862306a36Sopenharmony_ci */ 15962306a36Sopenharmony_cistruct packet_mreq_max { 16062306a36Sopenharmony_ci int mr_ifindex; 16162306a36Sopenharmony_ci unsigned short mr_type; 16262306a36Sopenharmony_ci unsigned short mr_alen; 16362306a36Sopenharmony_ci unsigned char mr_address[MAX_ADDR_LEN]; 16462306a36Sopenharmony_ci}; 16562306a36Sopenharmony_ci 16662306a36Sopenharmony_ciunion tpacket_uhdr { 16762306a36Sopenharmony_ci struct tpacket_hdr *h1; 16862306a36Sopenharmony_ci struct tpacket2_hdr *h2; 16962306a36Sopenharmony_ci struct tpacket3_hdr *h3; 17062306a36Sopenharmony_ci void *raw; 17162306a36Sopenharmony_ci}; 17262306a36Sopenharmony_ci 17362306a36Sopenharmony_cistatic int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, 17462306a36Sopenharmony_ci int closing, int tx_ring); 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci#define V3_ALIGNMENT (8) 17762306a36Sopenharmony_ci 17862306a36Sopenharmony_ci#define BLK_HDR_LEN (ALIGN(sizeof(struct tpacket_block_desc), V3_ALIGNMENT)) 17962306a36Sopenharmony_ci 
/* Aligned block header length plus the aligned private-area size. */
#define BLK_PLUS_PRIV(sz_of_priv) \
	(BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT))

/* Field accessors; @x is a pointer whose target has the hdr.bh1 members
 * (and, for BLOCK_O2PRIV, the offset_to_priv member) named below.
 */
#define BLOCK_STATUS(x)		((x)->hdr.bh1.block_status)
#define BLOCK_NUM_PKTS(x)	((x)->hdr.bh1.num_pkts)
#define BLOCK_O2FP(x)		((x)->hdr.bh1.offset_to_first_pkt)
#define BLOCK_LEN(x)		((x)->hdr.bh1.blk_len)
#define BLOCK_SNUM(x)		((x)->hdr.bh1.seq_num)
#define BLOCK_O2PRIV(x)		((x)->offset_to_priv)

/* Forward declarations for helpers used before their definitions. */
struct packet_sock;
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
		       struct packet_type *pt, struct net_device *orig_dev);

static void *packet_previous_frame(struct packet_sock *po,
				   struct packet_ring_buffer *rb,
				   int status);
static void packet_increment_head(struct packet_ring_buffer *buff);
static int prb_curr_blk_in_use(struct tpacket_block_desc *pbd);
static void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc,
				     struct packet_sock *po);
static void prb_retire_current_block(struct tpacket_kbdq_core *pkc,
				     struct packet_sock *po,
				     unsigned int status);
static int prb_queue_frozen(struct tpacket_kbdq_core *pkc);
static void prb_open_block(struct tpacket_kbdq_core *pkc,
			   struct tpacket_block_desc *pbd);
static void prb_retire_rx_blk_timer_expired(struct timer_list *t);
static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc);
20862306a36Sopenharmony_cistatic void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); 20962306a36Sopenharmony_cistatic void prb_clear_rxhash(struct tpacket_kbdq_core *, 21062306a36Sopenharmony_ci struct tpacket3_hdr *); 21162306a36Sopenharmony_cistatic void prb_fill_vlan_info(struct tpacket_kbdq_core *, 21262306a36Sopenharmony_ci struct tpacket3_hdr *); 21362306a36Sopenharmony_cistatic void packet_flush_mclist(struct sock *sk); 21462306a36Sopenharmony_cistatic u16 packet_pick_tx_queue(struct sk_buff *skb); 21562306a36Sopenharmony_ci 21662306a36Sopenharmony_cistruct packet_skb_cb { 21762306a36Sopenharmony_ci union { 21862306a36Sopenharmony_ci struct sockaddr_pkt pkt; 21962306a36Sopenharmony_ci union { 22062306a36Sopenharmony_ci /* Trick: alias skb original length with 22162306a36Sopenharmony_ci * ll.sll_family and ll.protocol in order 22262306a36Sopenharmony_ci * to save room. 22362306a36Sopenharmony_ci */ 22462306a36Sopenharmony_ci unsigned int origlen; 22562306a36Sopenharmony_ci struct sockaddr_ll ll; 22662306a36Sopenharmony_ci }; 22762306a36Sopenharmony_ci } sa; 22862306a36Sopenharmony_ci}; 22962306a36Sopenharmony_ci 23062306a36Sopenharmony_ci#define vio_le() virtio_legacy_is_little_endian() 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb)) 23362306a36Sopenharmony_ci 23462306a36Sopenharmony_ci#define GET_PBDQC_FROM_RB(x) ((struct tpacket_kbdq_core *)(&(x)->prb_bdqc)) 23562306a36Sopenharmony_ci#define GET_PBLOCK_DESC(x, bid) \ 23662306a36Sopenharmony_ci ((struct tpacket_block_desc *)((x)->pkbdq[(bid)].buffer)) 23762306a36Sopenharmony_ci#define GET_CURR_PBLOCK_DESC_FROM_CORE(x) \ 23862306a36Sopenharmony_ci ((struct tpacket_block_desc *)((x)->pkbdq[(x)->kactive_blk_num].buffer)) 23962306a36Sopenharmony_ci#define GET_NEXT_PRB_BLK_NUM(x) \ 24062306a36Sopenharmony_ci (((x)->kactive_blk_num < ((x)->knum_blocks-1)) ? 
\ 24162306a36Sopenharmony_ci ((x)->kactive_blk_num+1) : 0) 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_cistatic void __fanout_unlink(struct sock *sk, struct packet_sock *po); 24462306a36Sopenharmony_cistatic void __fanout_link(struct sock *sk, struct packet_sock *po); 24562306a36Sopenharmony_ci 24662306a36Sopenharmony_ci#ifdef CONFIG_NETFILTER_EGRESS 24762306a36Sopenharmony_cistatic noinline struct sk_buff *nf_hook_direct_egress(struct sk_buff *skb) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci struct sk_buff *next, *head = NULL, *tail; 25062306a36Sopenharmony_ci int rc; 25162306a36Sopenharmony_ci 25262306a36Sopenharmony_ci rcu_read_lock(); 25362306a36Sopenharmony_ci for (; skb != NULL; skb = next) { 25462306a36Sopenharmony_ci next = skb->next; 25562306a36Sopenharmony_ci skb_mark_not_on_list(skb); 25662306a36Sopenharmony_ci 25762306a36Sopenharmony_ci if (!nf_hook_egress(skb, &rc, skb->dev)) 25862306a36Sopenharmony_ci continue; 25962306a36Sopenharmony_ci 26062306a36Sopenharmony_ci if (!head) 26162306a36Sopenharmony_ci head = skb; 26262306a36Sopenharmony_ci else 26362306a36Sopenharmony_ci tail->next = skb; 26462306a36Sopenharmony_ci 26562306a36Sopenharmony_ci tail = skb; 26662306a36Sopenharmony_ci } 26762306a36Sopenharmony_ci rcu_read_unlock(); 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_ci return head; 27062306a36Sopenharmony_ci} 27162306a36Sopenharmony_ci#endif 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_cistatic int packet_xmit(const struct packet_sock *po, struct sk_buff *skb) 27462306a36Sopenharmony_ci{ 27562306a36Sopenharmony_ci if (!packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS)) 27662306a36Sopenharmony_ci return dev_queue_xmit(skb); 27762306a36Sopenharmony_ci 27862306a36Sopenharmony_ci#ifdef CONFIG_NETFILTER_EGRESS 27962306a36Sopenharmony_ci if (nf_hook_egress_active()) { 28062306a36Sopenharmony_ci skb = nf_hook_direct_egress(skb); 28162306a36Sopenharmony_ci if (!skb) 28262306a36Sopenharmony_ci return NET_XMIT_DROP; 
28362306a36Sopenharmony_ci } 28462306a36Sopenharmony_ci#endif 28562306a36Sopenharmony_ci return dev_direct_xmit(skb, packet_pick_tx_queue(skb)); 28662306a36Sopenharmony_ci} 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_cistatic struct net_device *packet_cached_dev_get(struct packet_sock *po) 28962306a36Sopenharmony_ci{ 29062306a36Sopenharmony_ci struct net_device *dev; 29162306a36Sopenharmony_ci 29262306a36Sopenharmony_ci rcu_read_lock(); 29362306a36Sopenharmony_ci dev = rcu_dereference(po->cached_dev); 29462306a36Sopenharmony_ci dev_hold(dev); 29562306a36Sopenharmony_ci rcu_read_unlock(); 29662306a36Sopenharmony_ci 29762306a36Sopenharmony_ci return dev; 29862306a36Sopenharmony_ci} 29962306a36Sopenharmony_ci 30062306a36Sopenharmony_cistatic void packet_cached_dev_assign(struct packet_sock *po, 30162306a36Sopenharmony_ci struct net_device *dev) 30262306a36Sopenharmony_ci{ 30362306a36Sopenharmony_ci rcu_assign_pointer(po->cached_dev, dev); 30462306a36Sopenharmony_ci} 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_cistatic void packet_cached_dev_reset(struct packet_sock *po) 30762306a36Sopenharmony_ci{ 30862306a36Sopenharmony_ci RCU_INIT_POINTER(po->cached_dev, NULL); 30962306a36Sopenharmony_ci} 31062306a36Sopenharmony_ci 31162306a36Sopenharmony_cistatic u16 packet_pick_tx_queue(struct sk_buff *skb) 31262306a36Sopenharmony_ci{ 31362306a36Sopenharmony_ci struct net_device *dev = skb->dev; 31462306a36Sopenharmony_ci const struct net_device_ops *ops = dev->netdev_ops; 31562306a36Sopenharmony_ci int cpu = raw_smp_processor_id(); 31662306a36Sopenharmony_ci u16 queue_index; 31762306a36Sopenharmony_ci 31862306a36Sopenharmony_ci#ifdef CONFIG_XPS 31962306a36Sopenharmony_ci skb->sender_cpu = cpu + 1; 32062306a36Sopenharmony_ci#endif 32162306a36Sopenharmony_ci skb_record_rx_queue(skb, cpu % dev->real_num_tx_queues); 32262306a36Sopenharmony_ci if (ops->ndo_select_queue) { 32362306a36Sopenharmony_ci queue_index = ops->ndo_select_queue(dev, skb, NULL); 
32462306a36Sopenharmony_ci queue_index = netdev_cap_txqueue(dev, queue_index); 32562306a36Sopenharmony_ci } else { 32662306a36Sopenharmony_ci queue_index = netdev_pick_tx(dev, skb, NULL); 32762306a36Sopenharmony_ci } 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci return queue_index; 33062306a36Sopenharmony_ci} 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci/* __register_prot_hook must be invoked through register_prot_hook 33362306a36Sopenharmony_ci * or from a context in which asynchronous accesses to the packet 33462306a36Sopenharmony_ci * socket is not possible (packet_create()). 33562306a36Sopenharmony_ci */ 33662306a36Sopenharmony_cistatic void __register_prot_hook(struct sock *sk) 33762306a36Sopenharmony_ci{ 33862306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 33962306a36Sopenharmony_ci 34062306a36Sopenharmony_ci if (!packet_sock_flag(po, PACKET_SOCK_RUNNING)) { 34162306a36Sopenharmony_ci if (po->fanout) 34262306a36Sopenharmony_ci __fanout_link(sk, po); 34362306a36Sopenharmony_ci else 34462306a36Sopenharmony_ci dev_add_pack(&po->prot_hook); 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci sock_hold(sk); 34762306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 1); 34862306a36Sopenharmony_ci } 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_cistatic void register_prot_hook(struct sock *sk) 35262306a36Sopenharmony_ci{ 35362306a36Sopenharmony_ci lockdep_assert_held_once(&pkt_sk(sk)->bind_lock); 35462306a36Sopenharmony_ci __register_prot_hook(sk); 35562306a36Sopenharmony_ci} 35662306a36Sopenharmony_ci 35762306a36Sopenharmony_ci/* If the sync parameter is true, we will temporarily drop 35862306a36Sopenharmony_ci * the po->bind_lock and do a synchronize_net to make sure no 35962306a36Sopenharmony_ci * asynchronous packet processing paths still refer to the elements 36062306a36Sopenharmony_ci * of po->prot_hook. 
If the sync parameter is false, it is the 36162306a36Sopenharmony_ci * callers responsibility to take care of this. 36262306a36Sopenharmony_ci */ 36362306a36Sopenharmony_cistatic void __unregister_prot_hook(struct sock *sk, bool sync) 36462306a36Sopenharmony_ci{ 36562306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci lockdep_assert_held_once(&po->bind_lock); 36862306a36Sopenharmony_ci 36962306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_RUNNING, 0); 37062306a36Sopenharmony_ci 37162306a36Sopenharmony_ci if (po->fanout) 37262306a36Sopenharmony_ci __fanout_unlink(sk, po); 37362306a36Sopenharmony_ci else 37462306a36Sopenharmony_ci __dev_remove_pack(&po->prot_hook); 37562306a36Sopenharmony_ci 37662306a36Sopenharmony_ci __sock_put(sk); 37762306a36Sopenharmony_ci 37862306a36Sopenharmony_ci if (sync) { 37962306a36Sopenharmony_ci spin_unlock(&po->bind_lock); 38062306a36Sopenharmony_ci synchronize_net(); 38162306a36Sopenharmony_ci spin_lock(&po->bind_lock); 38262306a36Sopenharmony_ci } 38362306a36Sopenharmony_ci} 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_cistatic void unregister_prot_hook(struct sock *sk, bool sync) 38662306a36Sopenharmony_ci{ 38762306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 38862306a36Sopenharmony_ci 38962306a36Sopenharmony_ci if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) 39062306a36Sopenharmony_ci __unregister_prot_hook(sk, sync); 39162306a36Sopenharmony_ci} 39262306a36Sopenharmony_ci 39362306a36Sopenharmony_cistatic inline struct page * __pure pgv_to_page(void *addr) 39462306a36Sopenharmony_ci{ 39562306a36Sopenharmony_ci if (is_vmalloc_addr(addr)) 39662306a36Sopenharmony_ci return vmalloc_to_page(addr); 39762306a36Sopenharmony_ci return virt_to_page(addr); 39862306a36Sopenharmony_ci} 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_cistatic void __packet_set_status(struct packet_sock *po, void *frame, int status) 40162306a36Sopenharmony_ci{ 
40262306a36Sopenharmony_ci union tpacket_uhdr h; 40362306a36Sopenharmony_ci 40462306a36Sopenharmony_ci /* WRITE_ONCE() are paired with READ_ONCE() in __packet_get_status */ 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci h.raw = frame; 40762306a36Sopenharmony_ci switch (po->tp_version) { 40862306a36Sopenharmony_ci case TPACKET_V1: 40962306a36Sopenharmony_ci WRITE_ONCE(h.h1->tp_status, status); 41062306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(&h.h1->tp_status)); 41162306a36Sopenharmony_ci break; 41262306a36Sopenharmony_ci case TPACKET_V2: 41362306a36Sopenharmony_ci WRITE_ONCE(h.h2->tp_status, status); 41462306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(&h.h2->tp_status)); 41562306a36Sopenharmony_ci break; 41662306a36Sopenharmony_ci case TPACKET_V3: 41762306a36Sopenharmony_ci WRITE_ONCE(h.h3->tp_status, status); 41862306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(&h.h3->tp_status)); 41962306a36Sopenharmony_ci break; 42062306a36Sopenharmony_ci default: 42162306a36Sopenharmony_ci WARN(1, "TPACKET version not supported.\n"); 42262306a36Sopenharmony_ci BUG(); 42362306a36Sopenharmony_ci } 42462306a36Sopenharmony_ci 42562306a36Sopenharmony_ci smp_wmb(); 42662306a36Sopenharmony_ci} 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_cistatic int __packet_get_status(const struct packet_sock *po, void *frame) 42962306a36Sopenharmony_ci{ 43062306a36Sopenharmony_ci union tpacket_uhdr h; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci smp_rmb(); 43362306a36Sopenharmony_ci 43462306a36Sopenharmony_ci /* READ_ONCE() are paired with WRITE_ONCE() in __packet_set_status */ 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci h.raw = frame; 43762306a36Sopenharmony_ci switch (po->tp_version) { 43862306a36Sopenharmony_ci case TPACKET_V1: 43962306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(&h.h1->tp_status)); 44062306a36Sopenharmony_ci return READ_ONCE(h.h1->tp_status); 44162306a36Sopenharmony_ci case TPACKET_V2: 44262306a36Sopenharmony_ci 
flush_dcache_page(pgv_to_page(&h.h2->tp_status)); 44362306a36Sopenharmony_ci return READ_ONCE(h.h2->tp_status); 44462306a36Sopenharmony_ci case TPACKET_V3: 44562306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(&h.h3->tp_status)); 44662306a36Sopenharmony_ci return READ_ONCE(h.h3->tp_status); 44762306a36Sopenharmony_ci default: 44862306a36Sopenharmony_ci WARN(1, "TPACKET version not supported.\n"); 44962306a36Sopenharmony_ci BUG(); 45062306a36Sopenharmony_ci return 0; 45162306a36Sopenharmony_ci } 45262306a36Sopenharmony_ci} 45362306a36Sopenharmony_ci 45462306a36Sopenharmony_cistatic __u32 tpacket_get_timestamp(struct sk_buff *skb, struct timespec64 *ts, 45562306a36Sopenharmony_ci unsigned int flags) 45662306a36Sopenharmony_ci{ 45762306a36Sopenharmony_ci struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); 45862306a36Sopenharmony_ci 45962306a36Sopenharmony_ci if (shhwtstamps && 46062306a36Sopenharmony_ci (flags & SOF_TIMESTAMPING_RAW_HARDWARE) && 46162306a36Sopenharmony_ci ktime_to_timespec64_cond(shhwtstamps->hwtstamp, ts)) 46262306a36Sopenharmony_ci return TP_STATUS_TS_RAW_HARDWARE; 46362306a36Sopenharmony_ci 46462306a36Sopenharmony_ci if ((flags & SOF_TIMESTAMPING_SOFTWARE) && 46562306a36Sopenharmony_ci ktime_to_timespec64_cond(skb_tstamp(skb), ts)) 46662306a36Sopenharmony_ci return TP_STATUS_TS_SOFTWARE; 46762306a36Sopenharmony_ci 46862306a36Sopenharmony_ci return 0; 46962306a36Sopenharmony_ci} 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_cistatic __u32 __packet_set_timestamp(struct packet_sock *po, void *frame, 47262306a36Sopenharmony_ci struct sk_buff *skb) 47362306a36Sopenharmony_ci{ 47462306a36Sopenharmony_ci union tpacket_uhdr h; 47562306a36Sopenharmony_ci struct timespec64 ts; 47662306a36Sopenharmony_ci __u32 ts_status; 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci if (!(ts_status = tpacket_get_timestamp(skb, &ts, READ_ONCE(po->tp_tstamp)))) 47962306a36Sopenharmony_ci return 0; 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci 
h.raw = frame; 48262306a36Sopenharmony_ci /* 48362306a36Sopenharmony_ci * versions 1 through 3 overflow the timestamps in y2106, since they 48462306a36Sopenharmony_ci * all store the seconds in a 32-bit unsigned integer. 48562306a36Sopenharmony_ci * If we create a version 4, that should have a 64-bit timestamp, 48662306a36Sopenharmony_ci * either 64-bit seconds + 32-bit nanoseconds, or just 64-bit 48762306a36Sopenharmony_ci * nanoseconds. 48862306a36Sopenharmony_ci */ 48962306a36Sopenharmony_ci switch (po->tp_version) { 49062306a36Sopenharmony_ci case TPACKET_V1: 49162306a36Sopenharmony_ci h.h1->tp_sec = ts.tv_sec; 49262306a36Sopenharmony_ci h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; 49362306a36Sopenharmony_ci break; 49462306a36Sopenharmony_ci case TPACKET_V2: 49562306a36Sopenharmony_ci h.h2->tp_sec = ts.tv_sec; 49662306a36Sopenharmony_ci h.h2->tp_nsec = ts.tv_nsec; 49762306a36Sopenharmony_ci break; 49862306a36Sopenharmony_ci case TPACKET_V3: 49962306a36Sopenharmony_ci h.h3->tp_sec = ts.tv_sec; 50062306a36Sopenharmony_ci h.h3->tp_nsec = ts.tv_nsec; 50162306a36Sopenharmony_ci break; 50262306a36Sopenharmony_ci default: 50362306a36Sopenharmony_ci WARN(1, "TPACKET version not supported.\n"); 50462306a36Sopenharmony_ci BUG(); 50562306a36Sopenharmony_ci } 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci /* one flush is safe, as both fields always lie on the same cacheline */ 50862306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(&h.h1->tp_sec)); 50962306a36Sopenharmony_ci smp_wmb(); 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci return ts_status; 51262306a36Sopenharmony_ci} 51362306a36Sopenharmony_ci 51462306a36Sopenharmony_cistatic void *packet_lookup_frame(const struct packet_sock *po, 51562306a36Sopenharmony_ci const struct packet_ring_buffer *rb, 51662306a36Sopenharmony_ci unsigned int position, 51762306a36Sopenharmony_ci int status) 51862306a36Sopenharmony_ci{ 51962306a36Sopenharmony_ci unsigned int pg_vec_pos, frame_offset; 52062306a36Sopenharmony_ci 
union tpacket_uhdr h; 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci pg_vec_pos = position / rb->frames_per_block; 52362306a36Sopenharmony_ci frame_offset = position % rb->frames_per_block; 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci h.raw = rb->pg_vec[pg_vec_pos].buffer + 52662306a36Sopenharmony_ci (frame_offset * rb->frame_size); 52762306a36Sopenharmony_ci 52862306a36Sopenharmony_ci if (status != __packet_get_status(po, h.raw)) 52962306a36Sopenharmony_ci return NULL; 53062306a36Sopenharmony_ci 53162306a36Sopenharmony_ci return h.raw; 53262306a36Sopenharmony_ci} 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_cistatic void *packet_current_frame(struct packet_sock *po, 53562306a36Sopenharmony_ci struct packet_ring_buffer *rb, 53662306a36Sopenharmony_ci int status) 53762306a36Sopenharmony_ci{ 53862306a36Sopenharmony_ci return packet_lookup_frame(po, rb, rb->head, status); 53962306a36Sopenharmony_ci} 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_cistatic void prb_del_retire_blk_timer(struct tpacket_kbdq_core *pkc) 54262306a36Sopenharmony_ci{ 54362306a36Sopenharmony_ci del_timer_sync(&pkc->retire_blk_timer); 54462306a36Sopenharmony_ci} 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_cistatic void prb_shutdown_retire_blk_timer(struct packet_sock *po, 54762306a36Sopenharmony_ci struct sk_buff_head *rb_queue) 54862306a36Sopenharmony_ci{ 54962306a36Sopenharmony_ci struct tpacket_kbdq_core *pkc; 55062306a36Sopenharmony_ci 55162306a36Sopenharmony_ci pkc = GET_PBDQC_FROM_RB(&po->rx_ring); 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci spin_lock_bh(&rb_queue->lock); 55462306a36Sopenharmony_ci pkc->delete_blk_timer = 1; 55562306a36Sopenharmony_ci spin_unlock_bh(&rb_queue->lock); 55662306a36Sopenharmony_ci 55762306a36Sopenharmony_ci prb_del_retire_blk_timer(pkc); 55862306a36Sopenharmony_ci} 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_cistatic void prb_setup_retire_blk_timer(struct packet_sock *po) 56162306a36Sopenharmony_ci{ 
56262306a36Sopenharmony_ci struct tpacket_kbdq_core *pkc; 56362306a36Sopenharmony_ci 56462306a36Sopenharmony_ci pkc = GET_PBDQC_FROM_RB(&po->rx_ring); 56562306a36Sopenharmony_ci timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired, 56662306a36Sopenharmony_ci 0); 56762306a36Sopenharmony_ci pkc->retire_blk_timer.expires = jiffies; 56862306a36Sopenharmony_ci} 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_cistatic int prb_calc_retire_blk_tmo(struct packet_sock *po, 57162306a36Sopenharmony_ci int blk_size_in_bytes) 57262306a36Sopenharmony_ci{ 57362306a36Sopenharmony_ci struct net_device *dev; 57462306a36Sopenharmony_ci unsigned int mbits, div; 57562306a36Sopenharmony_ci struct ethtool_link_ksettings ecmd; 57662306a36Sopenharmony_ci int err; 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci rtnl_lock(); 57962306a36Sopenharmony_ci dev = __dev_get_by_index(sock_net(&po->sk), po->ifindex); 58062306a36Sopenharmony_ci if (unlikely(!dev)) { 58162306a36Sopenharmony_ci rtnl_unlock(); 58262306a36Sopenharmony_ci return DEFAULT_PRB_RETIRE_TOV; 58362306a36Sopenharmony_ci } 58462306a36Sopenharmony_ci err = __ethtool_get_link_ksettings(dev, &ecmd); 58562306a36Sopenharmony_ci rtnl_unlock(); 58662306a36Sopenharmony_ci if (err) 58762306a36Sopenharmony_ci return DEFAULT_PRB_RETIRE_TOV; 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci /* If the link speed is so slow you don't really 59062306a36Sopenharmony_ci * need to worry about perf anyways 59162306a36Sopenharmony_ci */ 59262306a36Sopenharmony_ci if (ecmd.base.speed < SPEED_1000 || 59362306a36Sopenharmony_ci ecmd.base.speed == SPEED_UNKNOWN) 59462306a36Sopenharmony_ci return DEFAULT_PRB_RETIRE_TOV; 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ci div = ecmd.base.speed / 1000; 59762306a36Sopenharmony_ci mbits = (blk_size_in_bytes * 8) / (1024 * 1024); 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci if (div) 60062306a36Sopenharmony_ci mbits /= div; 60162306a36Sopenharmony_ci 
60262306a36Sopenharmony_ci if (div) 60362306a36Sopenharmony_ci return mbits + 1; 60462306a36Sopenharmony_ci return mbits; 60562306a36Sopenharmony_ci} 60662306a36Sopenharmony_ci 60762306a36Sopenharmony_cistatic void prb_init_ft_ops(struct tpacket_kbdq_core *p1, 60862306a36Sopenharmony_ci union tpacket_req_u *req_u) 60962306a36Sopenharmony_ci{ 61062306a36Sopenharmony_ci p1->feature_req_word = req_u->req3.tp_feature_req_word; 61162306a36Sopenharmony_ci} 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_cistatic void init_prb_bdqc(struct packet_sock *po, 61462306a36Sopenharmony_ci struct packet_ring_buffer *rb, 61562306a36Sopenharmony_ci struct pgv *pg_vec, 61662306a36Sopenharmony_ci union tpacket_req_u *req_u) 61762306a36Sopenharmony_ci{ 61862306a36Sopenharmony_ci struct tpacket_kbdq_core *p1 = GET_PBDQC_FROM_RB(rb); 61962306a36Sopenharmony_ci struct tpacket_block_desc *pbd; 62062306a36Sopenharmony_ci 62162306a36Sopenharmony_ci memset(p1, 0x0, sizeof(*p1)); 62262306a36Sopenharmony_ci 62362306a36Sopenharmony_ci p1->knxt_seq_num = 1; 62462306a36Sopenharmony_ci p1->pkbdq = pg_vec; 62562306a36Sopenharmony_ci pbd = (struct tpacket_block_desc *)pg_vec[0].buffer; 62662306a36Sopenharmony_ci p1->pkblk_start = pg_vec[0].buffer; 62762306a36Sopenharmony_ci p1->kblk_size = req_u->req3.tp_block_size; 62862306a36Sopenharmony_ci p1->knum_blocks = req_u->req3.tp_block_nr; 62962306a36Sopenharmony_ci p1->hdrlen = po->tp_hdrlen; 63062306a36Sopenharmony_ci p1->version = po->tp_version; 63162306a36Sopenharmony_ci p1->last_kactive_blk_num = 0; 63262306a36Sopenharmony_ci po->stats.stats3.tp_freeze_q_cnt = 0; 63362306a36Sopenharmony_ci if (req_u->req3.tp_retire_blk_tov) 63462306a36Sopenharmony_ci p1->retire_blk_tov = req_u->req3.tp_retire_blk_tov; 63562306a36Sopenharmony_ci else 63662306a36Sopenharmony_ci p1->retire_blk_tov = prb_calc_retire_blk_tmo(po, 63762306a36Sopenharmony_ci req_u->req3.tp_block_size); 63862306a36Sopenharmony_ci p1->tov_in_jiffies = msecs_to_jiffies(p1->retire_blk_tov); 
63962306a36Sopenharmony_ci p1->blk_sizeof_priv = req_u->req3.tp_sizeof_priv; 64062306a36Sopenharmony_ci rwlock_init(&p1->blk_fill_in_prog_lock); 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci p1->max_frame_len = p1->kblk_size - BLK_PLUS_PRIV(p1->blk_sizeof_priv); 64362306a36Sopenharmony_ci prb_init_ft_ops(p1, req_u); 64462306a36Sopenharmony_ci prb_setup_retire_blk_timer(po); 64562306a36Sopenharmony_ci prb_open_block(p1, pbd); 64662306a36Sopenharmony_ci} 64762306a36Sopenharmony_ci 64862306a36Sopenharmony_ci/* Do NOT update the last_blk_num first. 64962306a36Sopenharmony_ci * Assumes sk_buff_head lock is held. 65062306a36Sopenharmony_ci */ 65162306a36Sopenharmony_cistatic void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) 65262306a36Sopenharmony_ci{ 65362306a36Sopenharmony_ci mod_timer(&pkc->retire_blk_timer, 65462306a36Sopenharmony_ci jiffies + pkc->tov_in_jiffies); 65562306a36Sopenharmony_ci pkc->last_kactive_blk_num = pkc->kactive_blk_num; 65662306a36Sopenharmony_ci} 65762306a36Sopenharmony_ci 65862306a36Sopenharmony_ci/* 65962306a36Sopenharmony_ci * Timer logic: 66062306a36Sopenharmony_ci * 1) We refresh the timer only when we open a block. 66162306a36Sopenharmony_ci * By doing this we don't waste cycles refreshing the timer 66262306a36Sopenharmony_ci * on packet-by-packet basis. 66362306a36Sopenharmony_ci * 66462306a36Sopenharmony_ci * With a 1MB block-size, on a 1Gbps line, it will take 66562306a36Sopenharmony_ci * i) ~8 ms to fill a block + ii) memcpy etc. 66662306a36Sopenharmony_ci * In this cut we are not accounting for the memcpy time. 66762306a36Sopenharmony_ci * 66862306a36Sopenharmony_ci * So, if the user sets the 'tmo' to 10ms then the timer 66962306a36Sopenharmony_ci * will never fire while the block is still getting filled 67062306a36Sopenharmony_ci * (which is what we want). However, the user could choose 67162306a36Sopenharmony_ci * to close a block early and that's fine. 
67262306a36Sopenharmony_ci * 67362306a36Sopenharmony_ci * But when the timer does fire, we check whether or not to refresh it. 67462306a36Sopenharmony_ci * Since the tmo granularity is in msecs, it is not too expensive 67562306a36Sopenharmony_ci * to refresh the timer, lets say every '8' msecs. 67662306a36Sopenharmony_ci * Either the user can set the 'tmo' or we can derive it based on 67762306a36Sopenharmony_ci * a) line-speed and b) block-size. 67862306a36Sopenharmony_ci * prb_calc_retire_blk_tmo() calculates the tmo. 67962306a36Sopenharmony_ci * 68062306a36Sopenharmony_ci */ 68162306a36Sopenharmony_cistatic void prb_retire_rx_blk_timer_expired(struct timer_list *t) 68262306a36Sopenharmony_ci{ 68362306a36Sopenharmony_ci struct packet_sock *po = 68462306a36Sopenharmony_ci from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer); 68562306a36Sopenharmony_ci struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); 68662306a36Sopenharmony_ci unsigned int frozen; 68762306a36Sopenharmony_ci struct tpacket_block_desc *pbd; 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_ci spin_lock(&po->sk.sk_receive_queue.lock); 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_ci frozen = prb_queue_frozen(pkc); 69262306a36Sopenharmony_ci pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); 69362306a36Sopenharmony_ci 69462306a36Sopenharmony_ci if (unlikely(pkc->delete_blk_timer)) 69562306a36Sopenharmony_ci goto out; 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ci /* We only need to plug the race when the block is partially filled. 69862306a36Sopenharmony_ci * tpacket_rcv: 69962306a36Sopenharmony_ci * lock(); increment BLOCK_NUM_PKTS; unlock() 70062306a36Sopenharmony_ci * copy_bits() is in progress ... 70162306a36Sopenharmony_ci * timer fires on other cpu: 70262306a36Sopenharmony_ci * we can't retire the current block because copy_bits 70362306a36Sopenharmony_ci * is in progress. 
70462306a36Sopenharmony_ci * 70562306a36Sopenharmony_ci */ 70662306a36Sopenharmony_ci if (BLOCK_NUM_PKTS(pbd)) { 70762306a36Sopenharmony_ci /* Waiting for skb_copy_bits to finish... */ 70862306a36Sopenharmony_ci write_lock(&pkc->blk_fill_in_prog_lock); 70962306a36Sopenharmony_ci write_unlock(&pkc->blk_fill_in_prog_lock); 71062306a36Sopenharmony_ci } 71162306a36Sopenharmony_ci 71262306a36Sopenharmony_ci if (pkc->last_kactive_blk_num == pkc->kactive_blk_num) { 71362306a36Sopenharmony_ci if (!frozen) { 71462306a36Sopenharmony_ci if (!BLOCK_NUM_PKTS(pbd)) { 71562306a36Sopenharmony_ci /* An empty block. Just refresh the timer. */ 71662306a36Sopenharmony_ci goto refresh_timer; 71762306a36Sopenharmony_ci } 71862306a36Sopenharmony_ci prb_retire_current_block(pkc, po, TP_STATUS_BLK_TMO); 71962306a36Sopenharmony_ci if (!prb_dispatch_next_block(pkc, po)) 72062306a36Sopenharmony_ci goto refresh_timer; 72162306a36Sopenharmony_ci else 72262306a36Sopenharmony_ci goto out; 72362306a36Sopenharmony_ci } else { 72462306a36Sopenharmony_ci /* Case 1. Queue was frozen because user-space was 72562306a36Sopenharmony_ci * lagging behind. 72662306a36Sopenharmony_ci */ 72762306a36Sopenharmony_ci if (prb_curr_blk_in_use(pbd)) { 72862306a36Sopenharmony_ci /* 72962306a36Sopenharmony_ci * Ok, user-space is still behind. 73062306a36Sopenharmony_ci * So just refresh the timer. 73162306a36Sopenharmony_ci */ 73262306a36Sopenharmony_ci goto refresh_timer; 73362306a36Sopenharmony_ci } else { 73462306a36Sopenharmony_ci /* Case 2. queue was frozen,user-space caught up, 73562306a36Sopenharmony_ci * now the link went idle && the timer fired. 73662306a36Sopenharmony_ci * We don't have a block to close.So we open this 73762306a36Sopenharmony_ci * block and restart the timer. 73862306a36Sopenharmony_ci * opening a block thaws the queue,restarts timer 73962306a36Sopenharmony_ci * Thawing/timer-refresh is a side effect. 
74062306a36Sopenharmony_ci */ 74162306a36Sopenharmony_ci prb_open_block(pkc, pbd); 74262306a36Sopenharmony_ci goto out; 74362306a36Sopenharmony_ci } 74462306a36Sopenharmony_ci } 74562306a36Sopenharmony_ci } 74662306a36Sopenharmony_ci 74762306a36Sopenharmony_cirefresh_timer: 74862306a36Sopenharmony_ci _prb_refresh_rx_retire_blk_timer(pkc); 74962306a36Sopenharmony_ci 75062306a36Sopenharmony_ciout: 75162306a36Sopenharmony_ci spin_unlock(&po->sk.sk_receive_queue.lock); 75262306a36Sopenharmony_ci} 75362306a36Sopenharmony_ci 75462306a36Sopenharmony_cistatic void prb_flush_block(struct tpacket_kbdq_core *pkc1, 75562306a36Sopenharmony_ci struct tpacket_block_desc *pbd1, __u32 status) 75662306a36Sopenharmony_ci{ 75762306a36Sopenharmony_ci /* Flush everything minus the block header */ 75862306a36Sopenharmony_ci 75962306a36Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 76062306a36Sopenharmony_ci u8 *start, *end; 76162306a36Sopenharmony_ci 76262306a36Sopenharmony_ci start = (u8 *)pbd1; 76362306a36Sopenharmony_ci 76462306a36Sopenharmony_ci /* Skip the block header(we know header WILL fit in 4K) */ 76562306a36Sopenharmony_ci start += PAGE_SIZE; 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci end = (u8 *)PAGE_ALIGN((unsigned long)pkc1->pkblk_end); 76862306a36Sopenharmony_ci for (; start < end; start += PAGE_SIZE) 76962306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(start)); 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ci smp_wmb(); 77262306a36Sopenharmony_ci#endif 77362306a36Sopenharmony_ci 77462306a36Sopenharmony_ci /* Now update the block status. 
*/ 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci BLOCK_STATUS(pbd1) = status; 77762306a36Sopenharmony_ci 77862306a36Sopenharmony_ci /* Flush the block header */ 77962306a36Sopenharmony_ci 78062306a36Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 78162306a36Sopenharmony_ci start = (u8 *)pbd1; 78262306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(start)); 78362306a36Sopenharmony_ci 78462306a36Sopenharmony_ci smp_wmb(); 78562306a36Sopenharmony_ci#endif 78662306a36Sopenharmony_ci} 78762306a36Sopenharmony_ci 78862306a36Sopenharmony_ci/* 78962306a36Sopenharmony_ci * Side effect: 79062306a36Sopenharmony_ci * 79162306a36Sopenharmony_ci * 1) flush the block 79262306a36Sopenharmony_ci * 2) Increment active_blk_num 79362306a36Sopenharmony_ci * 79462306a36Sopenharmony_ci * Note:We DONT refresh the timer on purpose. 79562306a36Sopenharmony_ci * Because almost always the next block will be opened. 79662306a36Sopenharmony_ci */ 79762306a36Sopenharmony_cistatic void prb_close_block(struct tpacket_kbdq_core *pkc1, 79862306a36Sopenharmony_ci struct tpacket_block_desc *pbd1, 79962306a36Sopenharmony_ci struct packet_sock *po, unsigned int stat) 80062306a36Sopenharmony_ci{ 80162306a36Sopenharmony_ci __u32 status = TP_STATUS_USER | stat; 80262306a36Sopenharmony_ci 80362306a36Sopenharmony_ci struct tpacket3_hdr *last_pkt; 80462306a36Sopenharmony_ci struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; 80562306a36Sopenharmony_ci struct sock *sk = &po->sk; 80662306a36Sopenharmony_ci 80762306a36Sopenharmony_ci if (atomic_read(&po->tp_drops)) 80862306a36Sopenharmony_ci status |= TP_STATUS_LOSING; 80962306a36Sopenharmony_ci 81062306a36Sopenharmony_ci last_pkt = (struct tpacket3_hdr *)pkc1->prev; 81162306a36Sopenharmony_ci last_pkt->tp_next_offset = 0; 81262306a36Sopenharmony_ci 81362306a36Sopenharmony_ci /* Get the ts of the last pkt */ 81462306a36Sopenharmony_ci if (BLOCK_NUM_PKTS(pbd1)) { 81562306a36Sopenharmony_ci h1->ts_last_pkt.ts_sec = last_pkt->tp_sec; 
81662306a36Sopenharmony_ci h1->ts_last_pkt.ts_nsec = last_pkt->tp_nsec; 81762306a36Sopenharmony_ci } else { 81862306a36Sopenharmony_ci /* Ok, we tmo'd - so get the current time. 81962306a36Sopenharmony_ci * 82062306a36Sopenharmony_ci * It shouldn't really happen as we don't close empty 82162306a36Sopenharmony_ci * blocks. See prb_retire_rx_blk_timer_expired(). 82262306a36Sopenharmony_ci */ 82362306a36Sopenharmony_ci struct timespec64 ts; 82462306a36Sopenharmony_ci ktime_get_real_ts64(&ts); 82562306a36Sopenharmony_ci h1->ts_last_pkt.ts_sec = ts.tv_sec; 82662306a36Sopenharmony_ci h1->ts_last_pkt.ts_nsec = ts.tv_nsec; 82762306a36Sopenharmony_ci } 82862306a36Sopenharmony_ci 82962306a36Sopenharmony_ci smp_wmb(); 83062306a36Sopenharmony_ci 83162306a36Sopenharmony_ci /* Flush the block */ 83262306a36Sopenharmony_ci prb_flush_block(pkc1, pbd1, status); 83362306a36Sopenharmony_ci 83462306a36Sopenharmony_ci sk->sk_data_ready(sk); 83562306a36Sopenharmony_ci 83662306a36Sopenharmony_ci pkc1->kactive_blk_num = GET_NEXT_PRB_BLK_NUM(pkc1); 83762306a36Sopenharmony_ci} 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_cistatic void prb_thaw_queue(struct tpacket_kbdq_core *pkc) 84062306a36Sopenharmony_ci{ 84162306a36Sopenharmony_ci pkc->reset_pending_on_curr_blk = 0; 84262306a36Sopenharmony_ci} 84362306a36Sopenharmony_ci 84462306a36Sopenharmony_ci/* 84562306a36Sopenharmony_ci * Side effect of opening a block: 84662306a36Sopenharmony_ci * 84762306a36Sopenharmony_ci * 1) prb_queue is thawed. 84862306a36Sopenharmony_ci * 2) retire_blk_timer is refreshed. 
84962306a36Sopenharmony_ci * 85062306a36Sopenharmony_ci */ 85162306a36Sopenharmony_cistatic void prb_open_block(struct tpacket_kbdq_core *pkc1, 85262306a36Sopenharmony_ci struct tpacket_block_desc *pbd1) 85362306a36Sopenharmony_ci{ 85462306a36Sopenharmony_ci struct timespec64 ts; 85562306a36Sopenharmony_ci struct tpacket_hdr_v1 *h1 = &pbd1->hdr.bh1; 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci smp_rmb(); 85862306a36Sopenharmony_ci 85962306a36Sopenharmony_ci /* We could have just memset this but we will lose the 86062306a36Sopenharmony_ci * flexibility of making the priv area sticky 86162306a36Sopenharmony_ci */ 86262306a36Sopenharmony_ci 86362306a36Sopenharmony_ci BLOCK_SNUM(pbd1) = pkc1->knxt_seq_num++; 86462306a36Sopenharmony_ci BLOCK_NUM_PKTS(pbd1) = 0; 86562306a36Sopenharmony_ci BLOCK_LEN(pbd1) = BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci ktime_get_real_ts64(&ts); 86862306a36Sopenharmony_ci 86962306a36Sopenharmony_ci h1->ts_first_pkt.ts_sec = ts.tv_sec; 87062306a36Sopenharmony_ci h1->ts_first_pkt.ts_nsec = ts.tv_nsec; 87162306a36Sopenharmony_ci 87262306a36Sopenharmony_ci pkc1->pkblk_start = (char *)pbd1; 87362306a36Sopenharmony_ci pkc1->nxt_offset = pkc1->pkblk_start + BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); 87462306a36Sopenharmony_ci 87562306a36Sopenharmony_ci BLOCK_O2FP(pbd1) = (__u32)BLK_PLUS_PRIV(pkc1->blk_sizeof_priv); 87662306a36Sopenharmony_ci BLOCK_O2PRIV(pbd1) = BLK_HDR_LEN; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci pbd1->version = pkc1->version; 87962306a36Sopenharmony_ci pkc1->prev = pkc1->nxt_offset; 88062306a36Sopenharmony_ci pkc1->pkblk_end = pkc1->pkblk_start + pkc1->kblk_size; 88162306a36Sopenharmony_ci 88262306a36Sopenharmony_ci prb_thaw_queue(pkc1); 88362306a36Sopenharmony_ci _prb_refresh_rx_retire_blk_timer(pkc1); 88462306a36Sopenharmony_ci 88562306a36Sopenharmony_ci smp_wmb(); 88662306a36Sopenharmony_ci} 88762306a36Sopenharmony_ci 88862306a36Sopenharmony_ci/* 
88962306a36Sopenharmony_ci * Queue freeze logic: 89062306a36Sopenharmony_ci * 1) Assume tp_block_nr = 8 blocks. 89162306a36Sopenharmony_ci * 2) At time 't0', user opens Rx ring. 89262306a36Sopenharmony_ci * 3) Some time past 't0', kernel starts filling blocks starting from 0 .. 7 89362306a36Sopenharmony_ci * 4) user-space is either sleeping or processing block '0'. 89462306a36Sopenharmony_ci * 5) tpacket_rcv is currently filling block '7', since there is no space left, 89562306a36Sopenharmony_ci * it will close block-7,loop around and try to fill block '0'. 89662306a36Sopenharmony_ci * call-flow: 89762306a36Sopenharmony_ci * __packet_lookup_frame_in_block 89862306a36Sopenharmony_ci * prb_retire_current_block() 89962306a36Sopenharmony_ci * prb_dispatch_next_block() 90062306a36Sopenharmony_ci * |->(BLOCK_STATUS == USER) evaluates to true 90162306a36Sopenharmony_ci * 5.1) Since block-0 is currently in-use, we just freeze the queue. 90262306a36Sopenharmony_ci * 6) Now there are two cases: 90362306a36Sopenharmony_ci * 6.1) Link goes idle right after the queue is frozen. 90462306a36Sopenharmony_ci * But remember, the last open_block() refreshed the timer. 90562306a36Sopenharmony_ci * When this timer expires,it will refresh itself so that we can 90662306a36Sopenharmony_ci * re-open block-0 in near future. 90762306a36Sopenharmony_ci * 6.2) Link is busy and keeps on receiving packets. This is a simple 90862306a36Sopenharmony_ci * case and __packet_lookup_frame_in_block will check if block-0 90962306a36Sopenharmony_ci * is free and can now be re-used. 
91062306a36Sopenharmony_ci */ 91162306a36Sopenharmony_cistatic void prb_freeze_queue(struct tpacket_kbdq_core *pkc, 91262306a36Sopenharmony_ci struct packet_sock *po) 91362306a36Sopenharmony_ci{ 91462306a36Sopenharmony_ci pkc->reset_pending_on_curr_blk = 1; 91562306a36Sopenharmony_ci po->stats.stats3.tp_freeze_q_cnt++; 91662306a36Sopenharmony_ci} 91762306a36Sopenharmony_ci 91862306a36Sopenharmony_ci#define TOTAL_PKT_LEN_INCL_ALIGN(length) (ALIGN((length), V3_ALIGNMENT)) 91962306a36Sopenharmony_ci 92062306a36Sopenharmony_ci/* 92162306a36Sopenharmony_ci * If the next block is free then we will dispatch it 92262306a36Sopenharmony_ci * and return a good offset. 92362306a36Sopenharmony_ci * Else, we will freeze the queue. 92462306a36Sopenharmony_ci * So, caller must check the return value. 92562306a36Sopenharmony_ci */ 92662306a36Sopenharmony_cistatic void *prb_dispatch_next_block(struct tpacket_kbdq_core *pkc, 92762306a36Sopenharmony_ci struct packet_sock *po) 92862306a36Sopenharmony_ci{ 92962306a36Sopenharmony_ci struct tpacket_block_desc *pbd; 93062306a36Sopenharmony_ci 93162306a36Sopenharmony_ci smp_rmb(); 93262306a36Sopenharmony_ci 93362306a36Sopenharmony_ci /* 1. Get current block num */ 93462306a36Sopenharmony_ci pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); 93562306a36Sopenharmony_ci 93662306a36Sopenharmony_ci /* 2. If this block is currently in_use then freeze the queue */ 93762306a36Sopenharmony_ci if (TP_STATUS_USER & BLOCK_STATUS(pbd)) { 93862306a36Sopenharmony_ci prb_freeze_queue(pkc, po); 93962306a36Sopenharmony_ci return NULL; 94062306a36Sopenharmony_ci } 94162306a36Sopenharmony_ci 94262306a36Sopenharmony_ci /* 94362306a36Sopenharmony_ci * 3. 94462306a36Sopenharmony_ci * open this block and return the offset where the first packet 94562306a36Sopenharmony_ci * needs to get stored. 
94662306a36Sopenharmony_ci */ 94762306a36Sopenharmony_ci prb_open_block(pkc, pbd); 94862306a36Sopenharmony_ci return (void *)pkc->nxt_offset; 94962306a36Sopenharmony_ci} 95062306a36Sopenharmony_ci 95162306a36Sopenharmony_cistatic void prb_retire_current_block(struct tpacket_kbdq_core *pkc, 95262306a36Sopenharmony_ci struct packet_sock *po, unsigned int status) 95362306a36Sopenharmony_ci{ 95462306a36Sopenharmony_ci struct tpacket_block_desc *pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); 95562306a36Sopenharmony_ci 95662306a36Sopenharmony_ci /* retire/close the current block */ 95762306a36Sopenharmony_ci if (likely(TP_STATUS_KERNEL == BLOCK_STATUS(pbd))) { 95862306a36Sopenharmony_ci /* 95962306a36Sopenharmony_ci * Plug the case where copy_bits() is in progress on 96062306a36Sopenharmony_ci * cpu-0 and tpacket_rcv() got invoked on cpu-1, didn't 96162306a36Sopenharmony_ci * have space to copy the pkt in the current block and 96262306a36Sopenharmony_ci * called prb_retire_current_block() 96362306a36Sopenharmony_ci * 96462306a36Sopenharmony_ci * We don't need to worry about the TMO case because 96562306a36Sopenharmony_ci * the timer-handler already handled this case. 96662306a36Sopenharmony_ci */ 96762306a36Sopenharmony_ci if (!(status & TP_STATUS_BLK_TMO)) { 96862306a36Sopenharmony_ci /* Waiting for skb_copy_bits to finish... 
*/ 96962306a36Sopenharmony_ci write_lock(&pkc->blk_fill_in_prog_lock); 97062306a36Sopenharmony_ci write_unlock(&pkc->blk_fill_in_prog_lock); 97162306a36Sopenharmony_ci } 97262306a36Sopenharmony_ci prb_close_block(pkc, pbd, po, status); 97362306a36Sopenharmony_ci return; 97462306a36Sopenharmony_ci } 97562306a36Sopenharmony_ci} 97662306a36Sopenharmony_ci 97762306a36Sopenharmony_cistatic int prb_curr_blk_in_use(struct tpacket_block_desc *pbd) 97862306a36Sopenharmony_ci{ 97962306a36Sopenharmony_ci return TP_STATUS_USER & BLOCK_STATUS(pbd); 98062306a36Sopenharmony_ci} 98162306a36Sopenharmony_ci 98262306a36Sopenharmony_cistatic int prb_queue_frozen(struct tpacket_kbdq_core *pkc) 98362306a36Sopenharmony_ci{ 98462306a36Sopenharmony_ci return pkc->reset_pending_on_curr_blk; 98562306a36Sopenharmony_ci} 98662306a36Sopenharmony_ci 98762306a36Sopenharmony_cistatic void prb_clear_blk_fill_status(struct packet_ring_buffer *rb) 98862306a36Sopenharmony_ci __releases(&pkc->blk_fill_in_prog_lock) 98962306a36Sopenharmony_ci{ 99062306a36Sopenharmony_ci struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); 99162306a36Sopenharmony_ci 99262306a36Sopenharmony_ci read_unlock(&pkc->blk_fill_in_prog_lock); 99362306a36Sopenharmony_ci} 99462306a36Sopenharmony_ci 99562306a36Sopenharmony_cistatic void prb_fill_rxhash(struct tpacket_kbdq_core *pkc, 99662306a36Sopenharmony_ci struct tpacket3_hdr *ppd) 99762306a36Sopenharmony_ci{ 99862306a36Sopenharmony_ci ppd->hv1.tp_rxhash = skb_get_hash(pkc->skb); 99962306a36Sopenharmony_ci} 100062306a36Sopenharmony_ci 100162306a36Sopenharmony_cistatic void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, 100262306a36Sopenharmony_ci struct tpacket3_hdr *ppd) 100362306a36Sopenharmony_ci{ 100462306a36Sopenharmony_ci ppd->hv1.tp_rxhash = 0; 100562306a36Sopenharmony_ci} 100662306a36Sopenharmony_ci 100762306a36Sopenharmony_cistatic void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, 100862306a36Sopenharmony_ci struct tpacket3_hdr *ppd) 100962306a36Sopenharmony_ci{ 
101062306a36Sopenharmony_ci if (skb_vlan_tag_present(pkc->skb)) { 101162306a36Sopenharmony_ci ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); 101262306a36Sopenharmony_ci ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); 101362306a36Sopenharmony_ci ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; 101462306a36Sopenharmony_ci } else { 101562306a36Sopenharmony_ci ppd->hv1.tp_vlan_tci = 0; 101662306a36Sopenharmony_ci ppd->hv1.tp_vlan_tpid = 0; 101762306a36Sopenharmony_ci ppd->tp_status = TP_STATUS_AVAILABLE; 101862306a36Sopenharmony_ci } 101962306a36Sopenharmony_ci} 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_cistatic void prb_run_all_ft_ops(struct tpacket_kbdq_core *pkc, 102262306a36Sopenharmony_ci struct tpacket3_hdr *ppd) 102362306a36Sopenharmony_ci{ 102462306a36Sopenharmony_ci ppd->hv1.tp_padding = 0; 102562306a36Sopenharmony_ci prb_fill_vlan_info(pkc, ppd); 102662306a36Sopenharmony_ci 102762306a36Sopenharmony_ci if (pkc->feature_req_word & TP_FT_REQ_FILL_RXHASH) 102862306a36Sopenharmony_ci prb_fill_rxhash(pkc, ppd); 102962306a36Sopenharmony_ci else 103062306a36Sopenharmony_ci prb_clear_rxhash(pkc, ppd); 103162306a36Sopenharmony_ci} 103262306a36Sopenharmony_ci 103362306a36Sopenharmony_cistatic void prb_fill_curr_block(char *curr, 103462306a36Sopenharmony_ci struct tpacket_kbdq_core *pkc, 103562306a36Sopenharmony_ci struct tpacket_block_desc *pbd, 103662306a36Sopenharmony_ci unsigned int len) 103762306a36Sopenharmony_ci __acquires(&pkc->blk_fill_in_prog_lock) 103862306a36Sopenharmony_ci{ 103962306a36Sopenharmony_ci struct tpacket3_hdr *ppd; 104062306a36Sopenharmony_ci 104162306a36Sopenharmony_ci ppd = (struct tpacket3_hdr *)curr; 104262306a36Sopenharmony_ci ppd->tp_next_offset = TOTAL_PKT_LEN_INCL_ALIGN(len); 104362306a36Sopenharmony_ci pkc->prev = curr; 104462306a36Sopenharmony_ci pkc->nxt_offset += TOTAL_PKT_LEN_INCL_ALIGN(len); 104562306a36Sopenharmony_ci BLOCK_LEN(pbd) += TOTAL_PKT_LEN_INCL_ALIGN(len); 104662306a36Sopenharmony_ci 
BLOCK_NUM_PKTS(pbd) += 1; 104762306a36Sopenharmony_ci read_lock(&pkc->blk_fill_in_prog_lock); 104862306a36Sopenharmony_ci prb_run_all_ft_ops(pkc, ppd); 104962306a36Sopenharmony_ci} 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci/* Assumes caller has the sk->rx_queue.lock */ 105262306a36Sopenharmony_cistatic void *__packet_lookup_frame_in_block(struct packet_sock *po, 105362306a36Sopenharmony_ci struct sk_buff *skb, 105462306a36Sopenharmony_ci unsigned int len 105562306a36Sopenharmony_ci ) 105662306a36Sopenharmony_ci{ 105762306a36Sopenharmony_ci struct tpacket_kbdq_core *pkc; 105862306a36Sopenharmony_ci struct tpacket_block_desc *pbd; 105962306a36Sopenharmony_ci char *curr, *end; 106062306a36Sopenharmony_ci 106162306a36Sopenharmony_ci pkc = GET_PBDQC_FROM_RB(&po->rx_ring); 106262306a36Sopenharmony_ci pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); 106362306a36Sopenharmony_ci 106462306a36Sopenharmony_ci /* Queue is frozen when user space is lagging behind */ 106562306a36Sopenharmony_ci if (prb_queue_frozen(pkc)) { 106662306a36Sopenharmony_ci /* 106762306a36Sopenharmony_ci * Check if that last block which caused the queue to freeze, 106862306a36Sopenharmony_ci * is still in_use by user-space. 106962306a36Sopenharmony_ci */ 107062306a36Sopenharmony_ci if (prb_curr_blk_in_use(pbd)) { 107162306a36Sopenharmony_ci /* Can't record this packet */ 107262306a36Sopenharmony_ci return NULL; 107362306a36Sopenharmony_ci } else { 107462306a36Sopenharmony_ci /* 107562306a36Sopenharmony_ci * Ok, the block was released by user-space. 107662306a36Sopenharmony_ci * Now let's open that block. 107762306a36Sopenharmony_ci * opening a block also thaws the queue. 107862306a36Sopenharmony_ci * Thawing is a side effect. 
107962306a36Sopenharmony_ci */ 108062306a36Sopenharmony_ci prb_open_block(pkc, pbd); 108162306a36Sopenharmony_ci } 108262306a36Sopenharmony_ci } 108362306a36Sopenharmony_ci 108462306a36Sopenharmony_ci smp_mb(); 108562306a36Sopenharmony_ci curr = pkc->nxt_offset; 108662306a36Sopenharmony_ci pkc->skb = skb; 108762306a36Sopenharmony_ci end = (char *)pbd + pkc->kblk_size; 108862306a36Sopenharmony_ci 108962306a36Sopenharmony_ci /* first try the current block */ 109062306a36Sopenharmony_ci if (curr+TOTAL_PKT_LEN_INCL_ALIGN(len) < end) { 109162306a36Sopenharmony_ci prb_fill_curr_block(curr, pkc, pbd, len); 109262306a36Sopenharmony_ci return (void *)curr; 109362306a36Sopenharmony_ci } 109462306a36Sopenharmony_ci 109562306a36Sopenharmony_ci /* Ok, close the current block */ 109662306a36Sopenharmony_ci prb_retire_current_block(pkc, po, 0); 109762306a36Sopenharmony_ci 109862306a36Sopenharmony_ci /* Now, try to dispatch the next block */ 109962306a36Sopenharmony_ci curr = (char *)prb_dispatch_next_block(pkc, po); 110062306a36Sopenharmony_ci if (curr) { 110162306a36Sopenharmony_ci pbd = GET_CURR_PBLOCK_DESC_FROM_CORE(pkc); 110262306a36Sopenharmony_ci prb_fill_curr_block(curr, pkc, pbd, len); 110362306a36Sopenharmony_ci return (void *)curr; 110462306a36Sopenharmony_ci } 110562306a36Sopenharmony_ci 110662306a36Sopenharmony_ci /* 110762306a36Sopenharmony_ci * No free blocks are available.user_space hasn't caught up yet. 110862306a36Sopenharmony_ci * Queue was just frozen and now this packet will get dropped. 
110962306a36Sopenharmony_ci */ 111062306a36Sopenharmony_ci return NULL; 111162306a36Sopenharmony_ci} 111262306a36Sopenharmony_ci 111362306a36Sopenharmony_cistatic void *packet_current_rx_frame(struct packet_sock *po, 111462306a36Sopenharmony_ci struct sk_buff *skb, 111562306a36Sopenharmony_ci int status, unsigned int len) 111662306a36Sopenharmony_ci{ 111762306a36Sopenharmony_ci char *curr = NULL; 111862306a36Sopenharmony_ci switch (po->tp_version) { 111962306a36Sopenharmony_ci case TPACKET_V1: 112062306a36Sopenharmony_ci case TPACKET_V2: 112162306a36Sopenharmony_ci curr = packet_lookup_frame(po, &po->rx_ring, 112262306a36Sopenharmony_ci po->rx_ring.head, status); 112362306a36Sopenharmony_ci return curr; 112462306a36Sopenharmony_ci case TPACKET_V3: 112562306a36Sopenharmony_ci return __packet_lookup_frame_in_block(po, skb, len); 112662306a36Sopenharmony_ci default: 112762306a36Sopenharmony_ci WARN(1, "TPACKET version not supported\n"); 112862306a36Sopenharmony_ci BUG(); 112962306a36Sopenharmony_ci return NULL; 113062306a36Sopenharmony_ci } 113162306a36Sopenharmony_ci} 113262306a36Sopenharmony_ci 113362306a36Sopenharmony_cistatic void *prb_lookup_block(const struct packet_sock *po, 113462306a36Sopenharmony_ci const struct packet_ring_buffer *rb, 113562306a36Sopenharmony_ci unsigned int idx, 113662306a36Sopenharmony_ci int status) 113762306a36Sopenharmony_ci{ 113862306a36Sopenharmony_ci struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(rb); 113962306a36Sopenharmony_ci struct tpacket_block_desc *pbd = GET_PBLOCK_DESC(pkc, idx); 114062306a36Sopenharmony_ci 114162306a36Sopenharmony_ci if (status != BLOCK_STATUS(pbd)) 114262306a36Sopenharmony_ci return NULL; 114362306a36Sopenharmony_ci return pbd; 114462306a36Sopenharmony_ci} 114562306a36Sopenharmony_ci 114662306a36Sopenharmony_cistatic int prb_previous_blk_num(struct packet_ring_buffer *rb) 114762306a36Sopenharmony_ci{ 114862306a36Sopenharmony_ci unsigned int prev; 114962306a36Sopenharmony_ci if 
(rb->prb_bdqc.kactive_blk_num) 115062306a36Sopenharmony_ci prev = rb->prb_bdqc.kactive_blk_num-1; 115162306a36Sopenharmony_ci else 115262306a36Sopenharmony_ci prev = rb->prb_bdqc.knum_blocks-1; 115362306a36Sopenharmony_ci return prev; 115462306a36Sopenharmony_ci} 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci/* Assumes caller has held the rx_queue.lock */ 115762306a36Sopenharmony_cistatic void *__prb_previous_block(struct packet_sock *po, 115862306a36Sopenharmony_ci struct packet_ring_buffer *rb, 115962306a36Sopenharmony_ci int status) 116062306a36Sopenharmony_ci{ 116162306a36Sopenharmony_ci unsigned int previous = prb_previous_blk_num(rb); 116262306a36Sopenharmony_ci return prb_lookup_block(po, rb, previous, status); 116362306a36Sopenharmony_ci} 116462306a36Sopenharmony_ci 116562306a36Sopenharmony_cistatic void *packet_previous_rx_frame(struct packet_sock *po, 116662306a36Sopenharmony_ci struct packet_ring_buffer *rb, 116762306a36Sopenharmony_ci int status) 116862306a36Sopenharmony_ci{ 116962306a36Sopenharmony_ci if (po->tp_version <= TPACKET_V2) 117062306a36Sopenharmony_ci return packet_previous_frame(po, rb, status); 117162306a36Sopenharmony_ci 117262306a36Sopenharmony_ci return __prb_previous_block(po, rb, status); 117362306a36Sopenharmony_ci} 117462306a36Sopenharmony_ci 117562306a36Sopenharmony_cistatic void packet_increment_rx_head(struct packet_sock *po, 117662306a36Sopenharmony_ci struct packet_ring_buffer *rb) 117762306a36Sopenharmony_ci{ 117862306a36Sopenharmony_ci switch (po->tp_version) { 117962306a36Sopenharmony_ci case TPACKET_V1: 118062306a36Sopenharmony_ci case TPACKET_V2: 118162306a36Sopenharmony_ci return packet_increment_head(rb); 118262306a36Sopenharmony_ci case TPACKET_V3: 118362306a36Sopenharmony_ci default: 118462306a36Sopenharmony_ci WARN(1, "TPACKET version not supported.\n"); 118562306a36Sopenharmony_ci BUG(); 118662306a36Sopenharmony_ci return; 118762306a36Sopenharmony_ci } 118862306a36Sopenharmony_ci} 118962306a36Sopenharmony_ci 
119062306a36Sopenharmony_cistatic void *packet_previous_frame(struct packet_sock *po, 119162306a36Sopenharmony_ci struct packet_ring_buffer *rb, 119262306a36Sopenharmony_ci int status) 119362306a36Sopenharmony_ci{ 119462306a36Sopenharmony_ci unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max; 119562306a36Sopenharmony_ci return packet_lookup_frame(po, rb, previous, status); 119662306a36Sopenharmony_ci} 119762306a36Sopenharmony_ci 119862306a36Sopenharmony_cistatic void packet_increment_head(struct packet_ring_buffer *buff) 119962306a36Sopenharmony_ci{ 120062306a36Sopenharmony_ci buff->head = buff->head != buff->frame_max ? buff->head+1 : 0; 120162306a36Sopenharmony_ci} 120262306a36Sopenharmony_ci 120362306a36Sopenharmony_cistatic void packet_inc_pending(struct packet_ring_buffer *rb) 120462306a36Sopenharmony_ci{ 120562306a36Sopenharmony_ci this_cpu_inc(*rb->pending_refcnt); 120662306a36Sopenharmony_ci} 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_cistatic void packet_dec_pending(struct packet_ring_buffer *rb) 120962306a36Sopenharmony_ci{ 121062306a36Sopenharmony_ci this_cpu_dec(*rb->pending_refcnt); 121162306a36Sopenharmony_ci} 121262306a36Sopenharmony_ci 121362306a36Sopenharmony_cistatic unsigned int packet_read_pending(const struct packet_ring_buffer *rb) 121462306a36Sopenharmony_ci{ 121562306a36Sopenharmony_ci unsigned int refcnt = 0; 121662306a36Sopenharmony_ci int cpu; 121762306a36Sopenharmony_ci 121862306a36Sopenharmony_ci /* We don't use pending refcount in rx_ring. 
*/ 121962306a36Sopenharmony_ci if (rb->pending_refcnt == NULL) 122062306a36Sopenharmony_ci return 0; 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci for_each_possible_cpu(cpu) 122362306a36Sopenharmony_ci refcnt += *per_cpu_ptr(rb->pending_refcnt, cpu); 122462306a36Sopenharmony_ci 122562306a36Sopenharmony_ci return refcnt; 122662306a36Sopenharmony_ci} 122762306a36Sopenharmony_ci 122862306a36Sopenharmony_cistatic int packet_alloc_pending(struct packet_sock *po) 122962306a36Sopenharmony_ci{ 123062306a36Sopenharmony_ci po->rx_ring.pending_refcnt = NULL; 123162306a36Sopenharmony_ci 123262306a36Sopenharmony_ci po->tx_ring.pending_refcnt = alloc_percpu(unsigned int); 123362306a36Sopenharmony_ci if (unlikely(po->tx_ring.pending_refcnt == NULL)) 123462306a36Sopenharmony_ci return -ENOBUFS; 123562306a36Sopenharmony_ci 123662306a36Sopenharmony_ci return 0; 123762306a36Sopenharmony_ci} 123862306a36Sopenharmony_ci 123962306a36Sopenharmony_cistatic void packet_free_pending(struct packet_sock *po) 124062306a36Sopenharmony_ci{ 124162306a36Sopenharmony_ci free_percpu(po->tx_ring.pending_refcnt); 124262306a36Sopenharmony_ci} 124362306a36Sopenharmony_ci 124462306a36Sopenharmony_ci#define ROOM_POW_OFF 2 124562306a36Sopenharmony_ci#define ROOM_NONE 0x0 124662306a36Sopenharmony_ci#define ROOM_LOW 0x1 124762306a36Sopenharmony_ci#define ROOM_NORMAL 0x2 124862306a36Sopenharmony_ci 124962306a36Sopenharmony_cistatic bool __tpacket_has_room(const struct packet_sock *po, int pow_off) 125062306a36Sopenharmony_ci{ 125162306a36Sopenharmony_ci int idx, len; 125262306a36Sopenharmony_ci 125362306a36Sopenharmony_ci len = READ_ONCE(po->rx_ring.frame_max) + 1; 125462306a36Sopenharmony_ci idx = READ_ONCE(po->rx_ring.head); 125562306a36Sopenharmony_ci if (pow_off) 125662306a36Sopenharmony_ci idx += len >> pow_off; 125762306a36Sopenharmony_ci if (idx >= len) 125862306a36Sopenharmony_ci idx -= len; 125962306a36Sopenharmony_ci return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); 
126062306a36Sopenharmony_ci} 126162306a36Sopenharmony_ci 126262306a36Sopenharmony_cistatic bool __tpacket_v3_has_room(const struct packet_sock *po, int pow_off) 126362306a36Sopenharmony_ci{ 126462306a36Sopenharmony_ci int idx, len; 126562306a36Sopenharmony_ci 126662306a36Sopenharmony_ci len = READ_ONCE(po->rx_ring.prb_bdqc.knum_blocks); 126762306a36Sopenharmony_ci idx = READ_ONCE(po->rx_ring.prb_bdqc.kactive_blk_num); 126862306a36Sopenharmony_ci if (pow_off) 126962306a36Sopenharmony_ci idx += len >> pow_off; 127062306a36Sopenharmony_ci if (idx >= len) 127162306a36Sopenharmony_ci idx -= len; 127262306a36Sopenharmony_ci return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); 127362306a36Sopenharmony_ci} 127462306a36Sopenharmony_ci 127562306a36Sopenharmony_cistatic int __packet_rcv_has_room(const struct packet_sock *po, 127662306a36Sopenharmony_ci const struct sk_buff *skb) 127762306a36Sopenharmony_ci{ 127862306a36Sopenharmony_ci const struct sock *sk = &po->sk; 127962306a36Sopenharmony_ci int ret = ROOM_NONE; 128062306a36Sopenharmony_ci 128162306a36Sopenharmony_ci if (po->prot_hook.func != tpacket_rcv) { 128262306a36Sopenharmony_ci int rcvbuf = READ_ONCE(sk->sk_rcvbuf); 128362306a36Sopenharmony_ci int avail = rcvbuf - atomic_read(&sk->sk_rmem_alloc) 128462306a36Sopenharmony_ci - (skb ? 
skb->truesize : 0); 128562306a36Sopenharmony_ci 128662306a36Sopenharmony_ci if (avail > (rcvbuf >> ROOM_POW_OFF)) 128762306a36Sopenharmony_ci return ROOM_NORMAL; 128862306a36Sopenharmony_ci else if (avail > 0) 128962306a36Sopenharmony_ci return ROOM_LOW; 129062306a36Sopenharmony_ci else 129162306a36Sopenharmony_ci return ROOM_NONE; 129262306a36Sopenharmony_ci } 129362306a36Sopenharmony_ci 129462306a36Sopenharmony_ci if (po->tp_version == TPACKET_V3) { 129562306a36Sopenharmony_ci if (__tpacket_v3_has_room(po, ROOM_POW_OFF)) 129662306a36Sopenharmony_ci ret = ROOM_NORMAL; 129762306a36Sopenharmony_ci else if (__tpacket_v3_has_room(po, 0)) 129862306a36Sopenharmony_ci ret = ROOM_LOW; 129962306a36Sopenharmony_ci } else { 130062306a36Sopenharmony_ci if (__tpacket_has_room(po, ROOM_POW_OFF)) 130162306a36Sopenharmony_ci ret = ROOM_NORMAL; 130262306a36Sopenharmony_ci else if (__tpacket_has_room(po, 0)) 130362306a36Sopenharmony_ci ret = ROOM_LOW; 130462306a36Sopenharmony_ci } 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci return ret; 130762306a36Sopenharmony_ci} 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_cistatic int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) 131062306a36Sopenharmony_ci{ 131162306a36Sopenharmony_ci bool pressure; 131262306a36Sopenharmony_ci int ret; 131362306a36Sopenharmony_ci 131462306a36Sopenharmony_ci ret = __packet_rcv_has_room(po, skb); 131562306a36Sopenharmony_ci pressure = ret != ROOM_NORMAL; 131662306a36Sopenharmony_ci 131762306a36Sopenharmony_ci if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) != pressure) 131862306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, pressure); 131962306a36Sopenharmony_ci 132062306a36Sopenharmony_ci return ret; 132162306a36Sopenharmony_ci} 132262306a36Sopenharmony_ci 132362306a36Sopenharmony_cistatic void packet_rcv_try_clear_pressure(struct packet_sock *po) 132462306a36Sopenharmony_ci{ 132562306a36Sopenharmony_ci if (packet_sock_flag(po, PACKET_SOCK_PRESSURE) && 
132662306a36Sopenharmony_ci __packet_rcv_has_room(po, NULL) == ROOM_NORMAL) 132762306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_PRESSURE, false); 132862306a36Sopenharmony_ci} 132962306a36Sopenharmony_ci 133062306a36Sopenharmony_cistatic void packet_sock_destruct(struct sock *sk) 133162306a36Sopenharmony_ci{ 133262306a36Sopenharmony_ci skb_queue_purge(&sk->sk_error_queue); 133362306a36Sopenharmony_ci 133462306a36Sopenharmony_ci WARN_ON(atomic_read(&sk->sk_rmem_alloc)); 133562306a36Sopenharmony_ci WARN_ON(refcount_read(&sk->sk_wmem_alloc)); 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci if (!sock_flag(sk, SOCK_DEAD)) { 133862306a36Sopenharmony_ci pr_err("Attempt to release alive packet socket: %p\n", sk); 133962306a36Sopenharmony_ci return; 134062306a36Sopenharmony_ci } 134162306a36Sopenharmony_ci} 134262306a36Sopenharmony_ci 134362306a36Sopenharmony_cistatic bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb) 134462306a36Sopenharmony_ci{ 134562306a36Sopenharmony_ci u32 *history = po->rollover->history; 134662306a36Sopenharmony_ci u32 victim, rxhash; 134762306a36Sopenharmony_ci int i, count = 0; 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci rxhash = skb_get_hash(skb); 135062306a36Sopenharmony_ci for (i = 0; i < ROLLOVER_HLEN; i++) 135162306a36Sopenharmony_ci if (READ_ONCE(history[i]) == rxhash) 135262306a36Sopenharmony_ci count++; 135362306a36Sopenharmony_ci 135462306a36Sopenharmony_ci victim = get_random_u32_below(ROLLOVER_HLEN); 135562306a36Sopenharmony_ci 135662306a36Sopenharmony_ci /* Avoid dirtying the cache line if possible */ 135762306a36Sopenharmony_ci if (READ_ONCE(history[victim]) != rxhash) 135862306a36Sopenharmony_ci WRITE_ONCE(history[victim], rxhash); 135962306a36Sopenharmony_ci 136062306a36Sopenharmony_ci return count > (ROLLOVER_HLEN >> 1); 136162306a36Sopenharmony_ci} 136262306a36Sopenharmony_ci 136362306a36Sopenharmony_cistatic unsigned int fanout_demux_hash(struct packet_fanout *f, 
136462306a36Sopenharmony_ci struct sk_buff *skb, 136562306a36Sopenharmony_ci unsigned int num) 136662306a36Sopenharmony_ci{ 136762306a36Sopenharmony_ci return reciprocal_scale(__skb_get_hash_symmetric(skb), num); 136862306a36Sopenharmony_ci} 136962306a36Sopenharmony_ci 137062306a36Sopenharmony_cistatic unsigned int fanout_demux_lb(struct packet_fanout *f, 137162306a36Sopenharmony_ci struct sk_buff *skb, 137262306a36Sopenharmony_ci unsigned int num) 137362306a36Sopenharmony_ci{ 137462306a36Sopenharmony_ci unsigned int val = atomic_inc_return(&f->rr_cur); 137562306a36Sopenharmony_ci 137662306a36Sopenharmony_ci return val % num; 137762306a36Sopenharmony_ci} 137862306a36Sopenharmony_ci 137962306a36Sopenharmony_cistatic unsigned int fanout_demux_cpu(struct packet_fanout *f, 138062306a36Sopenharmony_ci struct sk_buff *skb, 138162306a36Sopenharmony_ci unsigned int num) 138262306a36Sopenharmony_ci{ 138362306a36Sopenharmony_ci return smp_processor_id() % num; 138462306a36Sopenharmony_ci} 138562306a36Sopenharmony_ci 138662306a36Sopenharmony_cistatic unsigned int fanout_demux_rnd(struct packet_fanout *f, 138762306a36Sopenharmony_ci struct sk_buff *skb, 138862306a36Sopenharmony_ci unsigned int num) 138962306a36Sopenharmony_ci{ 139062306a36Sopenharmony_ci return get_random_u32_below(num); 139162306a36Sopenharmony_ci} 139262306a36Sopenharmony_ci 139362306a36Sopenharmony_cistatic unsigned int fanout_demux_rollover(struct packet_fanout *f, 139462306a36Sopenharmony_ci struct sk_buff *skb, 139562306a36Sopenharmony_ci unsigned int idx, bool try_self, 139662306a36Sopenharmony_ci unsigned int num) 139762306a36Sopenharmony_ci{ 139862306a36Sopenharmony_ci struct packet_sock *po, *po_next, *po_skip = NULL; 139962306a36Sopenharmony_ci unsigned int i, j, room = ROOM_NONE; 140062306a36Sopenharmony_ci 140162306a36Sopenharmony_ci po = pkt_sk(rcu_dereference(f->arr[idx])); 140262306a36Sopenharmony_ci 140362306a36Sopenharmony_ci if (try_self) { 140462306a36Sopenharmony_ci room = 
packet_rcv_has_room(po, skb); 140562306a36Sopenharmony_ci if (room == ROOM_NORMAL || 140662306a36Sopenharmony_ci (room == ROOM_LOW && !fanout_flow_is_huge(po, skb))) 140762306a36Sopenharmony_ci return idx; 140862306a36Sopenharmony_ci po_skip = po; 140962306a36Sopenharmony_ci } 141062306a36Sopenharmony_ci 141162306a36Sopenharmony_ci i = j = min_t(int, po->rollover->sock, num - 1); 141262306a36Sopenharmony_ci do { 141362306a36Sopenharmony_ci po_next = pkt_sk(rcu_dereference(f->arr[i])); 141462306a36Sopenharmony_ci if (po_next != po_skip && 141562306a36Sopenharmony_ci !packet_sock_flag(po_next, PACKET_SOCK_PRESSURE) && 141662306a36Sopenharmony_ci packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) { 141762306a36Sopenharmony_ci if (i != j) 141862306a36Sopenharmony_ci po->rollover->sock = i; 141962306a36Sopenharmony_ci atomic_long_inc(&po->rollover->num); 142062306a36Sopenharmony_ci if (room == ROOM_LOW) 142162306a36Sopenharmony_ci atomic_long_inc(&po->rollover->num_huge); 142262306a36Sopenharmony_ci return i; 142362306a36Sopenharmony_ci } 142462306a36Sopenharmony_ci 142562306a36Sopenharmony_ci if (++i == num) 142662306a36Sopenharmony_ci i = 0; 142762306a36Sopenharmony_ci } while (i != j); 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_ci atomic_long_inc(&po->rollover->num_failed); 143062306a36Sopenharmony_ci return idx; 143162306a36Sopenharmony_ci} 143262306a36Sopenharmony_ci 143362306a36Sopenharmony_cistatic unsigned int fanout_demux_qm(struct packet_fanout *f, 143462306a36Sopenharmony_ci struct sk_buff *skb, 143562306a36Sopenharmony_ci unsigned int num) 143662306a36Sopenharmony_ci{ 143762306a36Sopenharmony_ci return skb_get_queue_mapping(skb) % num; 143862306a36Sopenharmony_ci} 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_cistatic unsigned int fanout_demux_bpf(struct packet_fanout *f, 144162306a36Sopenharmony_ci struct sk_buff *skb, 144262306a36Sopenharmony_ci unsigned int num) 144362306a36Sopenharmony_ci{ 144462306a36Sopenharmony_ci struct bpf_prog 
*prog; 144562306a36Sopenharmony_ci unsigned int ret = 0; 144662306a36Sopenharmony_ci 144762306a36Sopenharmony_ci rcu_read_lock(); 144862306a36Sopenharmony_ci prog = rcu_dereference(f->bpf_prog); 144962306a36Sopenharmony_ci if (prog) 145062306a36Sopenharmony_ci ret = bpf_prog_run_clear_cb(prog, skb) % num; 145162306a36Sopenharmony_ci rcu_read_unlock(); 145262306a36Sopenharmony_ci 145362306a36Sopenharmony_ci return ret; 145462306a36Sopenharmony_ci} 145562306a36Sopenharmony_ci 145662306a36Sopenharmony_cistatic bool fanout_has_flag(struct packet_fanout *f, u16 flag) 145762306a36Sopenharmony_ci{ 145862306a36Sopenharmony_ci return f->flags & (flag >> 8); 145962306a36Sopenharmony_ci} 146062306a36Sopenharmony_ci 146162306a36Sopenharmony_cistatic int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, 146262306a36Sopenharmony_ci struct packet_type *pt, struct net_device *orig_dev) 146362306a36Sopenharmony_ci{ 146462306a36Sopenharmony_ci struct packet_fanout *f = pt->af_packet_priv; 146562306a36Sopenharmony_ci unsigned int num = READ_ONCE(f->num_members); 146662306a36Sopenharmony_ci struct net *net = read_pnet(&f->net); 146762306a36Sopenharmony_ci struct packet_sock *po; 146862306a36Sopenharmony_ci unsigned int idx; 146962306a36Sopenharmony_ci 147062306a36Sopenharmony_ci if (!net_eq(dev_net(dev), net) || !num) { 147162306a36Sopenharmony_ci kfree_skb(skb); 147262306a36Sopenharmony_ci return 0; 147362306a36Sopenharmony_ci } 147462306a36Sopenharmony_ci 147562306a36Sopenharmony_ci if (fanout_has_flag(f, PACKET_FANOUT_FLAG_DEFRAG)) { 147662306a36Sopenharmony_ci skb = ip_check_defrag(net, skb, IP_DEFRAG_AF_PACKET); 147762306a36Sopenharmony_ci if (!skb) 147862306a36Sopenharmony_ci return 0; 147962306a36Sopenharmony_ci } 148062306a36Sopenharmony_ci switch (f->type) { 148162306a36Sopenharmony_ci case PACKET_FANOUT_HASH: 148262306a36Sopenharmony_ci default: 148362306a36Sopenharmony_ci idx = fanout_demux_hash(f, skb, num); 148462306a36Sopenharmony_ci break; 
148562306a36Sopenharmony_ci case PACKET_FANOUT_LB: 148662306a36Sopenharmony_ci idx = fanout_demux_lb(f, skb, num); 148762306a36Sopenharmony_ci break; 148862306a36Sopenharmony_ci case PACKET_FANOUT_CPU: 148962306a36Sopenharmony_ci idx = fanout_demux_cpu(f, skb, num); 149062306a36Sopenharmony_ci break; 149162306a36Sopenharmony_ci case PACKET_FANOUT_RND: 149262306a36Sopenharmony_ci idx = fanout_demux_rnd(f, skb, num); 149362306a36Sopenharmony_ci break; 149462306a36Sopenharmony_ci case PACKET_FANOUT_QM: 149562306a36Sopenharmony_ci idx = fanout_demux_qm(f, skb, num); 149662306a36Sopenharmony_ci break; 149762306a36Sopenharmony_ci case PACKET_FANOUT_ROLLOVER: 149862306a36Sopenharmony_ci idx = fanout_demux_rollover(f, skb, 0, false, num); 149962306a36Sopenharmony_ci break; 150062306a36Sopenharmony_ci case PACKET_FANOUT_CBPF: 150162306a36Sopenharmony_ci case PACKET_FANOUT_EBPF: 150262306a36Sopenharmony_ci idx = fanout_demux_bpf(f, skb, num); 150362306a36Sopenharmony_ci break; 150462306a36Sopenharmony_ci } 150562306a36Sopenharmony_ci 150662306a36Sopenharmony_ci if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) 150762306a36Sopenharmony_ci idx = fanout_demux_rollover(f, skb, idx, true, num); 150862306a36Sopenharmony_ci 150962306a36Sopenharmony_ci po = pkt_sk(rcu_dereference(f->arr[idx])); 151062306a36Sopenharmony_ci return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); 151162306a36Sopenharmony_ci} 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_ciDEFINE_MUTEX(fanout_mutex); 151462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(fanout_mutex); 151562306a36Sopenharmony_cistatic LIST_HEAD(fanout_list); 151662306a36Sopenharmony_cistatic u16 fanout_next_id; 151762306a36Sopenharmony_ci 151862306a36Sopenharmony_cistatic void __fanout_link(struct sock *sk, struct packet_sock *po) 151962306a36Sopenharmony_ci{ 152062306a36Sopenharmony_ci struct packet_fanout *f = po->fanout; 152162306a36Sopenharmony_ci 152262306a36Sopenharmony_ci spin_lock(&f->lock); 152362306a36Sopenharmony_ci 
rcu_assign_pointer(f->arr[f->num_members], sk); 152462306a36Sopenharmony_ci smp_wmb(); 152562306a36Sopenharmony_ci f->num_members++; 152662306a36Sopenharmony_ci if (f->num_members == 1) 152762306a36Sopenharmony_ci dev_add_pack(&f->prot_hook); 152862306a36Sopenharmony_ci spin_unlock(&f->lock); 152962306a36Sopenharmony_ci} 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_cistatic void __fanout_unlink(struct sock *sk, struct packet_sock *po) 153262306a36Sopenharmony_ci{ 153362306a36Sopenharmony_ci struct packet_fanout *f = po->fanout; 153462306a36Sopenharmony_ci int i; 153562306a36Sopenharmony_ci 153662306a36Sopenharmony_ci spin_lock(&f->lock); 153762306a36Sopenharmony_ci for (i = 0; i < f->num_members; i++) { 153862306a36Sopenharmony_ci if (rcu_dereference_protected(f->arr[i], 153962306a36Sopenharmony_ci lockdep_is_held(&f->lock)) == sk) 154062306a36Sopenharmony_ci break; 154162306a36Sopenharmony_ci } 154262306a36Sopenharmony_ci BUG_ON(i >= f->num_members); 154362306a36Sopenharmony_ci rcu_assign_pointer(f->arr[i], 154462306a36Sopenharmony_ci rcu_dereference_protected(f->arr[f->num_members - 1], 154562306a36Sopenharmony_ci lockdep_is_held(&f->lock))); 154662306a36Sopenharmony_ci f->num_members--; 154762306a36Sopenharmony_ci if (f->num_members == 0) 154862306a36Sopenharmony_ci __dev_remove_pack(&f->prot_hook); 154962306a36Sopenharmony_ci spin_unlock(&f->lock); 155062306a36Sopenharmony_ci} 155162306a36Sopenharmony_ci 155262306a36Sopenharmony_cistatic bool match_fanout_group(struct packet_type *ptype, struct sock *sk) 155362306a36Sopenharmony_ci{ 155462306a36Sopenharmony_ci if (sk->sk_family != PF_PACKET) 155562306a36Sopenharmony_ci return false; 155662306a36Sopenharmony_ci 155762306a36Sopenharmony_ci return ptype->af_packet_priv == pkt_sk(sk)->fanout; 155862306a36Sopenharmony_ci} 155962306a36Sopenharmony_ci 156062306a36Sopenharmony_cistatic void fanout_init_data(struct packet_fanout *f) 156162306a36Sopenharmony_ci{ 156262306a36Sopenharmony_ci switch (f->type) { 
156362306a36Sopenharmony_ci case PACKET_FANOUT_LB: 156462306a36Sopenharmony_ci atomic_set(&f->rr_cur, 0); 156562306a36Sopenharmony_ci break; 156662306a36Sopenharmony_ci case PACKET_FANOUT_CBPF: 156762306a36Sopenharmony_ci case PACKET_FANOUT_EBPF: 156862306a36Sopenharmony_ci RCU_INIT_POINTER(f->bpf_prog, NULL); 156962306a36Sopenharmony_ci break; 157062306a36Sopenharmony_ci } 157162306a36Sopenharmony_ci} 157262306a36Sopenharmony_ci 157362306a36Sopenharmony_cistatic void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new) 157462306a36Sopenharmony_ci{ 157562306a36Sopenharmony_ci struct bpf_prog *old; 157662306a36Sopenharmony_ci 157762306a36Sopenharmony_ci spin_lock(&f->lock); 157862306a36Sopenharmony_ci old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock)); 157962306a36Sopenharmony_ci rcu_assign_pointer(f->bpf_prog, new); 158062306a36Sopenharmony_ci spin_unlock(&f->lock); 158162306a36Sopenharmony_ci 158262306a36Sopenharmony_ci if (old) { 158362306a36Sopenharmony_ci synchronize_net(); 158462306a36Sopenharmony_ci bpf_prog_destroy(old); 158562306a36Sopenharmony_ci } 158662306a36Sopenharmony_ci} 158762306a36Sopenharmony_ci 158862306a36Sopenharmony_cistatic int fanout_set_data_cbpf(struct packet_sock *po, sockptr_t data, 158962306a36Sopenharmony_ci unsigned int len) 159062306a36Sopenharmony_ci{ 159162306a36Sopenharmony_ci struct bpf_prog *new; 159262306a36Sopenharmony_ci struct sock_fprog fprog; 159362306a36Sopenharmony_ci int ret; 159462306a36Sopenharmony_ci 159562306a36Sopenharmony_ci if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) 159662306a36Sopenharmony_ci return -EPERM; 159762306a36Sopenharmony_ci 159862306a36Sopenharmony_ci ret = copy_bpf_fprog_from_user(&fprog, data, len); 159962306a36Sopenharmony_ci if (ret) 160062306a36Sopenharmony_ci return ret; 160162306a36Sopenharmony_ci 160262306a36Sopenharmony_ci ret = bpf_prog_create_from_user(&new, &fprog, NULL, false); 160362306a36Sopenharmony_ci if (ret) 160462306a36Sopenharmony_ci return 
ret; 160562306a36Sopenharmony_ci 160662306a36Sopenharmony_ci __fanout_set_data_bpf(po->fanout, new); 160762306a36Sopenharmony_ci return 0; 160862306a36Sopenharmony_ci} 160962306a36Sopenharmony_ci 161062306a36Sopenharmony_cistatic int fanout_set_data_ebpf(struct packet_sock *po, sockptr_t data, 161162306a36Sopenharmony_ci unsigned int len) 161262306a36Sopenharmony_ci{ 161362306a36Sopenharmony_ci struct bpf_prog *new; 161462306a36Sopenharmony_ci u32 fd; 161562306a36Sopenharmony_ci 161662306a36Sopenharmony_ci if (sock_flag(&po->sk, SOCK_FILTER_LOCKED)) 161762306a36Sopenharmony_ci return -EPERM; 161862306a36Sopenharmony_ci if (len != sizeof(fd)) 161962306a36Sopenharmony_ci return -EINVAL; 162062306a36Sopenharmony_ci if (copy_from_sockptr(&fd, data, len)) 162162306a36Sopenharmony_ci return -EFAULT; 162262306a36Sopenharmony_ci 162362306a36Sopenharmony_ci new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); 162462306a36Sopenharmony_ci if (IS_ERR(new)) 162562306a36Sopenharmony_ci return PTR_ERR(new); 162662306a36Sopenharmony_ci 162762306a36Sopenharmony_ci __fanout_set_data_bpf(po->fanout, new); 162862306a36Sopenharmony_ci return 0; 162962306a36Sopenharmony_ci} 163062306a36Sopenharmony_ci 163162306a36Sopenharmony_cistatic int fanout_set_data(struct packet_sock *po, sockptr_t data, 163262306a36Sopenharmony_ci unsigned int len) 163362306a36Sopenharmony_ci{ 163462306a36Sopenharmony_ci switch (po->fanout->type) { 163562306a36Sopenharmony_ci case PACKET_FANOUT_CBPF: 163662306a36Sopenharmony_ci return fanout_set_data_cbpf(po, data, len); 163762306a36Sopenharmony_ci case PACKET_FANOUT_EBPF: 163862306a36Sopenharmony_ci return fanout_set_data_ebpf(po, data, len); 163962306a36Sopenharmony_ci default: 164062306a36Sopenharmony_ci return -EINVAL; 164162306a36Sopenharmony_ci } 164262306a36Sopenharmony_ci} 164362306a36Sopenharmony_ci 164462306a36Sopenharmony_cistatic void fanout_release_data(struct packet_fanout *f) 164562306a36Sopenharmony_ci{ 164662306a36Sopenharmony_ci switch 
(f->type) { 164762306a36Sopenharmony_ci case PACKET_FANOUT_CBPF: 164862306a36Sopenharmony_ci case PACKET_FANOUT_EBPF: 164962306a36Sopenharmony_ci __fanout_set_data_bpf(f, NULL); 165062306a36Sopenharmony_ci } 165162306a36Sopenharmony_ci} 165262306a36Sopenharmony_ci 165362306a36Sopenharmony_cistatic bool __fanout_id_is_free(struct sock *sk, u16 candidate_id) 165462306a36Sopenharmony_ci{ 165562306a36Sopenharmony_ci struct packet_fanout *f; 165662306a36Sopenharmony_ci 165762306a36Sopenharmony_ci list_for_each_entry(f, &fanout_list, list) { 165862306a36Sopenharmony_ci if (f->id == candidate_id && 165962306a36Sopenharmony_ci read_pnet(&f->net) == sock_net(sk)) { 166062306a36Sopenharmony_ci return false; 166162306a36Sopenharmony_ci } 166262306a36Sopenharmony_ci } 166362306a36Sopenharmony_ci return true; 166462306a36Sopenharmony_ci} 166562306a36Sopenharmony_ci 166662306a36Sopenharmony_cistatic bool fanout_find_new_id(struct sock *sk, u16 *new_id) 166762306a36Sopenharmony_ci{ 166862306a36Sopenharmony_ci u16 id = fanout_next_id; 166962306a36Sopenharmony_ci 167062306a36Sopenharmony_ci do { 167162306a36Sopenharmony_ci if (__fanout_id_is_free(sk, id)) { 167262306a36Sopenharmony_ci *new_id = id; 167362306a36Sopenharmony_ci fanout_next_id = id + 1; 167462306a36Sopenharmony_ci return true; 167562306a36Sopenharmony_ci } 167662306a36Sopenharmony_ci 167762306a36Sopenharmony_ci id++; 167862306a36Sopenharmony_ci } while (id != fanout_next_id); 167962306a36Sopenharmony_ci 168062306a36Sopenharmony_ci return false; 168162306a36Sopenharmony_ci} 168262306a36Sopenharmony_ci 168362306a36Sopenharmony_cistatic int fanout_add(struct sock *sk, struct fanout_args *args) 168462306a36Sopenharmony_ci{ 168562306a36Sopenharmony_ci struct packet_rollover *rollover = NULL; 168662306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 168762306a36Sopenharmony_ci u16 type_flags = args->type_flags; 168862306a36Sopenharmony_ci struct packet_fanout *f, *match; 168962306a36Sopenharmony_ci u8 type = 
type_flags & 0xff; 169062306a36Sopenharmony_ci u8 flags = type_flags >> 8; 169162306a36Sopenharmony_ci u16 id = args->id; 169262306a36Sopenharmony_ci int err; 169362306a36Sopenharmony_ci 169462306a36Sopenharmony_ci switch (type) { 169562306a36Sopenharmony_ci case PACKET_FANOUT_ROLLOVER: 169662306a36Sopenharmony_ci if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) 169762306a36Sopenharmony_ci return -EINVAL; 169862306a36Sopenharmony_ci break; 169962306a36Sopenharmony_ci case PACKET_FANOUT_HASH: 170062306a36Sopenharmony_ci case PACKET_FANOUT_LB: 170162306a36Sopenharmony_ci case PACKET_FANOUT_CPU: 170262306a36Sopenharmony_ci case PACKET_FANOUT_RND: 170362306a36Sopenharmony_ci case PACKET_FANOUT_QM: 170462306a36Sopenharmony_ci case PACKET_FANOUT_CBPF: 170562306a36Sopenharmony_ci case PACKET_FANOUT_EBPF: 170662306a36Sopenharmony_ci break; 170762306a36Sopenharmony_ci default: 170862306a36Sopenharmony_ci return -EINVAL; 170962306a36Sopenharmony_ci } 171062306a36Sopenharmony_ci 171162306a36Sopenharmony_ci mutex_lock(&fanout_mutex); 171262306a36Sopenharmony_ci 171362306a36Sopenharmony_ci err = -EALREADY; 171462306a36Sopenharmony_ci if (po->fanout) 171562306a36Sopenharmony_ci goto out; 171662306a36Sopenharmony_ci 171762306a36Sopenharmony_ci if (type == PACKET_FANOUT_ROLLOVER || 171862306a36Sopenharmony_ci (type_flags & PACKET_FANOUT_FLAG_ROLLOVER)) { 171962306a36Sopenharmony_ci err = -ENOMEM; 172062306a36Sopenharmony_ci rollover = kzalloc(sizeof(*rollover), GFP_KERNEL); 172162306a36Sopenharmony_ci if (!rollover) 172262306a36Sopenharmony_ci goto out; 172362306a36Sopenharmony_ci atomic_long_set(&rollover->num, 0); 172462306a36Sopenharmony_ci atomic_long_set(&rollover->num_huge, 0); 172562306a36Sopenharmony_ci atomic_long_set(&rollover->num_failed, 0); 172662306a36Sopenharmony_ci } 172762306a36Sopenharmony_ci 172862306a36Sopenharmony_ci if (type_flags & PACKET_FANOUT_FLAG_UNIQUEID) { 172962306a36Sopenharmony_ci if (id != 0) { 173062306a36Sopenharmony_ci err = -EINVAL; 
173162306a36Sopenharmony_ci goto out; 173262306a36Sopenharmony_ci } 173362306a36Sopenharmony_ci if (!fanout_find_new_id(sk, &id)) { 173462306a36Sopenharmony_ci err = -ENOMEM; 173562306a36Sopenharmony_ci goto out; 173662306a36Sopenharmony_ci } 173762306a36Sopenharmony_ci /* ephemeral flag for the first socket in the group: drop it */ 173862306a36Sopenharmony_ci flags &= ~(PACKET_FANOUT_FLAG_UNIQUEID >> 8); 173962306a36Sopenharmony_ci } 174062306a36Sopenharmony_ci 174162306a36Sopenharmony_ci match = NULL; 174262306a36Sopenharmony_ci list_for_each_entry(f, &fanout_list, list) { 174362306a36Sopenharmony_ci if (f->id == id && 174462306a36Sopenharmony_ci read_pnet(&f->net) == sock_net(sk)) { 174562306a36Sopenharmony_ci match = f; 174662306a36Sopenharmony_ci break; 174762306a36Sopenharmony_ci } 174862306a36Sopenharmony_ci } 174962306a36Sopenharmony_ci err = -EINVAL; 175062306a36Sopenharmony_ci if (match) { 175162306a36Sopenharmony_ci if (match->flags != flags) 175262306a36Sopenharmony_ci goto out; 175362306a36Sopenharmony_ci if (args->max_num_members && 175462306a36Sopenharmony_ci args->max_num_members != match->max_num_members) 175562306a36Sopenharmony_ci goto out; 175662306a36Sopenharmony_ci } else { 175762306a36Sopenharmony_ci if (args->max_num_members > PACKET_FANOUT_MAX) 175862306a36Sopenharmony_ci goto out; 175962306a36Sopenharmony_ci if (!args->max_num_members) 176062306a36Sopenharmony_ci /* legacy PACKET_FANOUT_MAX */ 176162306a36Sopenharmony_ci args->max_num_members = 256; 176262306a36Sopenharmony_ci err = -ENOMEM; 176362306a36Sopenharmony_ci match = kvzalloc(struct_size(match, arr, args->max_num_members), 176462306a36Sopenharmony_ci GFP_KERNEL); 176562306a36Sopenharmony_ci if (!match) 176662306a36Sopenharmony_ci goto out; 176762306a36Sopenharmony_ci write_pnet(&match->net, sock_net(sk)); 176862306a36Sopenharmony_ci match->id = id; 176962306a36Sopenharmony_ci match->type = type; 177062306a36Sopenharmony_ci match->flags = flags; 177162306a36Sopenharmony_ci 
INIT_LIST_HEAD(&match->list); 177262306a36Sopenharmony_ci spin_lock_init(&match->lock); 177362306a36Sopenharmony_ci refcount_set(&match->sk_ref, 0); 177462306a36Sopenharmony_ci fanout_init_data(match); 177562306a36Sopenharmony_ci match->prot_hook.type = po->prot_hook.type; 177662306a36Sopenharmony_ci match->prot_hook.dev = po->prot_hook.dev; 177762306a36Sopenharmony_ci match->prot_hook.func = packet_rcv_fanout; 177862306a36Sopenharmony_ci match->prot_hook.af_packet_priv = match; 177962306a36Sopenharmony_ci match->prot_hook.af_packet_net = read_pnet(&match->net); 178062306a36Sopenharmony_ci match->prot_hook.id_match = match_fanout_group; 178162306a36Sopenharmony_ci match->max_num_members = args->max_num_members; 178262306a36Sopenharmony_ci match->prot_hook.ignore_outgoing = type_flags & PACKET_FANOUT_FLAG_IGNORE_OUTGOING; 178362306a36Sopenharmony_ci list_add(&match->list, &fanout_list); 178462306a36Sopenharmony_ci } 178562306a36Sopenharmony_ci err = -EINVAL; 178662306a36Sopenharmony_ci 178762306a36Sopenharmony_ci spin_lock(&po->bind_lock); 178862306a36Sopenharmony_ci if (packet_sock_flag(po, PACKET_SOCK_RUNNING) && 178962306a36Sopenharmony_ci match->type == type && 179062306a36Sopenharmony_ci match->prot_hook.type == po->prot_hook.type && 179162306a36Sopenharmony_ci match->prot_hook.dev == po->prot_hook.dev) { 179262306a36Sopenharmony_ci err = -ENOSPC; 179362306a36Sopenharmony_ci if (refcount_read(&match->sk_ref) < match->max_num_members) { 179462306a36Sopenharmony_ci __dev_remove_pack(&po->prot_hook); 179562306a36Sopenharmony_ci 179662306a36Sopenharmony_ci /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ 179762306a36Sopenharmony_ci WRITE_ONCE(po->fanout, match); 179862306a36Sopenharmony_ci 179962306a36Sopenharmony_ci po->rollover = rollover; 180062306a36Sopenharmony_ci rollover = NULL; 180162306a36Sopenharmony_ci refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); 180262306a36Sopenharmony_ci __fanout_link(sk, po); 180362306a36Sopenharmony_ci err 
= 0; 180462306a36Sopenharmony_ci } 180562306a36Sopenharmony_ci } 180662306a36Sopenharmony_ci spin_unlock(&po->bind_lock); 180762306a36Sopenharmony_ci 180862306a36Sopenharmony_ci if (err && !refcount_read(&match->sk_ref)) { 180962306a36Sopenharmony_ci list_del(&match->list); 181062306a36Sopenharmony_ci kvfree(match); 181162306a36Sopenharmony_ci } 181262306a36Sopenharmony_ci 181362306a36Sopenharmony_ciout: 181462306a36Sopenharmony_ci kfree(rollover); 181562306a36Sopenharmony_ci mutex_unlock(&fanout_mutex); 181662306a36Sopenharmony_ci return err; 181762306a36Sopenharmony_ci} 181862306a36Sopenharmony_ci 181962306a36Sopenharmony_ci/* If pkt_sk(sk)->fanout->sk_ref is zero, this function removes 182062306a36Sopenharmony_ci * pkt_sk(sk)->fanout from fanout_list and returns pkt_sk(sk)->fanout. 182162306a36Sopenharmony_ci * It is the responsibility of the caller to call fanout_release_data() and 182262306a36Sopenharmony_ci * free the returned packet_fanout (after synchronize_net()) 182362306a36Sopenharmony_ci */ 182462306a36Sopenharmony_cistatic struct packet_fanout *fanout_release(struct sock *sk) 182562306a36Sopenharmony_ci{ 182662306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 182762306a36Sopenharmony_ci struct packet_fanout *f; 182862306a36Sopenharmony_ci 182962306a36Sopenharmony_ci mutex_lock(&fanout_mutex); 183062306a36Sopenharmony_ci f = po->fanout; 183162306a36Sopenharmony_ci if (f) { 183262306a36Sopenharmony_ci po->fanout = NULL; 183362306a36Sopenharmony_ci 183462306a36Sopenharmony_ci if (refcount_dec_and_test(&f->sk_ref)) 183562306a36Sopenharmony_ci list_del(&f->list); 183662306a36Sopenharmony_ci else 183762306a36Sopenharmony_ci f = NULL; 183862306a36Sopenharmony_ci } 183962306a36Sopenharmony_ci mutex_unlock(&fanout_mutex); 184062306a36Sopenharmony_ci 184162306a36Sopenharmony_ci return f; 184262306a36Sopenharmony_ci} 184362306a36Sopenharmony_ci 184462306a36Sopenharmony_cistatic bool packet_extra_vlan_len_allowed(const struct net_device *dev, 
184562306a36Sopenharmony_ci struct sk_buff *skb) 184662306a36Sopenharmony_ci{ 184762306a36Sopenharmony_ci /* Earlier code assumed this would be a VLAN pkt, double-check 184862306a36Sopenharmony_ci * this now that we have the actual packet in hand. We can only 184962306a36Sopenharmony_ci * do this check on Ethernet devices. 185062306a36Sopenharmony_ci */ 185162306a36Sopenharmony_ci if (unlikely(dev->type != ARPHRD_ETHER)) 185262306a36Sopenharmony_ci return false; 185362306a36Sopenharmony_ci 185462306a36Sopenharmony_ci skb_reset_mac_header(skb); 185562306a36Sopenharmony_ci return likely(eth_hdr(skb)->h_proto == htons(ETH_P_8021Q)); 185662306a36Sopenharmony_ci} 185762306a36Sopenharmony_ci 185862306a36Sopenharmony_cistatic const struct proto_ops packet_ops; 185962306a36Sopenharmony_ci 186062306a36Sopenharmony_cistatic const struct proto_ops packet_ops_spkt; 186162306a36Sopenharmony_ci 186262306a36Sopenharmony_cistatic int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, 186362306a36Sopenharmony_ci struct packet_type *pt, struct net_device *orig_dev) 186462306a36Sopenharmony_ci{ 186562306a36Sopenharmony_ci struct sock *sk; 186662306a36Sopenharmony_ci struct sockaddr_pkt *spkt; 186762306a36Sopenharmony_ci 186862306a36Sopenharmony_ci /* 186962306a36Sopenharmony_ci * When we registered the protocol we saved the socket in the data 187062306a36Sopenharmony_ci * field for just this event. 187162306a36Sopenharmony_ci */ 187262306a36Sopenharmony_ci 187362306a36Sopenharmony_ci sk = pt->af_packet_priv; 187462306a36Sopenharmony_ci 187562306a36Sopenharmony_ci /* 187662306a36Sopenharmony_ci * Yank back the headers [hope the device set this 187762306a36Sopenharmony_ci * right or kerboom...] 187862306a36Sopenharmony_ci * 187962306a36Sopenharmony_ci * Incoming packets have ll header pulled, 188062306a36Sopenharmony_ci * push it back. 
188162306a36Sopenharmony_ci * 188262306a36Sopenharmony_ci * For outgoing ones skb->data == skb_mac_header(skb) 188362306a36Sopenharmony_ci * so that this procedure is noop. 188462306a36Sopenharmony_ci */ 188562306a36Sopenharmony_ci 188662306a36Sopenharmony_ci if (skb->pkt_type == PACKET_LOOPBACK) 188762306a36Sopenharmony_ci goto out; 188862306a36Sopenharmony_ci 188962306a36Sopenharmony_ci if (!net_eq(dev_net(dev), sock_net(sk))) 189062306a36Sopenharmony_ci goto out; 189162306a36Sopenharmony_ci 189262306a36Sopenharmony_ci skb = skb_share_check(skb, GFP_ATOMIC); 189362306a36Sopenharmony_ci if (skb == NULL) 189462306a36Sopenharmony_ci goto oom; 189562306a36Sopenharmony_ci 189662306a36Sopenharmony_ci /* drop any routing info */ 189762306a36Sopenharmony_ci skb_dst_drop(skb); 189862306a36Sopenharmony_ci 189962306a36Sopenharmony_ci /* drop conntrack reference */ 190062306a36Sopenharmony_ci nf_reset_ct(skb); 190162306a36Sopenharmony_ci 190262306a36Sopenharmony_ci spkt = &PACKET_SKB_CB(skb)->sa.pkt; 190362306a36Sopenharmony_ci 190462306a36Sopenharmony_ci skb_push(skb, skb->data - skb_mac_header(skb)); 190562306a36Sopenharmony_ci 190662306a36Sopenharmony_ci /* 190762306a36Sopenharmony_ci * The SOCK_PACKET socket receives _all_ frames. 190862306a36Sopenharmony_ci */ 190962306a36Sopenharmony_ci 191062306a36Sopenharmony_ci spkt->spkt_family = dev->type; 191162306a36Sopenharmony_ci strscpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device)); 191262306a36Sopenharmony_ci spkt->spkt_protocol = skb->protocol; 191362306a36Sopenharmony_ci 191462306a36Sopenharmony_ci /* 191562306a36Sopenharmony_ci * Charge the memory to the socket. This is done specifically 191662306a36Sopenharmony_ci * to prevent sockets using all the memory up. 
191762306a36Sopenharmony_ci */ 191862306a36Sopenharmony_ci 191962306a36Sopenharmony_ci if (sock_queue_rcv_skb(sk, skb) == 0) 192062306a36Sopenharmony_ci return 0; 192162306a36Sopenharmony_ci 192262306a36Sopenharmony_ciout: 192362306a36Sopenharmony_ci kfree_skb(skb); 192462306a36Sopenharmony_cioom: 192562306a36Sopenharmony_ci return 0; 192662306a36Sopenharmony_ci} 192762306a36Sopenharmony_ci 192862306a36Sopenharmony_cistatic void packet_parse_headers(struct sk_buff *skb, struct socket *sock) 192962306a36Sopenharmony_ci{ 193062306a36Sopenharmony_ci int depth; 193162306a36Sopenharmony_ci 193262306a36Sopenharmony_ci if ((!skb->protocol || skb->protocol == htons(ETH_P_ALL)) && 193362306a36Sopenharmony_ci sock->type == SOCK_RAW) { 193462306a36Sopenharmony_ci skb_reset_mac_header(skb); 193562306a36Sopenharmony_ci skb->protocol = dev_parse_header_protocol(skb); 193662306a36Sopenharmony_ci } 193762306a36Sopenharmony_ci 193862306a36Sopenharmony_ci /* Move network header to the right position for VLAN tagged packets */ 193962306a36Sopenharmony_ci if (likely(skb->dev->type == ARPHRD_ETHER) && 194062306a36Sopenharmony_ci eth_type_vlan(skb->protocol) && 194162306a36Sopenharmony_ci vlan_get_protocol_and_depth(skb, skb->protocol, &depth) != 0) 194262306a36Sopenharmony_ci skb_set_network_header(skb, depth); 194362306a36Sopenharmony_ci 194462306a36Sopenharmony_ci skb_probe_transport_header(skb); 194562306a36Sopenharmony_ci} 194662306a36Sopenharmony_ci 194762306a36Sopenharmony_ci/* 194862306a36Sopenharmony_ci * Output a raw packet to a device layer. 
This bypasses all the other 194962306a36Sopenharmony_ci * protocol layers and you must therefore supply it with a complete frame 195062306a36Sopenharmony_ci */ 195162306a36Sopenharmony_ci 195262306a36Sopenharmony_cistatic int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg, 195362306a36Sopenharmony_ci size_t len) 195462306a36Sopenharmony_ci{ 195562306a36Sopenharmony_ci struct sock *sk = sock->sk; 195662306a36Sopenharmony_ci DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name); 195762306a36Sopenharmony_ci struct sk_buff *skb = NULL; 195862306a36Sopenharmony_ci struct net_device *dev; 195962306a36Sopenharmony_ci struct sockcm_cookie sockc; 196062306a36Sopenharmony_ci __be16 proto = 0; 196162306a36Sopenharmony_ci int err; 196262306a36Sopenharmony_ci int extra_len = 0; 196362306a36Sopenharmony_ci 196462306a36Sopenharmony_ci /* 196562306a36Sopenharmony_ci * Get and verify the address. 196662306a36Sopenharmony_ci */ 196762306a36Sopenharmony_ci 196862306a36Sopenharmony_ci if (saddr) { 196962306a36Sopenharmony_ci if (msg->msg_namelen < sizeof(struct sockaddr)) 197062306a36Sopenharmony_ci return -EINVAL; 197162306a36Sopenharmony_ci if (msg->msg_namelen == sizeof(struct sockaddr_pkt)) 197262306a36Sopenharmony_ci proto = saddr->spkt_protocol; 197362306a36Sopenharmony_ci } else 197462306a36Sopenharmony_ci return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */ 197562306a36Sopenharmony_ci 197662306a36Sopenharmony_ci /* 197762306a36Sopenharmony_ci * Find the device first to size check it 197862306a36Sopenharmony_ci */ 197962306a36Sopenharmony_ci 198062306a36Sopenharmony_ci saddr->spkt_device[sizeof(saddr->spkt_device) - 1] = 0; 198162306a36Sopenharmony_ciretry: 198262306a36Sopenharmony_ci rcu_read_lock(); 198362306a36Sopenharmony_ci dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device); 198462306a36Sopenharmony_ci err = -ENODEV; 198562306a36Sopenharmony_ci if (dev == NULL) 198662306a36Sopenharmony_ci goto out_unlock; 
198762306a36Sopenharmony_ci 198862306a36Sopenharmony_ci err = -ENETDOWN; 198962306a36Sopenharmony_ci if (!(dev->flags & IFF_UP)) 199062306a36Sopenharmony_ci goto out_unlock; 199162306a36Sopenharmony_ci 199262306a36Sopenharmony_ci /* 199362306a36Sopenharmony_ci * You may not queue a frame bigger than the mtu. This is the lowest level 199462306a36Sopenharmony_ci * raw protocol and you must do your own fragmentation at this level. 199562306a36Sopenharmony_ci */ 199662306a36Sopenharmony_ci 199762306a36Sopenharmony_ci if (unlikely(sock_flag(sk, SOCK_NOFCS))) { 199862306a36Sopenharmony_ci if (!netif_supports_nofcs(dev)) { 199962306a36Sopenharmony_ci err = -EPROTONOSUPPORT; 200062306a36Sopenharmony_ci goto out_unlock; 200162306a36Sopenharmony_ci } 200262306a36Sopenharmony_ci extra_len = 4; /* We're doing our own CRC */ 200362306a36Sopenharmony_ci } 200462306a36Sopenharmony_ci 200562306a36Sopenharmony_ci err = -EMSGSIZE; 200662306a36Sopenharmony_ci if (len > dev->mtu + dev->hard_header_len + VLAN_HLEN + extra_len) 200762306a36Sopenharmony_ci goto out_unlock; 200862306a36Sopenharmony_ci 200962306a36Sopenharmony_ci if (!skb) { 201062306a36Sopenharmony_ci size_t reserved = LL_RESERVED_SPACE(dev); 201162306a36Sopenharmony_ci int tlen = dev->needed_tailroom; 201262306a36Sopenharmony_ci unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0; 201362306a36Sopenharmony_ci 201462306a36Sopenharmony_ci rcu_read_unlock(); 201562306a36Sopenharmony_ci skb = sock_wmalloc(sk, len + reserved + tlen, 0, GFP_KERNEL); 201662306a36Sopenharmony_ci if (skb == NULL) 201762306a36Sopenharmony_ci return -ENOBUFS; 201862306a36Sopenharmony_ci /* FIXME: Save some space for broken drivers that write a hard 201962306a36Sopenharmony_ci * header at transmission time by themselves. PPP is the notable 202062306a36Sopenharmony_ci * one here. This should really be fixed at the driver level. 
202162306a36Sopenharmony_ci */ 202262306a36Sopenharmony_ci skb_reserve(skb, reserved); 202362306a36Sopenharmony_ci skb_reset_network_header(skb); 202462306a36Sopenharmony_ci 202562306a36Sopenharmony_ci /* Try to align data part correctly */ 202662306a36Sopenharmony_ci if (hhlen) { 202762306a36Sopenharmony_ci skb->data -= hhlen; 202862306a36Sopenharmony_ci skb->tail -= hhlen; 202962306a36Sopenharmony_ci if (len < hhlen) 203062306a36Sopenharmony_ci skb_reset_network_header(skb); 203162306a36Sopenharmony_ci } 203262306a36Sopenharmony_ci err = memcpy_from_msg(skb_put(skb, len), msg, len); 203362306a36Sopenharmony_ci if (err) 203462306a36Sopenharmony_ci goto out_free; 203562306a36Sopenharmony_ci goto retry; 203662306a36Sopenharmony_ci } 203762306a36Sopenharmony_ci 203862306a36Sopenharmony_ci if (!dev_validate_header(dev, skb->data, len) || !skb->len) { 203962306a36Sopenharmony_ci err = -EINVAL; 204062306a36Sopenharmony_ci goto out_unlock; 204162306a36Sopenharmony_ci } 204262306a36Sopenharmony_ci if (len > (dev->mtu + dev->hard_header_len + extra_len) && 204362306a36Sopenharmony_ci !packet_extra_vlan_len_allowed(dev, skb)) { 204462306a36Sopenharmony_ci err = -EMSGSIZE; 204562306a36Sopenharmony_ci goto out_unlock; 204662306a36Sopenharmony_ci } 204762306a36Sopenharmony_ci 204862306a36Sopenharmony_ci sockcm_init(&sockc, sk); 204962306a36Sopenharmony_ci if (msg->msg_controllen) { 205062306a36Sopenharmony_ci err = sock_cmsg_send(sk, msg, &sockc); 205162306a36Sopenharmony_ci if (unlikely(err)) 205262306a36Sopenharmony_ci goto out_unlock; 205362306a36Sopenharmony_ci } 205462306a36Sopenharmony_ci 205562306a36Sopenharmony_ci skb->protocol = proto; 205662306a36Sopenharmony_ci skb->dev = dev; 205762306a36Sopenharmony_ci skb->priority = READ_ONCE(sk->sk_priority); 205862306a36Sopenharmony_ci skb->mark = READ_ONCE(sk->sk_mark); 205962306a36Sopenharmony_ci skb->tstamp = sockc.transmit_time; 206062306a36Sopenharmony_ci 206162306a36Sopenharmony_ci skb_setup_tx_timestamp(skb, 
sockc.tsflags); 206262306a36Sopenharmony_ci 206362306a36Sopenharmony_ci if (unlikely(extra_len == 4)) 206462306a36Sopenharmony_ci skb->no_fcs = 1; 206562306a36Sopenharmony_ci 206662306a36Sopenharmony_ci packet_parse_headers(skb, sock); 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci dev_queue_xmit(skb); 206962306a36Sopenharmony_ci rcu_read_unlock(); 207062306a36Sopenharmony_ci return len; 207162306a36Sopenharmony_ci 207262306a36Sopenharmony_ciout_unlock: 207362306a36Sopenharmony_ci rcu_read_unlock(); 207462306a36Sopenharmony_ciout_free: 207562306a36Sopenharmony_ci kfree_skb(skb); 207662306a36Sopenharmony_ci return err; 207762306a36Sopenharmony_ci} 207862306a36Sopenharmony_ci 207962306a36Sopenharmony_cistatic unsigned int run_filter(struct sk_buff *skb, 208062306a36Sopenharmony_ci const struct sock *sk, 208162306a36Sopenharmony_ci unsigned int res) 208262306a36Sopenharmony_ci{ 208362306a36Sopenharmony_ci struct sk_filter *filter; 208462306a36Sopenharmony_ci 208562306a36Sopenharmony_ci rcu_read_lock(); 208662306a36Sopenharmony_ci filter = rcu_dereference(sk->sk_filter); 208762306a36Sopenharmony_ci if (filter != NULL) 208862306a36Sopenharmony_ci res = bpf_prog_run_clear_cb(filter->prog, skb); 208962306a36Sopenharmony_ci rcu_read_unlock(); 209062306a36Sopenharmony_ci 209162306a36Sopenharmony_ci return res; 209262306a36Sopenharmony_ci} 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_cistatic int packet_rcv_vnet(struct msghdr *msg, const struct sk_buff *skb, 209562306a36Sopenharmony_ci size_t *len, int vnet_hdr_sz) 209662306a36Sopenharmony_ci{ 209762306a36Sopenharmony_ci struct virtio_net_hdr_mrg_rxbuf vnet_hdr = { .num_buffers = 0 }; 209862306a36Sopenharmony_ci 209962306a36Sopenharmony_ci if (*len < vnet_hdr_sz) 210062306a36Sopenharmony_ci return -EINVAL; 210162306a36Sopenharmony_ci *len -= vnet_hdr_sz; 210262306a36Sopenharmony_ci 210362306a36Sopenharmony_ci if (virtio_net_hdr_from_skb(skb, (struct virtio_net_hdr *)&vnet_hdr, vio_le(), true, 0)) 
210462306a36Sopenharmony_ci return -EINVAL; 210562306a36Sopenharmony_ci 210662306a36Sopenharmony_ci return memcpy_to_msg(msg, (void *)&vnet_hdr, vnet_hdr_sz); 210762306a36Sopenharmony_ci} 210862306a36Sopenharmony_ci 210962306a36Sopenharmony_ci/* 211062306a36Sopenharmony_ci * This function makes lazy skb cloning in hope that most of packets 211162306a36Sopenharmony_ci * are discarded by BPF. 211262306a36Sopenharmony_ci * 211362306a36Sopenharmony_ci * Note tricky part: we DO mangle shared skb! skb->data, skb->len 211462306a36Sopenharmony_ci * and skb->cb are mangled. It works because (and until) packets 211562306a36Sopenharmony_ci * falling here are owned by current CPU. Output packets are cloned 211662306a36Sopenharmony_ci * by dev_queue_xmit_nit(), input packets are processed by net_bh 211762306a36Sopenharmony_ci * sequentially, so that if we return skb to original state on exit, 211862306a36Sopenharmony_ci * we will not harm anyone. 211962306a36Sopenharmony_ci */ 212062306a36Sopenharmony_ci 212162306a36Sopenharmony_cistatic int packet_rcv(struct sk_buff *skb, struct net_device *dev, 212262306a36Sopenharmony_ci struct packet_type *pt, struct net_device *orig_dev) 212362306a36Sopenharmony_ci{ 212462306a36Sopenharmony_ci struct sock *sk; 212562306a36Sopenharmony_ci struct sockaddr_ll *sll; 212662306a36Sopenharmony_ci struct packet_sock *po; 212762306a36Sopenharmony_ci u8 *skb_head = skb->data; 212862306a36Sopenharmony_ci int skb_len = skb->len; 212962306a36Sopenharmony_ci unsigned int snaplen, res; 213062306a36Sopenharmony_ci bool is_drop_n_account = false; 213162306a36Sopenharmony_ci 213262306a36Sopenharmony_ci if (skb->pkt_type == PACKET_LOOPBACK) 213362306a36Sopenharmony_ci goto drop; 213462306a36Sopenharmony_ci 213562306a36Sopenharmony_ci sk = pt->af_packet_priv; 213662306a36Sopenharmony_ci po = pkt_sk(sk); 213762306a36Sopenharmony_ci 213862306a36Sopenharmony_ci if (!net_eq(dev_net(dev), sock_net(sk))) 213962306a36Sopenharmony_ci goto drop; 
214062306a36Sopenharmony_ci 214162306a36Sopenharmony_ci skb->dev = dev; 214262306a36Sopenharmony_ci 214362306a36Sopenharmony_ci if (dev_has_header(dev)) { 214462306a36Sopenharmony_ci /* The device has an explicit notion of ll header, 214562306a36Sopenharmony_ci * exported to higher levels. 214662306a36Sopenharmony_ci * 214762306a36Sopenharmony_ci * Otherwise, the device hides details of its frame 214862306a36Sopenharmony_ci * structure, so that corresponding packet head is 214962306a36Sopenharmony_ci * never delivered to user. 215062306a36Sopenharmony_ci */ 215162306a36Sopenharmony_ci if (sk->sk_type != SOCK_DGRAM) 215262306a36Sopenharmony_ci skb_push(skb, skb->data - skb_mac_header(skb)); 215362306a36Sopenharmony_ci else if (skb->pkt_type == PACKET_OUTGOING) { 215462306a36Sopenharmony_ci /* Special case: outgoing packets have ll header at head */ 215562306a36Sopenharmony_ci skb_pull(skb, skb_network_offset(skb)); 215662306a36Sopenharmony_ci } 215762306a36Sopenharmony_ci } 215862306a36Sopenharmony_ci 215962306a36Sopenharmony_ci snaplen = skb->len; 216062306a36Sopenharmony_ci 216162306a36Sopenharmony_ci res = run_filter(skb, sk, snaplen); 216262306a36Sopenharmony_ci if (!res) 216362306a36Sopenharmony_ci goto drop_n_restore; 216462306a36Sopenharmony_ci if (snaplen > res) 216562306a36Sopenharmony_ci snaplen = res; 216662306a36Sopenharmony_ci 216762306a36Sopenharmony_ci if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) 216862306a36Sopenharmony_ci goto drop_n_acct; 216962306a36Sopenharmony_ci 217062306a36Sopenharmony_ci if (skb_shared(skb)) { 217162306a36Sopenharmony_ci struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); 217262306a36Sopenharmony_ci if (nskb == NULL) 217362306a36Sopenharmony_ci goto drop_n_acct; 217462306a36Sopenharmony_ci 217562306a36Sopenharmony_ci if (skb_head != skb->data) { 217662306a36Sopenharmony_ci skb->data = skb_head; 217762306a36Sopenharmony_ci skb->len = skb_len; 217862306a36Sopenharmony_ci } 217962306a36Sopenharmony_ci consume_skb(skb); 
218062306a36Sopenharmony_ci skb = nskb; 218162306a36Sopenharmony_ci } 218262306a36Sopenharmony_ci 218362306a36Sopenharmony_ci sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8); 218462306a36Sopenharmony_ci 218562306a36Sopenharmony_ci sll = &PACKET_SKB_CB(skb)->sa.ll; 218662306a36Sopenharmony_ci sll->sll_hatype = dev->type; 218762306a36Sopenharmony_ci sll->sll_pkttype = skb->pkt_type; 218862306a36Sopenharmony_ci if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) 218962306a36Sopenharmony_ci sll->sll_ifindex = orig_dev->ifindex; 219062306a36Sopenharmony_ci else 219162306a36Sopenharmony_ci sll->sll_ifindex = dev->ifindex; 219262306a36Sopenharmony_ci 219362306a36Sopenharmony_ci sll->sll_halen = dev_parse_header(skb, sll->sll_addr); 219462306a36Sopenharmony_ci 219562306a36Sopenharmony_ci /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg(). 219662306a36Sopenharmony_ci * Use their space for storing the original skb length. 219762306a36Sopenharmony_ci */ 219862306a36Sopenharmony_ci PACKET_SKB_CB(skb)->sa.origlen = skb->len; 219962306a36Sopenharmony_ci 220062306a36Sopenharmony_ci if (pskb_trim(skb, snaplen)) 220162306a36Sopenharmony_ci goto drop_n_acct; 220262306a36Sopenharmony_ci 220362306a36Sopenharmony_ci skb_set_owner_r(skb, sk); 220462306a36Sopenharmony_ci skb->dev = NULL; 220562306a36Sopenharmony_ci skb_dst_drop(skb); 220662306a36Sopenharmony_ci 220762306a36Sopenharmony_ci /* drop conntrack reference */ 220862306a36Sopenharmony_ci nf_reset_ct(skb); 220962306a36Sopenharmony_ci 221062306a36Sopenharmony_ci spin_lock(&sk->sk_receive_queue.lock); 221162306a36Sopenharmony_ci po->stats.stats1.tp_packets++; 221262306a36Sopenharmony_ci sock_skb_set_dropcount(sk, skb); 221362306a36Sopenharmony_ci skb_clear_delivery_time(skb); 221462306a36Sopenharmony_ci __skb_queue_tail(&sk->sk_receive_queue, skb); 221562306a36Sopenharmony_ci spin_unlock(&sk->sk_receive_queue.lock); 221662306a36Sopenharmony_ci sk->sk_data_ready(sk); 
221762306a36Sopenharmony_ci return 0; 221862306a36Sopenharmony_ci 221962306a36Sopenharmony_cidrop_n_acct: 222062306a36Sopenharmony_ci is_drop_n_account = true; 222162306a36Sopenharmony_ci atomic_inc(&po->tp_drops); 222262306a36Sopenharmony_ci atomic_inc(&sk->sk_drops); 222362306a36Sopenharmony_ci 222462306a36Sopenharmony_cidrop_n_restore: 222562306a36Sopenharmony_ci if (skb_head != skb->data && skb_shared(skb)) { 222662306a36Sopenharmony_ci skb->data = skb_head; 222762306a36Sopenharmony_ci skb->len = skb_len; 222862306a36Sopenharmony_ci } 222962306a36Sopenharmony_cidrop: 223062306a36Sopenharmony_ci if (!is_drop_n_account) 223162306a36Sopenharmony_ci consume_skb(skb); 223262306a36Sopenharmony_ci else 223362306a36Sopenharmony_ci kfree_skb(skb); 223462306a36Sopenharmony_ci return 0; 223562306a36Sopenharmony_ci} 223662306a36Sopenharmony_ci 223762306a36Sopenharmony_cistatic int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, 223862306a36Sopenharmony_ci struct packet_type *pt, struct net_device *orig_dev) 223962306a36Sopenharmony_ci{ 224062306a36Sopenharmony_ci struct sock *sk; 224162306a36Sopenharmony_ci struct packet_sock *po; 224262306a36Sopenharmony_ci struct sockaddr_ll *sll; 224362306a36Sopenharmony_ci union tpacket_uhdr h; 224462306a36Sopenharmony_ci u8 *skb_head = skb->data; 224562306a36Sopenharmony_ci int skb_len = skb->len; 224662306a36Sopenharmony_ci unsigned int snaplen, res; 224762306a36Sopenharmony_ci unsigned long status = TP_STATUS_USER; 224862306a36Sopenharmony_ci unsigned short macoff, hdrlen; 224962306a36Sopenharmony_ci unsigned int netoff; 225062306a36Sopenharmony_ci struct sk_buff *copy_skb = NULL; 225162306a36Sopenharmony_ci struct timespec64 ts; 225262306a36Sopenharmony_ci __u32 ts_status; 225362306a36Sopenharmony_ci bool is_drop_n_account = false; 225462306a36Sopenharmony_ci unsigned int slot_id = 0; 225562306a36Sopenharmony_ci int vnet_hdr_sz = 0; 225662306a36Sopenharmony_ci 225762306a36Sopenharmony_ci /* struct tpacket{2,3}_hdr is 
aligned to a multiple of TPACKET_ALIGNMENT. 225862306a36Sopenharmony_ci * We may add members to them until current aligned size without forcing 225962306a36Sopenharmony_ci * userspace to call getsockopt(..., PACKET_HDRLEN, ...). 226062306a36Sopenharmony_ci */ 226162306a36Sopenharmony_ci BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h2)) != 32); 226262306a36Sopenharmony_ci BUILD_BUG_ON(TPACKET_ALIGN(sizeof(*h.h3)) != 48); 226362306a36Sopenharmony_ci 226462306a36Sopenharmony_ci if (skb->pkt_type == PACKET_LOOPBACK) 226562306a36Sopenharmony_ci goto drop; 226662306a36Sopenharmony_ci 226762306a36Sopenharmony_ci sk = pt->af_packet_priv; 226862306a36Sopenharmony_ci po = pkt_sk(sk); 226962306a36Sopenharmony_ci 227062306a36Sopenharmony_ci if (!net_eq(dev_net(dev), sock_net(sk))) 227162306a36Sopenharmony_ci goto drop; 227262306a36Sopenharmony_ci 227362306a36Sopenharmony_ci if (dev_has_header(dev)) { 227462306a36Sopenharmony_ci if (sk->sk_type != SOCK_DGRAM) 227562306a36Sopenharmony_ci skb_push(skb, skb->data - skb_mac_header(skb)); 227662306a36Sopenharmony_ci else if (skb->pkt_type == PACKET_OUTGOING) { 227762306a36Sopenharmony_ci /* Special case: outgoing packets have ll header at head */ 227862306a36Sopenharmony_ci skb_pull(skb, skb_network_offset(skb)); 227962306a36Sopenharmony_ci } 228062306a36Sopenharmony_ci } 228162306a36Sopenharmony_ci 228262306a36Sopenharmony_ci snaplen = skb->len; 228362306a36Sopenharmony_ci 228462306a36Sopenharmony_ci res = run_filter(skb, sk, snaplen); 228562306a36Sopenharmony_ci if (!res) 228662306a36Sopenharmony_ci goto drop_n_restore; 228762306a36Sopenharmony_ci 228862306a36Sopenharmony_ci /* If we are flooded, just give up */ 228962306a36Sopenharmony_ci if (__packet_rcv_has_room(po, skb) == ROOM_NONE) { 229062306a36Sopenharmony_ci atomic_inc(&po->tp_drops); 229162306a36Sopenharmony_ci goto drop_n_restore; 229262306a36Sopenharmony_ci } 229362306a36Sopenharmony_ci 229462306a36Sopenharmony_ci if (skb->ip_summed == CHECKSUM_PARTIAL) 
229562306a36Sopenharmony_ci status |= TP_STATUS_CSUMNOTREADY; 229662306a36Sopenharmony_ci else if (skb->pkt_type != PACKET_OUTGOING && 229762306a36Sopenharmony_ci skb_csum_unnecessary(skb)) 229862306a36Sopenharmony_ci status |= TP_STATUS_CSUM_VALID; 229962306a36Sopenharmony_ci if (skb_is_gso(skb) && skb_is_gso_tcp(skb)) 230062306a36Sopenharmony_ci status |= TP_STATUS_GSO_TCP; 230162306a36Sopenharmony_ci 230262306a36Sopenharmony_ci if (snaplen > res) 230362306a36Sopenharmony_ci snaplen = res; 230462306a36Sopenharmony_ci 230562306a36Sopenharmony_ci if (sk->sk_type == SOCK_DGRAM) { 230662306a36Sopenharmony_ci macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 + 230762306a36Sopenharmony_ci po->tp_reserve; 230862306a36Sopenharmony_ci } else { 230962306a36Sopenharmony_ci unsigned int maclen = skb_network_offset(skb); 231062306a36Sopenharmony_ci netoff = TPACKET_ALIGN(po->tp_hdrlen + 231162306a36Sopenharmony_ci (maclen < 16 ? 16 : maclen)) + 231262306a36Sopenharmony_ci po->tp_reserve; 231362306a36Sopenharmony_ci vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); 231462306a36Sopenharmony_ci if (vnet_hdr_sz) 231562306a36Sopenharmony_ci netoff += vnet_hdr_sz; 231662306a36Sopenharmony_ci macoff = netoff - maclen; 231762306a36Sopenharmony_ci } 231862306a36Sopenharmony_ci if (netoff > USHRT_MAX) { 231962306a36Sopenharmony_ci atomic_inc(&po->tp_drops); 232062306a36Sopenharmony_ci goto drop_n_restore; 232162306a36Sopenharmony_ci } 232262306a36Sopenharmony_ci if (po->tp_version <= TPACKET_V2) { 232362306a36Sopenharmony_ci if (macoff + snaplen > po->rx_ring.frame_size) { 232462306a36Sopenharmony_ci if (po->copy_thresh && 232562306a36Sopenharmony_ci atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf) { 232662306a36Sopenharmony_ci if (skb_shared(skb)) { 232762306a36Sopenharmony_ci copy_skb = skb_clone(skb, GFP_ATOMIC); 232862306a36Sopenharmony_ci } else { 232962306a36Sopenharmony_ci copy_skb = skb_get(skb); 233062306a36Sopenharmony_ci skb_head = skb->data; 233162306a36Sopenharmony_ci } 
233262306a36Sopenharmony_ci if (copy_skb) { 233362306a36Sopenharmony_ci memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, 233462306a36Sopenharmony_ci sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); 233562306a36Sopenharmony_ci skb_set_owner_r(copy_skb, sk); 233662306a36Sopenharmony_ci } 233762306a36Sopenharmony_ci } 233862306a36Sopenharmony_ci snaplen = po->rx_ring.frame_size - macoff; 233962306a36Sopenharmony_ci if ((int)snaplen < 0) { 234062306a36Sopenharmony_ci snaplen = 0; 234162306a36Sopenharmony_ci vnet_hdr_sz = 0; 234262306a36Sopenharmony_ci } 234362306a36Sopenharmony_ci } 234462306a36Sopenharmony_ci } else if (unlikely(macoff + snaplen > 234562306a36Sopenharmony_ci GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { 234662306a36Sopenharmony_ci u32 nval; 234762306a36Sopenharmony_ci 234862306a36Sopenharmony_ci nval = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len - macoff; 234962306a36Sopenharmony_ci pr_err_once("tpacket_rcv: packet too big, clamped from %u to %u. macoff=%u\n", 235062306a36Sopenharmony_ci snaplen, nval, macoff); 235162306a36Sopenharmony_ci snaplen = nval; 235262306a36Sopenharmony_ci if (unlikely((int)snaplen < 0)) { 235362306a36Sopenharmony_ci snaplen = 0; 235462306a36Sopenharmony_ci macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; 235562306a36Sopenharmony_ci vnet_hdr_sz = 0; 235662306a36Sopenharmony_ci } 235762306a36Sopenharmony_ci } 235862306a36Sopenharmony_ci spin_lock(&sk->sk_receive_queue.lock); 235962306a36Sopenharmony_ci h.raw = packet_current_rx_frame(po, skb, 236062306a36Sopenharmony_ci TP_STATUS_KERNEL, (macoff+snaplen)); 236162306a36Sopenharmony_ci if (!h.raw) 236262306a36Sopenharmony_ci goto drop_n_account; 236362306a36Sopenharmony_ci 236462306a36Sopenharmony_ci if (po->tp_version <= TPACKET_V2) { 236562306a36Sopenharmony_ci slot_id = po->rx_ring.head; 236662306a36Sopenharmony_ci if (test_bit(slot_id, po->rx_ring.rx_owner_map)) 236762306a36Sopenharmony_ci goto drop_n_account; 236862306a36Sopenharmony_ci __set_bit(slot_id, 
po->rx_ring.rx_owner_map); 236962306a36Sopenharmony_ci } 237062306a36Sopenharmony_ci 237162306a36Sopenharmony_ci if (vnet_hdr_sz && 237262306a36Sopenharmony_ci virtio_net_hdr_from_skb(skb, h.raw + macoff - 237362306a36Sopenharmony_ci sizeof(struct virtio_net_hdr), 237462306a36Sopenharmony_ci vio_le(), true, 0)) { 237562306a36Sopenharmony_ci if (po->tp_version == TPACKET_V3) 237662306a36Sopenharmony_ci prb_clear_blk_fill_status(&po->rx_ring); 237762306a36Sopenharmony_ci goto drop_n_account; 237862306a36Sopenharmony_ci } 237962306a36Sopenharmony_ci 238062306a36Sopenharmony_ci if (po->tp_version <= TPACKET_V2) { 238162306a36Sopenharmony_ci packet_increment_rx_head(po, &po->rx_ring); 238262306a36Sopenharmony_ci /* 238362306a36Sopenharmony_ci * LOSING will be reported till you read the stats, 238462306a36Sopenharmony_ci * because it's COR - Clear On Read. 238562306a36Sopenharmony_ci * Anyways, moving it for V1/V2 only as V3 doesn't need this 238662306a36Sopenharmony_ci * at packet level. 238762306a36Sopenharmony_ci */ 238862306a36Sopenharmony_ci if (atomic_read(&po->tp_drops)) 238962306a36Sopenharmony_ci status |= TP_STATUS_LOSING; 239062306a36Sopenharmony_ci } 239162306a36Sopenharmony_ci 239262306a36Sopenharmony_ci po->stats.stats1.tp_packets++; 239362306a36Sopenharmony_ci if (copy_skb) { 239462306a36Sopenharmony_ci status |= TP_STATUS_COPY; 239562306a36Sopenharmony_ci skb_clear_delivery_time(copy_skb); 239662306a36Sopenharmony_ci __skb_queue_tail(&sk->sk_receive_queue, copy_skb); 239762306a36Sopenharmony_ci } 239862306a36Sopenharmony_ci spin_unlock(&sk->sk_receive_queue.lock); 239962306a36Sopenharmony_ci 240062306a36Sopenharmony_ci skb_copy_bits(skb, 0, h.raw + macoff, snaplen); 240162306a36Sopenharmony_ci 240262306a36Sopenharmony_ci /* Always timestamp; prefer an existing software timestamp taken 240362306a36Sopenharmony_ci * closer to the time of capture. 
240462306a36Sopenharmony_ci */ 240562306a36Sopenharmony_ci ts_status = tpacket_get_timestamp(skb, &ts, 240662306a36Sopenharmony_ci READ_ONCE(po->tp_tstamp) | 240762306a36Sopenharmony_ci SOF_TIMESTAMPING_SOFTWARE); 240862306a36Sopenharmony_ci if (!ts_status) 240962306a36Sopenharmony_ci ktime_get_real_ts64(&ts); 241062306a36Sopenharmony_ci 241162306a36Sopenharmony_ci status |= ts_status; 241262306a36Sopenharmony_ci 241362306a36Sopenharmony_ci switch (po->tp_version) { 241462306a36Sopenharmony_ci case TPACKET_V1: 241562306a36Sopenharmony_ci h.h1->tp_len = skb->len; 241662306a36Sopenharmony_ci h.h1->tp_snaplen = snaplen; 241762306a36Sopenharmony_ci h.h1->tp_mac = macoff; 241862306a36Sopenharmony_ci h.h1->tp_net = netoff; 241962306a36Sopenharmony_ci h.h1->tp_sec = ts.tv_sec; 242062306a36Sopenharmony_ci h.h1->tp_usec = ts.tv_nsec / NSEC_PER_USEC; 242162306a36Sopenharmony_ci hdrlen = sizeof(*h.h1); 242262306a36Sopenharmony_ci break; 242362306a36Sopenharmony_ci case TPACKET_V2: 242462306a36Sopenharmony_ci h.h2->tp_len = skb->len; 242562306a36Sopenharmony_ci h.h2->tp_snaplen = snaplen; 242662306a36Sopenharmony_ci h.h2->tp_mac = macoff; 242762306a36Sopenharmony_ci h.h2->tp_net = netoff; 242862306a36Sopenharmony_ci h.h2->tp_sec = ts.tv_sec; 242962306a36Sopenharmony_ci h.h2->tp_nsec = ts.tv_nsec; 243062306a36Sopenharmony_ci if (skb_vlan_tag_present(skb)) { 243162306a36Sopenharmony_ci h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); 243262306a36Sopenharmony_ci h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); 243362306a36Sopenharmony_ci status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; 243462306a36Sopenharmony_ci } else { 243562306a36Sopenharmony_ci h.h2->tp_vlan_tci = 0; 243662306a36Sopenharmony_ci h.h2->tp_vlan_tpid = 0; 243762306a36Sopenharmony_ci } 243862306a36Sopenharmony_ci memset(h.h2->tp_padding, 0, sizeof(h.h2->tp_padding)); 243962306a36Sopenharmony_ci hdrlen = sizeof(*h.h2); 244062306a36Sopenharmony_ci break; 244162306a36Sopenharmony_ci case TPACKET_V3: 
244262306a36Sopenharmony_ci /* tp_nxt_offset,vlan are already populated above. 244362306a36Sopenharmony_ci * So DONT clear those fields here 244462306a36Sopenharmony_ci */ 244562306a36Sopenharmony_ci h.h3->tp_status |= status; 244662306a36Sopenharmony_ci h.h3->tp_len = skb->len; 244762306a36Sopenharmony_ci h.h3->tp_snaplen = snaplen; 244862306a36Sopenharmony_ci h.h3->tp_mac = macoff; 244962306a36Sopenharmony_ci h.h3->tp_net = netoff; 245062306a36Sopenharmony_ci h.h3->tp_sec = ts.tv_sec; 245162306a36Sopenharmony_ci h.h3->tp_nsec = ts.tv_nsec; 245262306a36Sopenharmony_ci memset(h.h3->tp_padding, 0, sizeof(h.h3->tp_padding)); 245362306a36Sopenharmony_ci hdrlen = sizeof(*h.h3); 245462306a36Sopenharmony_ci break; 245562306a36Sopenharmony_ci default: 245662306a36Sopenharmony_ci BUG(); 245762306a36Sopenharmony_ci } 245862306a36Sopenharmony_ci 245962306a36Sopenharmony_ci sll = h.raw + TPACKET_ALIGN(hdrlen); 246062306a36Sopenharmony_ci sll->sll_halen = dev_parse_header(skb, sll->sll_addr); 246162306a36Sopenharmony_ci sll->sll_family = AF_PACKET; 246262306a36Sopenharmony_ci sll->sll_hatype = dev->type; 246362306a36Sopenharmony_ci sll->sll_protocol = skb->protocol; 246462306a36Sopenharmony_ci sll->sll_pkttype = skb->pkt_type; 246562306a36Sopenharmony_ci if (unlikely(packet_sock_flag(po, PACKET_SOCK_ORIGDEV))) 246662306a36Sopenharmony_ci sll->sll_ifindex = orig_dev->ifindex; 246762306a36Sopenharmony_ci else 246862306a36Sopenharmony_ci sll->sll_ifindex = dev->ifindex; 246962306a36Sopenharmony_ci 247062306a36Sopenharmony_ci smp_mb(); 247162306a36Sopenharmony_ci 247262306a36Sopenharmony_ci#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1 247362306a36Sopenharmony_ci if (po->tp_version <= TPACKET_V2) { 247462306a36Sopenharmony_ci u8 *start, *end; 247562306a36Sopenharmony_ci 247662306a36Sopenharmony_ci end = (u8 *) PAGE_ALIGN((unsigned long) h.raw + 247762306a36Sopenharmony_ci macoff + snaplen); 247862306a36Sopenharmony_ci 247962306a36Sopenharmony_ci for (start = h.raw; start < end; start 
+= PAGE_SIZE) 248062306a36Sopenharmony_ci flush_dcache_page(pgv_to_page(start)); 248162306a36Sopenharmony_ci } 248262306a36Sopenharmony_ci smp_wmb(); 248362306a36Sopenharmony_ci#endif 248462306a36Sopenharmony_ci 248562306a36Sopenharmony_ci if (po->tp_version <= TPACKET_V2) { 248662306a36Sopenharmony_ci spin_lock(&sk->sk_receive_queue.lock); 248762306a36Sopenharmony_ci __packet_set_status(po, h.raw, status); 248862306a36Sopenharmony_ci __clear_bit(slot_id, po->rx_ring.rx_owner_map); 248962306a36Sopenharmony_ci spin_unlock(&sk->sk_receive_queue.lock); 249062306a36Sopenharmony_ci sk->sk_data_ready(sk); 249162306a36Sopenharmony_ci } else if (po->tp_version == TPACKET_V3) { 249262306a36Sopenharmony_ci prb_clear_blk_fill_status(&po->rx_ring); 249362306a36Sopenharmony_ci } 249462306a36Sopenharmony_ci 249562306a36Sopenharmony_cidrop_n_restore: 249662306a36Sopenharmony_ci if (skb_head != skb->data && skb_shared(skb)) { 249762306a36Sopenharmony_ci skb->data = skb_head; 249862306a36Sopenharmony_ci skb->len = skb_len; 249962306a36Sopenharmony_ci } 250062306a36Sopenharmony_cidrop: 250162306a36Sopenharmony_ci if (!is_drop_n_account) 250262306a36Sopenharmony_ci consume_skb(skb); 250362306a36Sopenharmony_ci else 250462306a36Sopenharmony_ci kfree_skb(skb); 250562306a36Sopenharmony_ci return 0; 250662306a36Sopenharmony_ci 250762306a36Sopenharmony_cidrop_n_account: 250862306a36Sopenharmony_ci spin_unlock(&sk->sk_receive_queue.lock); 250962306a36Sopenharmony_ci atomic_inc(&po->tp_drops); 251062306a36Sopenharmony_ci is_drop_n_account = true; 251162306a36Sopenharmony_ci 251262306a36Sopenharmony_ci sk->sk_data_ready(sk); 251362306a36Sopenharmony_ci kfree_skb(copy_skb); 251462306a36Sopenharmony_ci goto drop_n_restore; 251562306a36Sopenharmony_ci} 251662306a36Sopenharmony_ci 251762306a36Sopenharmony_cistatic void tpacket_destruct_skb(struct sk_buff *skb) 251862306a36Sopenharmony_ci{ 251962306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(skb->sk); 252062306a36Sopenharmony_ci 
252162306a36Sopenharmony_ci if (likely(po->tx_ring.pg_vec)) { 252262306a36Sopenharmony_ci void *ph; 252362306a36Sopenharmony_ci __u32 ts; 252462306a36Sopenharmony_ci 252562306a36Sopenharmony_ci ph = skb_zcopy_get_nouarg(skb); 252662306a36Sopenharmony_ci packet_dec_pending(&po->tx_ring); 252762306a36Sopenharmony_ci 252862306a36Sopenharmony_ci ts = __packet_set_timestamp(po, ph, skb); 252962306a36Sopenharmony_ci __packet_set_status(po, ph, TP_STATUS_AVAILABLE | ts); 253062306a36Sopenharmony_ci 253162306a36Sopenharmony_ci if (!packet_read_pending(&po->tx_ring)) 253262306a36Sopenharmony_ci complete(&po->skb_completion); 253362306a36Sopenharmony_ci } 253462306a36Sopenharmony_ci 253562306a36Sopenharmony_ci sock_wfree(skb); 253662306a36Sopenharmony_ci} 253762306a36Sopenharmony_ci 253862306a36Sopenharmony_cistatic int __packet_snd_vnet_parse(struct virtio_net_hdr *vnet_hdr, size_t len) 253962306a36Sopenharmony_ci{ 254062306a36Sopenharmony_ci if ((vnet_hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && 254162306a36Sopenharmony_ci (__virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + 254262306a36Sopenharmony_ci __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2 > 254362306a36Sopenharmony_ci __virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len))) 254462306a36Sopenharmony_ci vnet_hdr->hdr_len = __cpu_to_virtio16(vio_le(), 254562306a36Sopenharmony_ci __virtio16_to_cpu(vio_le(), vnet_hdr->csum_start) + 254662306a36Sopenharmony_ci __virtio16_to_cpu(vio_le(), vnet_hdr->csum_offset) + 2); 254762306a36Sopenharmony_ci 254862306a36Sopenharmony_ci if (__virtio16_to_cpu(vio_le(), vnet_hdr->hdr_len) > len) 254962306a36Sopenharmony_ci return -EINVAL; 255062306a36Sopenharmony_ci 255162306a36Sopenharmony_ci return 0; 255262306a36Sopenharmony_ci} 255362306a36Sopenharmony_ci 255462306a36Sopenharmony_cistatic int packet_snd_vnet_parse(struct msghdr *msg, size_t *len, 255562306a36Sopenharmony_ci struct virtio_net_hdr *vnet_hdr, int vnet_hdr_sz) 255662306a36Sopenharmony_ci{ 255762306a36Sopenharmony_ci int 
ret; 255862306a36Sopenharmony_ci 255962306a36Sopenharmony_ci if (*len < vnet_hdr_sz) 256062306a36Sopenharmony_ci return -EINVAL; 256162306a36Sopenharmony_ci *len -= vnet_hdr_sz; 256262306a36Sopenharmony_ci 256362306a36Sopenharmony_ci if (!copy_from_iter_full(vnet_hdr, sizeof(*vnet_hdr), &msg->msg_iter)) 256462306a36Sopenharmony_ci return -EFAULT; 256562306a36Sopenharmony_ci 256662306a36Sopenharmony_ci ret = __packet_snd_vnet_parse(vnet_hdr, *len); 256762306a36Sopenharmony_ci if (ret) 256862306a36Sopenharmony_ci return ret; 256962306a36Sopenharmony_ci 257062306a36Sopenharmony_ci /* move iter to point to the start of mac header */ 257162306a36Sopenharmony_ci if (vnet_hdr_sz != sizeof(struct virtio_net_hdr)) 257262306a36Sopenharmony_ci iov_iter_advance(&msg->msg_iter, vnet_hdr_sz - sizeof(struct virtio_net_hdr)); 257362306a36Sopenharmony_ci 257462306a36Sopenharmony_ci return 0; 257562306a36Sopenharmony_ci} 257662306a36Sopenharmony_ci 257762306a36Sopenharmony_cistatic int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb, 257862306a36Sopenharmony_ci void *frame, struct net_device *dev, void *data, int tp_len, 257962306a36Sopenharmony_ci __be16 proto, unsigned char *addr, int hlen, int copylen, 258062306a36Sopenharmony_ci const struct sockcm_cookie *sockc) 258162306a36Sopenharmony_ci{ 258262306a36Sopenharmony_ci union tpacket_uhdr ph; 258362306a36Sopenharmony_ci int to_write, offset, len, nr_frags, len_max; 258462306a36Sopenharmony_ci struct socket *sock = po->sk.sk_socket; 258562306a36Sopenharmony_ci struct page *page; 258662306a36Sopenharmony_ci int err; 258762306a36Sopenharmony_ci 258862306a36Sopenharmony_ci ph.raw = frame; 258962306a36Sopenharmony_ci 259062306a36Sopenharmony_ci skb->protocol = proto; 259162306a36Sopenharmony_ci skb->dev = dev; 259262306a36Sopenharmony_ci skb->priority = READ_ONCE(po->sk.sk_priority); 259362306a36Sopenharmony_ci skb->mark = READ_ONCE(po->sk.sk_mark); 259462306a36Sopenharmony_ci skb->tstamp = sockc->transmit_time; 
259562306a36Sopenharmony_ci skb_setup_tx_timestamp(skb, sockc->tsflags); 259662306a36Sopenharmony_ci skb_zcopy_set_nouarg(skb, ph.raw); 259762306a36Sopenharmony_ci 259862306a36Sopenharmony_ci skb_reserve(skb, hlen); 259962306a36Sopenharmony_ci skb_reset_network_header(skb); 260062306a36Sopenharmony_ci 260162306a36Sopenharmony_ci to_write = tp_len; 260262306a36Sopenharmony_ci 260362306a36Sopenharmony_ci if (sock->type == SOCK_DGRAM) { 260462306a36Sopenharmony_ci err = dev_hard_header(skb, dev, ntohs(proto), addr, 260562306a36Sopenharmony_ci NULL, tp_len); 260662306a36Sopenharmony_ci if (unlikely(err < 0)) 260762306a36Sopenharmony_ci return -EINVAL; 260862306a36Sopenharmony_ci } else if (copylen) { 260962306a36Sopenharmony_ci int hdrlen = min_t(int, copylen, tp_len); 261062306a36Sopenharmony_ci 261162306a36Sopenharmony_ci skb_push(skb, dev->hard_header_len); 261262306a36Sopenharmony_ci skb_put(skb, copylen - dev->hard_header_len); 261362306a36Sopenharmony_ci err = skb_store_bits(skb, 0, data, hdrlen); 261462306a36Sopenharmony_ci if (unlikely(err)) 261562306a36Sopenharmony_ci return err; 261662306a36Sopenharmony_ci if (!dev_validate_header(dev, skb->data, hdrlen)) 261762306a36Sopenharmony_ci return -EINVAL; 261862306a36Sopenharmony_ci 261962306a36Sopenharmony_ci data += hdrlen; 262062306a36Sopenharmony_ci to_write -= hdrlen; 262162306a36Sopenharmony_ci } 262262306a36Sopenharmony_ci 262362306a36Sopenharmony_ci offset = offset_in_page(data); 262462306a36Sopenharmony_ci len_max = PAGE_SIZE - offset; 262562306a36Sopenharmony_ci len = ((to_write > len_max) ? 
len_max : to_write); 262662306a36Sopenharmony_ci 262762306a36Sopenharmony_ci skb->data_len = to_write; 262862306a36Sopenharmony_ci skb->len += to_write; 262962306a36Sopenharmony_ci skb->truesize += to_write; 263062306a36Sopenharmony_ci refcount_add(to_write, &po->sk.sk_wmem_alloc); 263162306a36Sopenharmony_ci 263262306a36Sopenharmony_ci while (likely(to_write)) { 263362306a36Sopenharmony_ci nr_frags = skb_shinfo(skb)->nr_frags; 263462306a36Sopenharmony_ci 263562306a36Sopenharmony_ci if (unlikely(nr_frags >= MAX_SKB_FRAGS)) { 263662306a36Sopenharmony_ci pr_err("Packet exceed the number of skb frags(%u)\n", 263762306a36Sopenharmony_ci (unsigned int)MAX_SKB_FRAGS); 263862306a36Sopenharmony_ci return -EFAULT; 263962306a36Sopenharmony_ci } 264062306a36Sopenharmony_ci 264162306a36Sopenharmony_ci page = pgv_to_page(data); 264262306a36Sopenharmony_ci data += len; 264362306a36Sopenharmony_ci flush_dcache_page(page); 264462306a36Sopenharmony_ci get_page(page); 264562306a36Sopenharmony_ci skb_fill_page_desc(skb, nr_frags, page, offset, len); 264662306a36Sopenharmony_ci to_write -= len; 264762306a36Sopenharmony_ci offset = 0; 264862306a36Sopenharmony_ci len_max = PAGE_SIZE; 264962306a36Sopenharmony_ci len = ((to_write > len_max) ? 
len_max : to_write); 265062306a36Sopenharmony_ci } 265162306a36Sopenharmony_ci 265262306a36Sopenharmony_ci packet_parse_headers(skb, sock); 265362306a36Sopenharmony_ci 265462306a36Sopenharmony_ci return tp_len; 265562306a36Sopenharmony_ci} 265662306a36Sopenharmony_ci 265762306a36Sopenharmony_cistatic int tpacket_parse_header(struct packet_sock *po, void *frame, 265862306a36Sopenharmony_ci int size_max, void **data) 265962306a36Sopenharmony_ci{ 266062306a36Sopenharmony_ci union tpacket_uhdr ph; 266162306a36Sopenharmony_ci int tp_len, off; 266262306a36Sopenharmony_ci 266362306a36Sopenharmony_ci ph.raw = frame; 266462306a36Sopenharmony_ci 266562306a36Sopenharmony_ci switch (po->tp_version) { 266662306a36Sopenharmony_ci case TPACKET_V3: 266762306a36Sopenharmony_ci if (ph.h3->tp_next_offset != 0) { 266862306a36Sopenharmony_ci pr_warn_once("variable sized slot not supported"); 266962306a36Sopenharmony_ci return -EINVAL; 267062306a36Sopenharmony_ci } 267162306a36Sopenharmony_ci tp_len = ph.h3->tp_len; 267262306a36Sopenharmony_ci break; 267362306a36Sopenharmony_ci case TPACKET_V2: 267462306a36Sopenharmony_ci tp_len = ph.h2->tp_len; 267562306a36Sopenharmony_ci break; 267662306a36Sopenharmony_ci default: 267762306a36Sopenharmony_ci tp_len = ph.h1->tp_len; 267862306a36Sopenharmony_ci break; 267962306a36Sopenharmony_ci } 268062306a36Sopenharmony_ci if (unlikely(tp_len > size_max)) { 268162306a36Sopenharmony_ci pr_err("packet size is too long (%d > %d)\n", tp_len, size_max); 268262306a36Sopenharmony_ci return -EMSGSIZE; 268362306a36Sopenharmony_ci } 268462306a36Sopenharmony_ci 268562306a36Sopenharmony_ci if (unlikely(packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF))) { 268662306a36Sopenharmony_ci int off_min, off_max; 268762306a36Sopenharmony_ci 268862306a36Sopenharmony_ci off_min = po->tp_hdrlen - sizeof(struct sockaddr_ll); 268962306a36Sopenharmony_ci off_max = po->tx_ring.frame_size - tp_len; 269062306a36Sopenharmony_ci if (po->sk.sk_type == SOCK_DGRAM) { 
269162306a36Sopenharmony_ci switch (po->tp_version) { 269262306a36Sopenharmony_ci case TPACKET_V3: 269362306a36Sopenharmony_ci off = ph.h3->tp_net; 269462306a36Sopenharmony_ci break; 269562306a36Sopenharmony_ci case TPACKET_V2: 269662306a36Sopenharmony_ci off = ph.h2->tp_net; 269762306a36Sopenharmony_ci break; 269862306a36Sopenharmony_ci default: 269962306a36Sopenharmony_ci off = ph.h1->tp_net; 270062306a36Sopenharmony_ci break; 270162306a36Sopenharmony_ci } 270262306a36Sopenharmony_ci } else { 270362306a36Sopenharmony_ci switch (po->tp_version) { 270462306a36Sopenharmony_ci case TPACKET_V3: 270562306a36Sopenharmony_ci off = ph.h3->tp_mac; 270662306a36Sopenharmony_ci break; 270762306a36Sopenharmony_ci case TPACKET_V2: 270862306a36Sopenharmony_ci off = ph.h2->tp_mac; 270962306a36Sopenharmony_ci break; 271062306a36Sopenharmony_ci default: 271162306a36Sopenharmony_ci off = ph.h1->tp_mac; 271262306a36Sopenharmony_ci break; 271362306a36Sopenharmony_ci } 271462306a36Sopenharmony_ci } 271562306a36Sopenharmony_ci if (unlikely((off < off_min) || (off_max < off))) 271662306a36Sopenharmony_ci return -EINVAL; 271762306a36Sopenharmony_ci } else { 271862306a36Sopenharmony_ci off = po->tp_hdrlen - sizeof(struct sockaddr_ll); 271962306a36Sopenharmony_ci } 272062306a36Sopenharmony_ci 272162306a36Sopenharmony_ci *data = frame + off; 272262306a36Sopenharmony_ci return tp_len; 272362306a36Sopenharmony_ci} 272462306a36Sopenharmony_ci 272562306a36Sopenharmony_cistatic int tpacket_snd(struct packet_sock *po, struct msghdr *msg) 272662306a36Sopenharmony_ci{ 272762306a36Sopenharmony_ci struct sk_buff *skb = NULL; 272862306a36Sopenharmony_ci struct net_device *dev; 272962306a36Sopenharmony_ci struct virtio_net_hdr *vnet_hdr = NULL; 273062306a36Sopenharmony_ci struct sockcm_cookie sockc; 273162306a36Sopenharmony_ci __be16 proto; 273262306a36Sopenharmony_ci int err, reserve = 0; 273362306a36Sopenharmony_ci void *ph; 273462306a36Sopenharmony_ci DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, 
msg->msg_name); 273562306a36Sopenharmony_ci bool need_wait = !(msg->msg_flags & MSG_DONTWAIT); 273662306a36Sopenharmony_ci int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); 273762306a36Sopenharmony_ci unsigned char *addr = NULL; 273862306a36Sopenharmony_ci int tp_len, size_max; 273962306a36Sopenharmony_ci void *data; 274062306a36Sopenharmony_ci int len_sum = 0; 274162306a36Sopenharmony_ci int status = TP_STATUS_AVAILABLE; 274262306a36Sopenharmony_ci int hlen, tlen, copylen = 0; 274362306a36Sopenharmony_ci long timeo = 0; 274462306a36Sopenharmony_ci 274562306a36Sopenharmony_ci mutex_lock(&po->pg_vec_lock); 274662306a36Sopenharmony_ci 274762306a36Sopenharmony_ci /* packet_sendmsg() check on tx_ring.pg_vec was lockless, 274862306a36Sopenharmony_ci * we need to confirm it under protection of pg_vec_lock. 274962306a36Sopenharmony_ci */ 275062306a36Sopenharmony_ci if (unlikely(!po->tx_ring.pg_vec)) { 275162306a36Sopenharmony_ci err = -EBUSY; 275262306a36Sopenharmony_ci goto out; 275362306a36Sopenharmony_ci } 275462306a36Sopenharmony_ci if (likely(saddr == NULL)) { 275562306a36Sopenharmony_ci dev = packet_cached_dev_get(po); 275662306a36Sopenharmony_ci proto = READ_ONCE(po->num); 275762306a36Sopenharmony_ci } else { 275862306a36Sopenharmony_ci err = -EINVAL; 275962306a36Sopenharmony_ci if (msg->msg_namelen < sizeof(struct sockaddr_ll)) 276062306a36Sopenharmony_ci goto out; 276162306a36Sopenharmony_ci if (msg->msg_namelen < (saddr->sll_halen 276262306a36Sopenharmony_ci + offsetof(struct sockaddr_ll, 276362306a36Sopenharmony_ci sll_addr))) 276462306a36Sopenharmony_ci goto out; 276562306a36Sopenharmony_ci proto = saddr->sll_protocol; 276662306a36Sopenharmony_ci dev = dev_get_by_index(sock_net(&po->sk), saddr->sll_ifindex); 276762306a36Sopenharmony_ci if (po->sk.sk_socket->type == SOCK_DGRAM) { 276862306a36Sopenharmony_ci if (dev && msg->msg_namelen < dev->addr_len + 276962306a36Sopenharmony_ci offsetof(struct sockaddr_ll, sll_addr)) 277062306a36Sopenharmony_ci goto out_put; 
277162306a36Sopenharmony_ci addr = saddr->sll_addr; 277262306a36Sopenharmony_ci } 277362306a36Sopenharmony_ci } 277462306a36Sopenharmony_ci 277562306a36Sopenharmony_ci err = -ENXIO; 277662306a36Sopenharmony_ci if (unlikely(dev == NULL)) 277762306a36Sopenharmony_ci goto out; 277862306a36Sopenharmony_ci err = -ENETDOWN; 277962306a36Sopenharmony_ci if (unlikely(!(dev->flags & IFF_UP))) 278062306a36Sopenharmony_ci goto out_put; 278162306a36Sopenharmony_ci 278262306a36Sopenharmony_ci sockcm_init(&sockc, &po->sk); 278362306a36Sopenharmony_ci if (msg->msg_controllen) { 278462306a36Sopenharmony_ci err = sock_cmsg_send(&po->sk, msg, &sockc); 278562306a36Sopenharmony_ci if (unlikely(err)) 278662306a36Sopenharmony_ci goto out_put; 278762306a36Sopenharmony_ci } 278862306a36Sopenharmony_ci 278962306a36Sopenharmony_ci if (po->sk.sk_socket->type == SOCK_RAW) 279062306a36Sopenharmony_ci reserve = dev->hard_header_len; 279162306a36Sopenharmony_ci size_max = po->tx_ring.frame_size 279262306a36Sopenharmony_ci - (po->tp_hdrlen - sizeof(struct sockaddr_ll)); 279362306a36Sopenharmony_ci 279462306a36Sopenharmony_ci if ((size_max > dev->mtu + reserve + VLAN_HLEN) && !vnet_hdr_sz) 279562306a36Sopenharmony_ci size_max = dev->mtu + reserve + VLAN_HLEN; 279662306a36Sopenharmony_ci 279762306a36Sopenharmony_ci reinit_completion(&po->skb_completion); 279862306a36Sopenharmony_ci 279962306a36Sopenharmony_ci do { 280062306a36Sopenharmony_ci ph = packet_current_frame(po, &po->tx_ring, 280162306a36Sopenharmony_ci TP_STATUS_SEND_REQUEST); 280262306a36Sopenharmony_ci if (unlikely(ph == NULL)) { 280362306a36Sopenharmony_ci if (need_wait && skb) { 280462306a36Sopenharmony_ci timeo = sock_sndtimeo(&po->sk, msg->msg_flags & MSG_DONTWAIT); 280562306a36Sopenharmony_ci timeo = wait_for_completion_interruptible_timeout(&po->skb_completion, timeo); 280662306a36Sopenharmony_ci if (timeo <= 0) { 280762306a36Sopenharmony_ci err = !timeo ? 
-ETIMEDOUT : -ERESTARTSYS; 280862306a36Sopenharmony_ci goto out_put; 280962306a36Sopenharmony_ci } 281062306a36Sopenharmony_ci } 281162306a36Sopenharmony_ci /* check for additional frames */ 281262306a36Sopenharmony_ci continue; 281362306a36Sopenharmony_ci } 281462306a36Sopenharmony_ci 281562306a36Sopenharmony_ci skb = NULL; 281662306a36Sopenharmony_ci tp_len = tpacket_parse_header(po, ph, size_max, &data); 281762306a36Sopenharmony_ci if (tp_len < 0) 281862306a36Sopenharmony_ci goto tpacket_error; 281962306a36Sopenharmony_ci 282062306a36Sopenharmony_ci status = TP_STATUS_SEND_REQUEST; 282162306a36Sopenharmony_ci hlen = LL_RESERVED_SPACE(dev); 282262306a36Sopenharmony_ci tlen = dev->needed_tailroom; 282362306a36Sopenharmony_ci if (vnet_hdr_sz) { 282462306a36Sopenharmony_ci vnet_hdr = data; 282562306a36Sopenharmony_ci data += vnet_hdr_sz; 282662306a36Sopenharmony_ci tp_len -= vnet_hdr_sz; 282762306a36Sopenharmony_ci if (tp_len < 0 || 282862306a36Sopenharmony_ci __packet_snd_vnet_parse(vnet_hdr, tp_len)) { 282962306a36Sopenharmony_ci tp_len = -EINVAL; 283062306a36Sopenharmony_ci goto tpacket_error; 283162306a36Sopenharmony_ci } 283262306a36Sopenharmony_ci copylen = __virtio16_to_cpu(vio_le(), 283362306a36Sopenharmony_ci vnet_hdr->hdr_len); 283462306a36Sopenharmony_ci } 283562306a36Sopenharmony_ci copylen = max_t(int, copylen, dev->hard_header_len); 283662306a36Sopenharmony_ci skb = sock_alloc_send_skb(&po->sk, 283762306a36Sopenharmony_ci hlen + tlen + sizeof(struct sockaddr_ll) + 283862306a36Sopenharmony_ci (copylen - dev->hard_header_len), 283962306a36Sopenharmony_ci !need_wait, &err); 284062306a36Sopenharmony_ci 284162306a36Sopenharmony_ci if (unlikely(skb == NULL)) { 284262306a36Sopenharmony_ci /* we assume the socket was initially writeable ... 
*/ 284362306a36Sopenharmony_ci if (likely(len_sum > 0)) 284462306a36Sopenharmony_ci err = len_sum; 284562306a36Sopenharmony_ci goto out_status; 284662306a36Sopenharmony_ci } 284762306a36Sopenharmony_ci tp_len = tpacket_fill_skb(po, skb, ph, dev, data, tp_len, proto, 284862306a36Sopenharmony_ci addr, hlen, copylen, &sockc); 284962306a36Sopenharmony_ci if (likely(tp_len >= 0) && 285062306a36Sopenharmony_ci tp_len > dev->mtu + reserve && 285162306a36Sopenharmony_ci !vnet_hdr_sz && 285262306a36Sopenharmony_ci !packet_extra_vlan_len_allowed(dev, skb)) 285362306a36Sopenharmony_ci tp_len = -EMSGSIZE; 285462306a36Sopenharmony_ci 285562306a36Sopenharmony_ci if (unlikely(tp_len < 0)) { 285662306a36Sopenharmony_citpacket_error: 285762306a36Sopenharmony_ci if (packet_sock_flag(po, PACKET_SOCK_TP_LOSS)) { 285862306a36Sopenharmony_ci __packet_set_status(po, ph, 285962306a36Sopenharmony_ci TP_STATUS_AVAILABLE); 286062306a36Sopenharmony_ci packet_increment_head(&po->tx_ring); 286162306a36Sopenharmony_ci kfree_skb(skb); 286262306a36Sopenharmony_ci continue; 286362306a36Sopenharmony_ci } else { 286462306a36Sopenharmony_ci status = TP_STATUS_WRONG_FORMAT; 286562306a36Sopenharmony_ci err = tp_len; 286662306a36Sopenharmony_ci goto out_status; 286762306a36Sopenharmony_ci } 286862306a36Sopenharmony_ci } 286962306a36Sopenharmony_ci 287062306a36Sopenharmony_ci if (vnet_hdr_sz) { 287162306a36Sopenharmony_ci if (virtio_net_hdr_to_skb(skb, vnet_hdr, vio_le())) { 287262306a36Sopenharmony_ci tp_len = -EINVAL; 287362306a36Sopenharmony_ci goto tpacket_error; 287462306a36Sopenharmony_ci } 287562306a36Sopenharmony_ci virtio_net_hdr_set_proto(skb, vnet_hdr); 287662306a36Sopenharmony_ci } 287762306a36Sopenharmony_ci 287862306a36Sopenharmony_ci skb->destructor = tpacket_destruct_skb; 287962306a36Sopenharmony_ci __packet_set_status(po, ph, TP_STATUS_SENDING); 288062306a36Sopenharmony_ci packet_inc_pending(&po->tx_ring); 288162306a36Sopenharmony_ci 288262306a36Sopenharmony_ci status = 
TP_STATUS_SEND_REQUEST; 288362306a36Sopenharmony_ci err = packet_xmit(po, skb); 288462306a36Sopenharmony_ci if (unlikely(err != 0)) { 288562306a36Sopenharmony_ci if (err > 0) 288662306a36Sopenharmony_ci err = net_xmit_errno(err); 288762306a36Sopenharmony_ci if (err && __packet_get_status(po, ph) == 288862306a36Sopenharmony_ci TP_STATUS_AVAILABLE) { 288962306a36Sopenharmony_ci /* skb was destructed already */ 289062306a36Sopenharmony_ci skb = NULL; 289162306a36Sopenharmony_ci goto out_status; 289262306a36Sopenharmony_ci } 289362306a36Sopenharmony_ci /* 289462306a36Sopenharmony_ci * skb was dropped but not destructed yet; 289562306a36Sopenharmony_ci * let's treat it like congestion or err < 0 289662306a36Sopenharmony_ci */ 289762306a36Sopenharmony_ci err = 0; 289862306a36Sopenharmony_ci } 289962306a36Sopenharmony_ci packet_increment_head(&po->tx_ring); 290062306a36Sopenharmony_ci len_sum += tp_len; 290162306a36Sopenharmony_ci } while (likely((ph != NULL) || 290262306a36Sopenharmony_ci /* Note: packet_read_pending() might be slow if we have 290362306a36Sopenharmony_ci * to call it as it's per_cpu variable, but in fast-path 290462306a36Sopenharmony_ci * we already short-circuit the loop with the first 290562306a36Sopenharmony_ci * condition, and luckily don't have to go that path 290662306a36Sopenharmony_ci * anyway. 
290762306a36Sopenharmony_ci */ 290862306a36Sopenharmony_ci (need_wait && packet_read_pending(&po->tx_ring)))); 290962306a36Sopenharmony_ci 291062306a36Sopenharmony_ci err = len_sum; 291162306a36Sopenharmony_ci goto out_put; 291262306a36Sopenharmony_ci 291362306a36Sopenharmony_ciout_status: 291462306a36Sopenharmony_ci __packet_set_status(po, ph, status); 291562306a36Sopenharmony_ci kfree_skb(skb); 291662306a36Sopenharmony_ciout_put: 291762306a36Sopenharmony_ci dev_put(dev); 291862306a36Sopenharmony_ciout: 291962306a36Sopenharmony_ci mutex_unlock(&po->pg_vec_lock); 292062306a36Sopenharmony_ci return err; 292162306a36Sopenharmony_ci} 292262306a36Sopenharmony_ci 292362306a36Sopenharmony_cistatic struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, 292462306a36Sopenharmony_ci size_t reserve, size_t len, 292562306a36Sopenharmony_ci size_t linear, int noblock, 292662306a36Sopenharmony_ci int *err) 292762306a36Sopenharmony_ci{ 292862306a36Sopenharmony_ci struct sk_buff *skb; 292962306a36Sopenharmony_ci 293062306a36Sopenharmony_ci /* Under a page? Don't bother with paged skb. 
*/ 293162306a36Sopenharmony_ci if (prepad + len < PAGE_SIZE || !linear) 293262306a36Sopenharmony_ci linear = len; 293362306a36Sopenharmony_ci 293462306a36Sopenharmony_ci if (len - linear > MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) 293562306a36Sopenharmony_ci linear = len - MAX_SKB_FRAGS * (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER); 293662306a36Sopenharmony_ci skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, 293762306a36Sopenharmony_ci err, PAGE_ALLOC_COSTLY_ORDER); 293862306a36Sopenharmony_ci if (!skb) 293962306a36Sopenharmony_ci return NULL; 294062306a36Sopenharmony_ci 294162306a36Sopenharmony_ci skb_reserve(skb, reserve); 294262306a36Sopenharmony_ci skb_put(skb, linear); 294362306a36Sopenharmony_ci skb->data_len = len - linear; 294462306a36Sopenharmony_ci skb->len += len - linear; 294562306a36Sopenharmony_ci 294662306a36Sopenharmony_ci return skb; 294762306a36Sopenharmony_ci} 294862306a36Sopenharmony_ci 294962306a36Sopenharmony_cistatic int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) 295062306a36Sopenharmony_ci{ 295162306a36Sopenharmony_ci struct sock *sk = sock->sk; 295262306a36Sopenharmony_ci DECLARE_SOCKADDR(struct sockaddr_ll *, saddr, msg->msg_name); 295362306a36Sopenharmony_ci struct sk_buff *skb; 295462306a36Sopenharmony_ci struct net_device *dev; 295562306a36Sopenharmony_ci __be16 proto; 295662306a36Sopenharmony_ci unsigned char *addr = NULL; 295762306a36Sopenharmony_ci int err, reserve = 0; 295862306a36Sopenharmony_ci struct sockcm_cookie sockc; 295962306a36Sopenharmony_ci struct virtio_net_hdr vnet_hdr = { 0 }; 296062306a36Sopenharmony_ci int offset = 0; 296162306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 296262306a36Sopenharmony_ci int vnet_hdr_sz = READ_ONCE(po->vnet_hdr_sz); 296362306a36Sopenharmony_ci int hlen, tlen, linear; 296462306a36Sopenharmony_ci int extra_len = 0; 296562306a36Sopenharmony_ci 296662306a36Sopenharmony_ci /* 296762306a36Sopenharmony_ci * Get and verify the address. 
296862306a36Sopenharmony_ci */ 296962306a36Sopenharmony_ci 297062306a36Sopenharmony_ci if (likely(saddr == NULL)) { 297162306a36Sopenharmony_ci dev = packet_cached_dev_get(po); 297262306a36Sopenharmony_ci proto = READ_ONCE(po->num); 297362306a36Sopenharmony_ci } else { 297462306a36Sopenharmony_ci err = -EINVAL; 297562306a36Sopenharmony_ci if (msg->msg_namelen < sizeof(struct sockaddr_ll)) 297662306a36Sopenharmony_ci goto out; 297762306a36Sopenharmony_ci if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr))) 297862306a36Sopenharmony_ci goto out; 297962306a36Sopenharmony_ci proto = saddr->sll_protocol; 298062306a36Sopenharmony_ci dev = dev_get_by_index(sock_net(sk), saddr->sll_ifindex); 298162306a36Sopenharmony_ci if (sock->type == SOCK_DGRAM) { 298262306a36Sopenharmony_ci if (dev && msg->msg_namelen < dev->addr_len + 298362306a36Sopenharmony_ci offsetof(struct sockaddr_ll, sll_addr)) 298462306a36Sopenharmony_ci goto out_unlock; 298562306a36Sopenharmony_ci addr = saddr->sll_addr; 298662306a36Sopenharmony_ci } 298762306a36Sopenharmony_ci } 298862306a36Sopenharmony_ci 298962306a36Sopenharmony_ci err = -ENXIO; 299062306a36Sopenharmony_ci if (unlikely(dev == NULL)) 299162306a36Sopenharmony_ci goto out_unlock; 299262306a36Sopenharmony_ci err = -ENETDOWN; 299362306a36Sopenharmony_ci if (unlikely(!(dev->flags & IFF_UP))) 299462306a36Sopenharmony_ci goto out_unlock; 299562306a36Sopenharmony_ci 299662306a36Sopenharmony_ci sockcm_init(&sockc, sk); 299762306a36Sopenharmony_ci sockc.mark = READ_ONCE(sk->sk_mark); 299862306a36Sopenharmony_ci if (msg->msg_controllen) { 299962306a36Sopenharmony_ci err = sock_cmsg_send(sk, msg, &sockc); 300062306a36Sopenharmony_ci if (unlikely(err)) 300162306a36Sopenharmony_ci goto out_unlock; 300262306a36Sopenharmony_ci } 300362306a36Sopenharmony_ci 300462306a36Sopenharmony_ci if (sock->type == SOCK_RAW) 300562306a36Sopenharmony_ci reserve = dev->hard_header_len; 300662306a36Sopenharmony_ci if (vnet_hdr_sz) { 
300762306a36Sopenharmony_ci err = packet_snd_vnet_parse(msg, &len, &vnet_hdr, vnet_hdr_sz); 300862306a36Sopenharmony_ci if (err) 300962306a36Sopenharmony_ci goto out_unlock; 301062306a36Sopenharmony_ci } 301162306a36Sopenharmony_ci 301262306a36Sopenharmony_ci if (unlikely(sock_flag(sk, SOCK_NOFCS))) { 301362306a36Sopenharmony_ci if (!netif_supports_nofcs(dev)) { 301462306a36Sopenharmony_ci err = -EPROTONOSUPPORT; 301562306a36Sopenharmony_ci goto out_unlock; 301662306a36Sopenharmony_ci } 301762306a36Sopenharmony_ci extra_len = 4; /* We're doing our own CRC */ 301862306a36Sopenharmony_ci } 301962306a36Sopenharmony_ci 302062306a36Sopenharmony_ci err = -EMSGSIZE; 302162306a36Sopenharmony_ci if (!vnet_hdr.gso_type && 302262306a36Sopenharmony_ci (len > dev->mtu + reserve + VLAN_HLEN + extra_len)) 302362306a36Sopenharmony_ci goto out_unlock; 302462306a36Sopenharmony_ci 302562306a36Sopenharmony_ci err = -ENOBUFS; 302662306a36Sopenharmony_ci hlen = LL_RESERVED_SPACE(dev); 302762306a36Sopenharmony_ci tlen = dev->needed_tailroom; 302862306a36Sopenharmony_ci linear = __virtio16_to_cpu(vio_le(), vnet_hdr.hdr_len); 302962306a36Sopenharmony_ci linear = max(linear, min_t(int, len, dev->hard_header_len)); 303062306a36Sopenharmony_ci skb = packet_alloc_skb(sk, hlen + tlen, hlen, len, linear, 303162306a36Sopenharmony_ci msg->msg_flags & MSG_DONTWAIT, &err); 303262306a36Sopenharmony_ci if (skb == NULL) 303362306a36Sopenharmony_ci goto out_unlock; 303462306a36Sopenharmony_ci 303562306a36Sopenharmony_ci skb_reset_network_header(skb); 303662306a36Sopenharmony_ci 303762306a36Sopenharmony_ci err = -EINVAL; 303862306a36Sopenharmony_ci if (sock->type == SOCK_DGRAM) { 303962306a36Sopenharmony_ci offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); 304062306a36Sopenharmony_ci if (unlikely(offset < 0)) 304162306a36Sopenharmony_ci goto out_free; 304262306a36Sopenharmony_ci } else if (reserve) { 304362306a36Sopenharmony_ci skb_reserve(skb, -reserve); 304462306a36Sopenharmony_ci if 
(len < reserve + sizeof(struct ipv6hdr) && 304562306a36Sopenharmony_ci dev->min_header_len != dev->hard_header_len) 304662306a36Sopenharmony_ci skb_reset_network_header(skb); 304762306a36Sopenharmony_ci } 304862306a36Sopenharmony_ci 304962306a36Sopenharmony_ci /* Returns -EFAULT on error */ 305062306a36Sopenharmony_ci err = skb_copy_datagram_from_iter(skb, offset, &msg->msg_iter, len); 305162306a36Sopenharmony_ci if (err) 305262306a36Sopenharmony_ci goto out_free; 305362306a36Sopenharmony_ci 305462306a36Sopenharmony_ci if ((sock->type == SOCK_RAW && 305562306a36Sopenharmony_ci !dev_validate_header(dev, skb->data, len)) || !skb->len) { 305662306a36Sopenharmony_ci err = -EINVAL; 305762306a36Sopenharmony_ci goto out_free; 305862306a36Sopenharmony_ci } 305962306a36Sopenharmony_ci 306062306a36Sopenharmony_ci skb_setup_tx_timestamp(skb, sockc.tsflags); 306162306a36Sopenharmony_ci 306262306a36Sopenharmony_ci if (!vnet_hdr.gso_type && (len > dev->mtu + reserve + extra_len) && 306362306a36Sopenharmony_ci !packet_extra_vlan_len_allowed(dev, skb)) { 306462306a36Sopenharmony_ci err = -EMSGSIZE; 306562306a36Sopenharmony_ci goto out_free; 306662306a36Sopenharmony_ci } 306762306a36Sopenharmony_ci 306862306a36Sopenharmony_ci skb->protocol = proto; 306962306a36Sopenharmony_ci skb->dev = dev; 307062306a36Sopenharmony_ci skb->priority = READ_ONCE(sk->sk_priority); 307162306a36Sopenharmony_ci skb->mark = sockc.mark; 307262306a36Sopenharmony_ci skb->tstamp = sockc.transmit_time; 307362306a36Sopenharmony_ci 307462306a36Sopenharmony_ci if (unlikely(extra_len == 4)) 307562306a36Sopenharmony_ci skb->no_fcs = 1; 307662306a36Sopenharmony_ci 307762306a36Sopenharmony_ci packet_parse_headers(skb, sock); 307862306a36Sopenharmony_ci 307962306a36Sopenharmony_ci if (vnet_hdr_sz) { 308062306a36Sopenharmony_ci err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); 308162306a36Sopenharmony_ci if (err) 308262306a36Sopenharmony_ci goto out_free; 308362306a36Sopenharmony_ci len += vnet_hdr_sz; 
308462306a36Sopenharmony_ci virtio_net_hdr_set_proto(skb, &vnet_hdr); 308562306a36Sopenharmony_ci } 308662306a36Sopenharmony_ci 308762306a36Sopenharmony_ci err = packet_xmit(po, skb); 308862306a36Sopenharmony_ci 308962306a36Sopenharmony_ci if (unlikely(err != 0)) { 309062306a36Sopenharmony_ci if (err > 0) 309162306a36Sopenharmony_ci err = net_xmit_errno(err); 309262306a36Sopenharmony_ci if (err) 309362306a36Sopenharmony_ci goto out_unlock; 309462306a36Sopenharmony_ci } 309562306a36Sopenharmony_ci 309662306a36Sopenharmony_ci dev_put(dev); 309762306a36Sopenharmony_ci 309862306a36Sopenharmony_ci return len; 309962306a36Sopenharmony_ci 310062306a36Sopenharmony_ciout_free: 310162306a36Sopenharmony_ci kfree_skb(skb); 310262306a36Sopenharmony_ciout_unlock: 310362306a36Sopenharmony_ci dev_put(dev); 310462306a36Sopenharmony_ciout: 310562306a36Sopenharmony_ci return err; 310662306a36Sopenharmony_ci} 310762306a36Sopenharmony_ci 310862306a36Sopenharmony_cistatic int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) 310962306a36Sopenharmony_ci{ 311062306a36Sopenharmony_ci struct sock *sk = sock->sk; 311162306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 311262306a36Sopenharmony_ci 311362306a36Sopenharmony_ci /* Reading tx_ring.pg_vec without holding pg_vec_lock is racy. 311462306a36Sopenharmony_ci * tpacket_snd() will redo the check safely. 311562306a36Sopenharmony_ci */ 311662306a36Sopenharmony_ci if (data_race(po->tx_ring.pg_vec)) 311762306a36Sopenharmony_ci return tpacket_snd(po, msg); 311862306a36Sopenharmony_ci 311962306a36Sopenharmony_ci return packet_snd(sock, msg, len); 312062306a36Sopenharmony_ci} 312162306a36Sopenharmony_ci 312262306a36Sopenharmony_ci/* 312362306a36Sopenharmony_ci * Close a PACKET socket. This is fairly simple. We immediately go 312462306a36Sopenharmony_ci * to 'closed' state and remove our protocol entry in the device list. 
312562306a36Sopenharmony_ci */ 312662306a36Sopenharmony_ci 312762306a36Sopenharmony_cistatic int packet_release(struct socket *sock) 312862306a36Sopenharmony_ci{ 312962306a36Sopenharmony_ci struct sock *sk = sock->sk; 313062306a36Sopenharmony_ci struct packet_sock *po; 313162306a36Sopenharmony_ci struct packet_fanout *f; 313262306a36Sopenharmony_ci struct net *net; 313362306a36Sopenharmony_ci union tpacket_req_u req_u; 313462306a36Sopenharmony_ci 313562306a36Sopenharmony_ci if (!sk) 313662306a36Sopenharmony_ci return 0; 313762306a36Sopenharmony_ci 313862306a36Sopenharmony_ci net = sock_net(sk); 313962306a36Sopenharmony_ci po = pkt_sk(sk); 314062306a36Sopenharmony_ci 314162306a36Sopenharmony_ci mutex_lock(&net->packet.sklist_lock); 314262306a36Sopenharmony_ci sk_del_node_init_rcu(sk); 314362306a36Sopenharmony_ci mutex_unlock(&net->packet.sklist_lock); 314462306a36Sopenharmony_ci 314562306a36Sopenharmony_ci sock_prot_inuse_add(net, sk->sk_prot, -1); 314662306a36Sopenharmony_ci 314762306a36Sopenharmony_ci spin_lock(&po->bind_lock); 314862306a36Sopenharmony_ci unregister_prot_hook(sk, false); 314962306a36Sopenharmony_ci packet_cached_dev_reset(po); 315062306a36Sopenharmony_ci 315162306a36Sopenharmony_ci if (po->prot_hook.dev) { 315262306a36Sopenharmony_ci netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); 315362306a36Sopenharmony_ci po->prot_hook.dev = NULL; 315462306a36Sopenharmony_ci } 315562306a36Sopenharmony_ci spin_unlock(&po->bind_lock); 315662306a36Sopenharmony_ci 315762306a36Sopenharmony_ci packet_flush_mclist(sk); 315862306a36Sopenharmony_ci 315962306a36Sopenharmony_ci lock_sock(sk); 316062306a36Sopenharmony_ci if (po->rx_ring.pg_vec) { 316162306a36Sopenharmony_ci memset(&req_u, 0, sizeof(req_u)); 316262306a36Sopenharmony_ci packet_set_ring(sk, &req_u, 1, 0); 316362306a36Sopenharmony_ci } 316462306a36Sopenharmony_ci 316562306a36Sopenharmony_ci if (po->tx_ring.pg_vec) { 316662306a36Sopenharmony_ci memset(&req_u, 0, sizeof(req_u)); 
316762306a36Sopenharmony_ci packet_set_ring(sk, &req_u, 1, 1); 316862306a36Sopenharmony_ci } 316962306a36Sopenharmony_ci release_sock(sk); 317062306a36Sopenharmony_ci 317162306a36Sopenharmony_ci f = fanout_release(sk); 317262306a36Sopenharmony_ci 317362306a36Sopenharmony_ci synchronize_net(); 317462306a36Sopenharmony_ci 317562306a36Sopenharmony_ci kfree(po->rollover); 317662306a36Sopenharmony_ci if (f) { 317762306a36Sopenharmony_ci fanout_release_data(f); 317862306a36Sopenharmony_ci kvfree(f); 317962306a36Sopenharmony_ci } 318062306a36Sopenharmony_ci /* 318162306a36Sopenharmony_ci * Now the socket is dead. No more input will appear. 318262306a36Sopenharmony_ci */ 318362306a36Sopenharmony_ci sock_orphan(sk); 318462306a36Sopenharmony_ci sock->sk = NULL; 318562306a36Sopenharmony_ci 318662306a36Sopenharmony_ci /* Purge queues */ 318762306a36Sopenharmony_ci 318862306a36Sopenharmony_ci skb_queue_purge(&sk->sk_receive_queue); 318962306a36Sopenharmony_ci packet_free_pending(po); 319062306a36Sopenharmony_ci 319162306a36Sopenharmony_ci sock_put(sk); 319262306a36Sopenharmony_ci return 0; 319362306a36Sopenharmony_ci} 319462306a36Sopenharmony_ci 319562306a36Sopenharmony_ci/* 319662306a36Sopenharmony_ci * Attach a packet hook. 
319762306a36Sopenharmony_ci */ 319862306a36Sopenharmony_ci 319962306a36Sopenharmony_cistatic int packet_do_bind(struct sock *sk, const char *name, int ifindex, 320062306a36Sopenharmony_ci __be16 proto) 320162306a36Sopenharmony_ci{ 320262306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 320362306a36Sopenharmony_ci struct net_device *dev = NULL; 320462306a36Sopenharmony_ci bool unlisted = false; 320562306a36Sopenharmony_ci bool need_rehook; 320662306a36Sopenharmony_ci int ret = 0; 320762306a36Sopenharmony_ci 320862306a36Sopenharmony_ci lock_sock(sk); 320962306a36Sopenharmony_ci spin_lock(&po->bind_lock); 321062306a36Sopenharmony_ci if (!proto) 321162306a36Sopenharmony_ci proto = po->num; 321262306a36Sopenharmony_ci 321362306a36Sopenharmony_ci rcu_read_lock(); 321462306a36Sopenharmony_ci 321562306a36Sopenharmony_ci if (po->fanout) { 321662306a36Sopenharmony_ci ret = -EINVAL; 321762306a36Sopenharmony_ci goto out_unlock; 321862306a36Sopenharmony_ci } 321962306a36Sopenharmony_ci 322062306a36Sopenharmony_ci if (name) { 322162306a36Sopenharmony_ci dev = dev_get_by_name_rcu(sock_net(sk), name); 322262306a36Sopenharmony_ci if (!dev) { 322362306a36Sopenharmony_ci ret = -ENODEV; 322462306a36Sopenharmony_ci goto out_unlock; 322562306a36Sopenharmony_ci } 322662306a36Sopenharmony_ci } else if (ifindex) { 322762306a36Sopenharmony_ci dev = dev_get_by_index_rcu(sock_net(sk), ifindex); 322862306a36Sopenharmony_ci if (!dev) { 322962306a36Sopenharmony_ci ret = -ENODEV; 323062306a36Sopenharmony_ci goto out_unlock; 323162306a36Sopenharmony_ci } 323262306a36Sopenharmony_ci } 323362306a36Sopenharmony_ci 323462306a36Sopenharmony_ci need_rehook = po->prot_hook.type != proto || po->prot_hook.dev != dev; 323562306a36Sopenharmony_ci 323662306a36Sopenharmony_ci if (need_rehook) { 323762306a36Sopenharmony_ci dev_hold(dev); 323862306a36Sopenharmony_ci if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { 323962306a36Sopenharmony_ci rcu_read_unlock(); 324062306a36Sopenharmony_ci /* prevents 
packet_notifier() from calling 324162306a36Sopenharmony_ci * register_prot_hook() 324262306a36Sopenharmony_ci */ 324362306a36Sopenharmony_ci WRITE_ONCE(po->num, 0); 324462306a36Sopenharmony_ci __unregister_prot_hook(sk, true); 324562306a36Sopenharmony_ci rcu_read_lock(); 324662306a36Sopenharmony_ci if (dev) 324762306a36Sopenharmony_ci unlisted = !dev_get_by_index_rcu(sock_net(sk), 324862306a36Sopenharmony_ci dev->ifindex); 324962306a36Sopenharmony_ci } 325062306a36Sopenharmony_ci 325162306a36Sopenharmony_ci BUG_ON(packet_sock_flag(po, PACKET_SOCK_RUNNING)); 325262306a36Sopenharmony_ci WRITE_ONCE(po->num, proto); 325362306a36Sopenharmony_ci po->prot_hook.type = proto; 325462306a36Sopenharmony_ci 325562306a36Sopenharmony_ci netdev_put(po->prot_hook.dev, &po->prot_hook.dev_tracker); 325662306a36Sopenharmony_ci 325762306a36Sopenharmony_ci if (unlikely(unlisted)) { 325862306a36Sopenharmony_ci po->prot_hook.dev = NULL; 325962306a36Sopenharmony_ci WRITE_ONCE(po->ifindex, -1); 326062306a36Sopenharmony_ci packet_cached_dev_reset(po); 326162306a36Sopenharmony_ci } else { 326262306a36Sopenharmony_ci netdev_hold(dev, &po->prot_hook.dev_tracker, 326362306a36Sopenharmony_ci GFP_ATOMIC); 326462306a36Sopenharmony_ci po->prot_hook.dev = dev; 326562306a36Sopenharmony_ci WRITE_ONCE(po->ifindex, dev ? 
dev->ifindex : 0); 326662306a36Sopenharmony_ci packet_cached_dev_assign(po, dev); 326762306a36Sopenharmony_ci } 326862306a36Sopenharmony_ci dev_put(dev); 326962306a36Sopenharmony_ci } 327062306a36Sopenharmony_ci 327162306a36Sopenharmony_ci if (proto == 0 || !need_rehook) 327262306a36Sopenharmony_ci goto out_unlock; 327362306a36Sopenharmony_ci 327462306a36Sopenharmony_ci if (!unlisted && (!dev || (dev->flags & IFF_UP))) { 327562306a36Sopenharmony_ci register_prot_hook(sk); 327662306a36Sopenharmony_ci } else { 327762306a36Sopenharmony_ci sk->sk_err = ENETDOWN; 327862306a36Sopenharmony_ci if (!sock_flag(sk, SOCK_DEAD)) 327962306a36Sopenharmony_ci sk_error_report(sk); 328062306a36Sopenharmony_ci } 328162306a36Sopenharmony_ci 328262306a36Sopenharmony_ciout_unlock: 328362306a36Sopenharmony_ci rcu_read_unlock(); 328462306a36Sopenharmony_ci spin_unlock(&po->bind_lock); 328562306a36Sopenharmony_ci release_sock(sk); 328662306a36Sopenharmony_ci return ret; 328762306a36Sopenharmony_ci} 328862306a36Sopenharmony_ci 328962306a36Sopenharmony_ci/* 329062306a36Sopenharmony_ci * Bind a packet socket to a device 329162306a36Sopenharmony_ci */ 329262306a36Sopenharmony_ci 329362306a36Sopenharmony_cistatic int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, 329462306a36Sopenharmony_ci int addr_len) 329562306a36Sopenharmony_ci{ 329662306a36Sopenharmony_ci struct sock *sk = sock->sk; 329762306a36Sopenharmony_ci char name[sizeof(uaddr->sa_data_min) + 1]; 329862306a36Sopenharmony_ci 329962306a36Sopenharmony_ci /* 330062306a36Sopenharmony_ci * Check legality 330162306a36Sopenharmony_ci */ 330262306a36Sopenharmony_ci 330362306a36Sopenharmony_ci if (addr_len != sizeof(struct sockaddr)) 330462306a36Sopenharmony_ci return -EINVAL; 330562306a36Sopenharmony_ci /* uaddr->sa_data comes from the userspace, it's not guaranteed to be 330662306a36Sopenharmony_ci * zero-terminated. 
330762306a36Sopenharmony_ci */ 330862306a36Sopenharmony_ci memcpy(name, uaddr->sa_data, sizeof(uaddr->sa_data_min)); 330962306a36Sopenharmony_ci name[sizeof(uaddr->sa_data_min)] = 0; 331062306a36Sopenharmony_ci 331162306a36Sopenharmony_ci return packet_do_bind(sk, name, 0, 0); 331262306a36Sopenharmony_ci} 331362306a36Sopenharmony_ci 331462306a36Sopenharmony_cistatic int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) 331562306a36Sopenharmony_ci{ 331662306a36Sopenharmony_ci struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr; 331762306a36Sopenharmony_ci struct sock *sk = sock->sk; 331862306a36Sopenharmony_ci 331962306a36Sopenharmony_ci /* 332062306a36Sopenharmony_ci * Check legality 332162306a36Sopenharmony_ci */ 332262306a36Sopenharmony_ci 332362306a36Sopenharmony_ci if (addr_len < sizeof(struct sockaddr_ll)) 332462306a36Sopenharmony_ci return -EINVAL; 332562306a36Sopenharmony_ci if (sll->sll_family != AF_PACKET) 332662306a36Sopenharmony_ci return -EINVAL; 332762306a36Sopenharmony_ci 332862306a36Sopenharmony_ci return packet_do_bind(sk, NULL, sll->sll_ifindex, sll->sll_protocol); 332962306a36Sopenharmony_ci} 333062306a36Sopenharmony_ci 333162306a36Sopenharmony_cistatic struct proto packet_proto = { 333262306a36Sopenharmony_ci .name = "PACKET", 333362306a36Sopenharmony_ci .owner = THIS_MODULE, 333462306a36Sopenharmony_ci .obj_size = sizeof(struct packet_sock), 333562306a36Sopenharmony_ci}; 333662306a36Sopenharmony_ci 333762306a36Sopenharmony_ci/* 333862306a36Sopenharmony_ci * Create a packet of type SOCK_PACKET. 
333962306a36Sopenharmony_ci */ 334062306a36Sopenharmony_ci 334162306a36Sopenharmony_cistatic int packet_create(struct net *net, struct socket *sock, int protocol, 334262306a36Sopenharmony_ci int kern) 334362306a36Sopenharmony_ci{ 334462306a36Sopenharmony_ci struct sock *sk; 334562306a36Sopenharmony_ci struct packet_sock *po; 334662306a36Sopenharmony_ci __be16 proto = (__force __be16)protocol; /* weird, but documented */ 334762306a36Sopenharmony_ci int err; 334862306a36Sopenharmony_ci 334962306a36Sopenharmony_ci if (!ns_capable(net->user_ns, CAP_NET_RAW)) 335062306a36Sopenharmony_ci return -EPERM; 335162306a36Sopenharmony_ci if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW && 335262306a36Sopenharmony_ci sock->type != SOCK_PACKET) 335362306a36Sopenharmony_ci return -ESOCKTNOSUPPORT; 335462306a36Sopenharmony_ci 335562306a36Sopenharmony_ci sock->state = SS_UNCONNECTED; 335662306a36Sopenharmony_ci 335762306a36Sopenharmony_ci err = -ENOBUFS; 335862306a36Sopenharmony_ci sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); 335962306a36Sopenharmony_ci if (sk == NULL) 336062306a36Sopenharmony_ci goto out; 336162306a36Sopenharmony_ci 336262306a36Sopenharmony_ci sock->ops = &packet_ops; 336362306a36Sopenharmony_ci if (sock->type == SOCK_PACKET) 336462306a36Sopenharmony_ci sock->ops = &packet_ops_spkt; 336562306a36Sopenharmony_ci 336662306a36Sopenharmony_ci sock_init_data(sock, sk); 336762306a36Sopenharmony_ci 336862306a36Sopenharmony_ci po = pkt_sk(sk); 336962306a36Sopenharmony_ci init_completion(&po->skb_completion); 337062306a36Sopenharmony_ci sk->sk_family = PF_PACKET; 337162306a36Sopenharmony_ci po->num = proto; 337262306a36Sopenharmony_ci 337362306a36Sopenharmony_ci err = packet_alloc_pending(po); 337462306a36Sopenharmony_ci if (err) 337562306a36Sopenharmony_ci goto out2; 337662306a36Sopenharmony_ci 337762306a36Sopenharmony_ci packet_cached_dev_reset(po); 337862306a36Sopenharmony_ci 337962306a36Sopenharmony_ci sk->sk_destruct = packet_sock_destruct; 
338062306a36Sopenharmony_ci 338162306a36Sopenharmony_ci /* 338262306a36Sopenharmony_ci * Attach a protocol block 338362306a36Sopenharmony_ci */ 338462306a36Sopenharmony_ci 338562306a36Sopenharmony_ci spin_lock_init(&po->bind_lock); 338662306a36Sopenharmony_ci mutex_init(&po->pg_vec_lock); 338762306a36Sopenharmony_ci po->rollover = NULL; 338862306a36Sopenharmony_ci po->prot_hook.func = packet_rcv; 338962306a36Sopenharmony_ci 339062306a36Sopenharmony_ci if (sock->type == SOCK_PACKET) 339162306a36Sopenharmony_ci po->prot_hook.func = packet_rcv_spkt; 339262306a36Sopenharmony_ci 339362306a36Sopenharmony_ci po->prot_hook.af_packet_priv = sk; 339462306a36Sopenharmony_ci po->prot_hook.af_packet_net = sock_net(sk); 339562306a36Sopenharmony_ci 339662306a36Sopenharmony_ci if (proto) { 339762306a36Sopenharmony_ci po->prot_hook.type = proto; 339862306a36Sopenharmony_ci __register_prot_hook(sk); 339962306a36Sopenharmony_ci } 340062306a36Sopenharmony_ci 340162306a36Sopenharmony_ci mutex_lock(&net->packet.sklist_lock); 340262306a36Sopenharmony_ci sk_add_node_tail_rcu(sk, &net->packet.sklist); 340362306a36Sopenharmony_ci mutex_unlock(&net->packet.sklist_lock); 340462306a36Sopenharmony_ci 340562306a36Sopenharmony_ci sock_prot_inuse_add(net, &packet_proto, 1); 340662306a36Sopenharmony_ci 340762306a36Sopenharmony_ci return 0; 340862306a36Sopenharmony_ciout2: 340962306a36Sopenharmony_ci sk_free(sk); 341062306a36Sopenharmony_ciout: 341162306a36Sopenharmony_ci return err; 341262306a36Sopenharmony_ci} 341362306a36Sopenharmony_ci 341462306a36Sopenharmony_ci/* 341562306a36Sopenharmony_ci * Pull a packet from our receive queue and hand it to the user. 341662306a36Sopenharmony_ci * If necessary we block. 
341762306a36Sopenharmony_ci */ 341862306a36Sopenharmony_ci 341962306a36Sopenharmony_cistatic int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, 342062306a36Sopenharmony_ci int flags) 342162306a36Sopenharmony_ci{ 342262306a36Sopenharmony_ci struct sock *sk = sock->sk; 342362306a36Sopenharmony_ci struct sk_buff *skb; 342462306a36Sopenharmony_ci int copied, err; 342562306a36Sopenharmony_ci int vnet_hdr_len = READ_ONCE(pkt_sk(sk)->vnet_hdr_sz); 342662306a36Sopenharmony_ci unsigned int origlen = 0; 342762306a36Sopenharmony_ci 342862306a36Sopenharmony_ci err = -EINVAL; 342962306a36Sopenharmony_ci if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE)) 343062306a36Sopenharmony_ci goto out; 343162306a36Sopenharmony_ci 343262306a36Sopenharmony_ci#if 0 343362306a36Sopenharmony_ci /* What error should we return now? EUNATTACH? */ 343462306a36Sopenharmony_ci if (pkt_sk(sk)->ifindex < 0) 343562306a36Sopenharmony_ci return -ENODEV; 343662306a36Sopenharmony_ci#endif 343762306a36Sopenharmony_ci 343862306a36Sopenharmony_ci if (flags & MSG_ERRQUEUE) { 343962306a36Sopenharmony_ci err = sock_recv_errqueue(sk, msg, len, 344062306a36Sopenharmony_ci SOL_PACKET, PACKET_TX_TIMESTAMP); 344162306a36Sopenharmony_ci goto out; 344262306a36Sopenharmony_ci } 344362306a36Sopenharmony_ci 344462306a36Sopenharmony_ci /* 344562306a36Sopenharmony_ci * Call the generic datagram receiver. This handles all sorts 344662306a36Sopenharmony_ci * of horrible races and re-entrancy so we can forget about it 344762306a36Sopenharmony_ci * in the protocol layers. 344862306a36Sopenharmony_ci * 344962306a36Sopenharmony_ci * Now it will return ENETDOWN, if device have just gone down, 345062306a36Sopenharmony_ci * but then it will block. 
345162306a36Sopenharmony_ci */ 345262306a36Sopenharmony_ci 345362306a36Sopenharmony_ci skb = skb_recv_datagram(sk, flags, &err); 345462306a36Sopenharmony_ci 345562306a36Sopenharmony_ci /* 345662306a36Sopenharmony_ci * An error occurred so return it. Because skb_recv_datagram() 345762306a36Sopenharmony_ci * handles the blocking we don't see and worry about blocking 345862306a36Sopenharmony_ci * retries. 345962306a36Sopenharmony_ci */ 346062306a36Sopenharmony_ci 346162306a36Sopenharmony_ci if (skb == NULL) 346262306a36Sopenharmony_ci goto out; 346362306a36Sopenharmony_ci 346462306a36Sopenharmony_ci packet_rcv_try_clear_pressure(pkt_sk(sk)); 346562306a36Sopenharmony_ci 346662306a36Sopenharmony_ci if (vnet_hdr_len) { 346762306a36Sopenharmony_ci err = packet_rcv_vnet(msg, skb, &len, vnet_hdr_len); 346862306a36Sopenharmony_ci if (err) 346962306a36Sopenharmony_ci goto out_free; 347062306a36Sopenharmony_ci } 347162306a36Sopenharmony_ci 347262306a36Sopenharmony_ci /* You lose any data beyond the buffer you gave. If it worries 347362306a36Sopenharmony_ci * a user program they can ask the device for its MTU 347462306a36Sopenharmony_ci * anyway. 
347562306a36Sopenharmony_ci */ 347662306a36Sopenharmony_ci copied = skb->len; 347762306a36Sopenharmony_ci if (copied > len) { 347862306a36Sopenharmony_ci copied = len; 347962306a36Sopenharmony_ci msg->msg_flags |= MSG_TRUNC; 348062306a36Sopenharmony_ci } 348162306a36Sopenharmony_ci 348262306a36Sopenharmony_ci err = skb_copy_datagram_msg(skb, 0, msg, copied); 348362306a36Sopenharmony_ci if (err) 348462306a36Sopenharmony_ci goto out_free; 348562306a36Sopenharmony_ci 348662306a36Sopenharmony_ci if (sock->type != SOCK_PACKET) { 348762306a36Sopenharmony_ci struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; 348862306a36Sopenharmony_ci 348962306a36Sopenharmony_ci /* Original length was stored in sockaddr_ll fields */ 349062306a36Sopenharmony_ci origlen = PACKET_SKB_CB(skb)->sa.origlen; 349162306a36Sopenharmony_ci sll->sll_family = AF_PACKET; 349262306a36Sopenharmony_ci sll->sll_protocol = skb->protocol; 349362306a36Sopenharmony_ci } 349462306a36Sopenharmony_ci 349562306a36Sopenharmony_ci sock_recv_cmsgs(msg, sk, skb); 349662306a36Sopenharmony_ci 349762306a36Sopenharmony_ci if (msg->msg_name) { 349862306a36Sopenharmony_ci const size_t max_len = min(sizeof(skb->cb), 349962306a36Sopenharmony_ci sizeof(struct sockaddr_storage)); 350062306a36Sopenharmony_ci int copy_len; 350162306a36Sopenharmony_ci 350262306a36Sopenharmony_ci /* If the address length field is there to be filled 350362306a36Sopenharmony_ci * in, we fill it in now. 
350462306a36Sopenharmony_ci */ 350562306a36Sopenharmony_ci if (sock->type == SOCK_PACKET) { 350662306a36Sopenharmony_ci __sockaddr_check_size(sizeof(struct sockaddr_pkt)); 350762306a36Sopenharmony_ci msg->msg_namelen = sizeof(struct sockaddr_pkt); 350862306a36Sopenharmony_ci copy_len = msg->msg_namelen; 350962306a36Sopenharmony_ci } else { 351062306a36Sopenharmony_ci struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll; 351162306a36Sopenharmony_ci 351262306a36Sopenharmony_ci msg->msg_namelen = sll->sll_halen + 351362306a36Sopenharmony_ci offsetof(struct sockaddr_ll, sll_addr); 351462306a36Sopenharmony_ci copy_len = msg->msg_namelen; 351562306a36Sopenharmony_ci if (msg->msg_namelen < sizeof(struct sockaddr_ll)) { 351662306a36Sopenharmony_ci memset(msg->msg_name + 351762306a36Sopenharmony_ci offsetof(struct sockaddr_ll, sll_addr), 351862306a36Sopenharmony_ci 0, sizeof(sll->sll_addr)); 351962306a36Sopenharmony_ci msg->msg_namelen = sizeof(struct sockaddr_ll); 352062306a36Sopenharmony_ci } 352162306a36Sopenharmony_ci } 352262306a36Sopenharmony_ci if (WARN_ON_ONCE(copy_len > max_len)) { 352362306a36Sopenharmony_ci copy_len = max_len; 352462306a36Sopenharmony_ci msg->msg_namelen = copy_len; 352562306a36Sopenharmony_ci } 352662306a36Sopenharmony_ci memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); 352762306a36Sopenharmony_ci } 352862306a36Sopenharmony_ci 352962306a36Sopenharmony_ci if (packet_sock_flag(pkt_sk(sk), PACKET_SOCK_AUXDATA)) { 353062306a36Sopenharmony_ci struct tpacket_auxdata aux; 353162306a36Sopenharmony_ci 353262306a36Sopenharmony_ci aux.tp_status = TP_STATUS_USER; 353362306a36Sopenharmony_ci if (skb->ip_summed == CHECKSUM_PARTIAL) 353462306a36Sopenharmony_ci aux.tp_status |= TP_STATUS_CSUMNOTREADY; 353562306a36Sopenharmony_ci else if (skb->pkt_type != PACKET_OUTGOING && 353662306a36Sopenharmony_ci skb_csum_unnecessary(skb)) 353762306a36Sopenharmony_ci aux.tp_status |= TP_STATUS_CSUM_VALID; 353862306a36Sopenharmony_ci if (skb_is_gso(skb) && 
skb_is_gso_tcp(skb)) 353962306a36Sopenharmony_ci aux.tp_status |= TP_STATUS_GSO_TCP; 354062306a36Sopenharmony_ci 354162306a36Sopenharmony_ci aux.tp_len = origlen; 354262306a36Sopenharmony_ci aux.tp_snaplen = skb->len; 354362306a36Sopenharmony_ci aux.tp_mac = 0; 354462306a36Sopenharmony_ci aux.tp_net = skb_network_offset(skb); 354562306a36Sopenharmony_ci if (skb_vlan_tag_present(skb)) { 354662306a36Sopenharmony_ci aux.tp_vlan_tci = skb_vlan_tag_get(skb); 354762306a36Sopenharmony_ci aux.tp_vlan_tpid = ntohs(skb->vlan_proto); 354862306a36Sopenharmony_ci aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; 354962306a36Sopenharmony_ci } else { 355062306a36Sopenharmony_ci aux.tp_vlan_tci = 0; 355162306a36Sopenharmony_ci aux.tp_vlan_tpid = 0; 355262306a36Sopenharmony_ci } 355362306a36Sopenharmony_ci put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux); 355462306a36Sopenharmony_ci } 355562306a36Sopenharmony_ci 355662306a36Sopenharmony_ci /* 355762306a36Sopenharmony_ci * Free or return the buffer as appropriate. Again this 355862306a36Sopenharmony_ci * hides all the races and re-entrancy issues from us. 355962306a36Sopenharmony_ci */ 356062306a36Sopenharmony_ci err = vnet_hdr_len + ((flags&MSG_TRUNC) ? 
skb->len : copied); 356162306a36Sopenharmony_ci 356262306a36Sopenharmony_ciout_free: 356362306a36Sopenharmony_ci skb_free_datagram(sk, skb); 356462306a36Sopenharmony_ciout: 356562306a36Sopenharmony_ci return err; 356662306a36Sopenharmony_ci} 356762306a36Sopenharmony_ci 356862306a36Sopenharmony_cistatic int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr, 356962306a36Sopenharmony_ci int peer) 357062306a36Sopenharmony_ci{ 357162306a36Sopenharmony_ci struct net_device *dev; 357262306a36Sopenharmony_ci struct sock *sk = sock->sk; 357362306a36Sopenharmony_ci 357462306a36Sopenharmony_ci if (peer) 357562306a36Sopenharmony_ci return -EOPNOTSUPP; 357662306a36Sopenharmony_ci 357762306a36Sopenharmony_ci uaddr->sa_family = AF_PACKET; 357862306a36Sopenharmony_ci memset(uaddr->sa_data, 0, sizeof(uaddr->sa_data_min)); 357962306a36Sopenharmony_ci rcu_read_lock(); 358062306a36Sopenharmony_ci dev = dev_get_by_index_rcu(sock_net(sk), READ_ONCE(pkt_sk(sk)->ifindex)); 358162306a36Sopenharmony_ci if (dev) 358262306a36Sopenharmony_ci strscpy(uaddr->sa_data, dev->name, sizeof(uaddr->sa_data_min)); 358362306a36Sopenharmony_ci rcu_read_unlock(); 358462306a36Sopenharmony_ci 358562306a36Sopenharmony_ci return sizeof(*uaddr); 358662306a36Sopenharmony_ci} 358762306a36Sopenharmony_ci 358862306a36Sopenharmony_cistatic int packet_getname(struct socket *sock, struct sockaddr *uaddr, 358962306a36Sopenharmony_ci int peer) 359062306a36Sopenharmony_ci{ 359162306a36Sopenharmony_ci struct net_device *dev; 359262306a36Sopenharmony_ci struct sock *sk = sock->sk; 359362306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 359462306a36Sopenharmony_ci DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr); 359562306a36Sopenharmony_ci int ifindex; 359662306a36Sopenharmony_ci 359762306a36Sopenharmony_ci if (peer) 359862306a36Sopenharmony_ci return -EOPNOTSUPP; 359962306a36Sopenharmony_ci 360062306a36Sopenharmony_ci ifindex = READ_ONCE(po->ifindex); 360162306a36Sopenharmony_ci sll->sll_family = 
AF_PACKET; 360262306a36Sopenharmony_ci sll->sll_ifindex = ifindex; 360362306a36Sopenharmony_ci sll->sll_protocol = READ_ONCE(po->num); 360462306a36Sopenharmony_ci sll->sll_pkttype = 0; 360562306a36Sopenharmony_ci rcu_read_lock(); 360662306a36Sopenharmony_ci dev = dev_get_by_index_rcu(sock_net(sk), ifindex); 360762306a36Sopenharmony_ci if (dev) { 360862306a36Sopenharmony_ci sll->sll_hatype = dev->type; 360962306a36Sopenharmony_ci sll->sll_halen = dev->addr_len; 361062306a36Sopenharmony_ci 361162306a36Sopenharmony_ci /* Let __fortify_memcpy_chk() know the actual buffer size. */ 361262306a36Sopenharmony_ci memcpy(((struct sockaddr_storage *)sll)->__data + 361362306a36Sopenharmony_ci offsetof(struct sockaddr_ll, sll_addr) - 361462306a36Sopenharmony_ci offsetofend(struct sockaddr_ll, sll_family), 361562306a36Sopenharmony_ci dev->dev_addr, dev->addr_len); 361662306a36Sopenharmony_ci } else { 361762306a36Sopenharmony_ci sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */ 361862306a36Sopenharmony_ci sll->sll_halen = 0; 361962306a36Sopenharmony_ci } 362062306a36Sopenharmony_ci rcu_read_unlock(); 362162306a36Sopenharmony_ci 362262306a36Sopenharmony_ci return offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen; 362362306a36Sopenharmony_ci} 362462306a36Sopenharmony_ci 362562306a36Sopenharmony_cistatic int packet_dev_mc(struct net_device *dev, struct packet_mclist *i, 362662306a36Sopenharmony_ci int what) 362762306a36Sopenharmony_ci{ 362862306a36Sopenharmony_ci switch (i->type) { 362962306a36Sopenharmony_ci case PACKET_MR_MULTICAST: 363062306a36Sopenharmony_ci if (i->alen != dev->addr_len) 363162306a36Sopenharmony_ci return -EINVAL; 363262306a36Sopenharmony_ci if (what > 0) 363362306a36Sopenharmony_ci return dev_mc_add(dev, i->addr); 363462306a36Sopenharmony_ci else 363562306a36Sopenharmony_ci return dev_mc_del(dev, i->addr); 363662306a36Sopenharmony_ci break; 363762306a36Sopenharmony_ci case PACKET_MR_PROMISC: 363862306a36Sopenharmony_ci return 
dev_set_promiscuity(dev, what); 363962306a36Sopenharmony_ci case PACKET_MR_ALLMULTI: 364062306a36Sopenharmony_ci return dev_set_allmulti(dev, what); 364162306a36Sopenharmony_ci case PACKET_MR_UNICAST: 364262306a36Sopenharmony_ci if (i->alen != dev->addr_len) 364362306a36Sopenharmony_ci return -EINVAL; 364462306a36Sopenharmony_ci if (what > 0) 364562306a36Sopenharmony_ci return dev_uc_add(dev, i->addr); 364662306a36Sopenharmony_ci else 364762306a36Sopenharmony_ci return dev_uc_del(dev, i->addr); 364862306a36Sopenharmony_ci break; 364962306a36Sopenharmony_ci default: 365062306a36Sopenharmony_ci break; 365162306a36Sopenharmony_ci } 365262306a36Sopenharmony_ci return 0; 365362306a36Sopenharmony_ci} 365462306a36Sopenharmony_ci 365562306a36Sopenharmony_cistatic void packet_dev_mclist_delete(struct net_device *dev, 365662306a36Sopenharmony_ci struct packet_mclist **mlp) 365762306a36Sopenharmony_ci{ 365862306a36Sopenharmony_ci struct packet_mclist *ml; 365962306a36Sopenharmony_ci 366062306a36Sopenharmony_ci while ((ml = *mlp) != NULL) { 366162306a36Sopenharmony_ci if (ml->ifindex == dev->ifindex) { 366262306a36Sopenharmony_ci packet_dev_mc(dev, ml, -1); 366362306a36Sopenharmony_ci *mlp = ml->next; 366462306a36Sopenharmony_ci kfree(ml); 366562306a36Sopenharmony_ci } else 366662306a36Sopenharmony_ci mlp = &ml->next; 366762306a36Sopenharmony_ci } 366862306a36Sopenharmony_ci} 366962306a36Sopenharmony_ci 367062306a36Sopenharmony_cistatic int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq) 367162306a36Sopenharmony_ci{ 367262306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 367362306a36Sopenharmony_ci struct packet_mclist *ml, *i; 367462306a36Sopenharmony_ci struct net_device *dev; 367562306a36Sopenharmony_ci int err; 367662306a36Sopenharmony_ci 367762306a36Sopenharmony_ci rtnl_lock(); 367862306a36Sopenharmony_ci 367962306a36Sopenharmony_ci err = -ENODEV; 368062306a36Sopenharmony_ci dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex); 
368162306a36Sopenharmony_ci if (!dev) 368262306a36Sopenharmony_ci goto done; 368362306a36Sopenharmony_ci 368462306a36Sopenharmony_ci err = -EINVAL; 368562306a36Sopenharmony_ci if (mreq->mr_alen > dev->addr_len) 368662306a36Sopenharmony_ci goto done; 368762306a36Sopenharmony_ci 368862306a36Sopenharmony_ci err = -ENOBUFS; 368962306a36Sopenharmony_ci i = kmalloc(sizeof(*i), GFP_KERNEL); 369062306a36Sopenharmony_ci if (i == NULL) 369162306a36Sopenharmony_ci goto done; 369262306a36Sopenharmony_ci 369362306a36Sopenharmony_ci err = 0; 369462306a36Sopenharmony_ci for (ml = po->mclist; ml; ml = ml->next) { 369562306a36Sopenharmony_ci if (ml->ifindex == mreq->mr_ifindex && 369662306a36Sopenharmony_ci ml->type == mreq->mr_type && 369762306a36Sopenharmony_ci ml->alen == mreq->mr_alen && 369862306a36Sopenharmony_ci memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { 369962306a36Sopenharmony_ci ml->count++; 370062306a36Sopenharmony_ci /* Free the new element ... */ 370162306a36Sopenharmony_ci kfree(i); 370262306a36Sopenharmony_ci goto done; 370362306a36Sopenharmony_ci } 370462306a36Sopenharmony_ci } 370562306a36Sopenharmony_ci 370662306a36Sopenharmony_ci i->type = mreq->mr_type; 370762306a36Sopenharmony_ci i->ifindex = mreq->mr_ifindex; 370862306a36Sopenharmony_ci i->alen = mreq->mr_alen; 370962306a36Sopenharmony_ci memcpy(i->addr, mreq->mr_address, i->alen); 371062306a36Sopenharmony_ci memset(i->addr + i->alen, 0, sizeof(i->addr) - i->alen); 371162306a36Sopenharmony_ci i->count = 1; 371262306a36Sopenharmony_ci i->next = po->mclist; 371362306a36Sopenharmony_ci po->mclist = i; 371462306a36Sopenharmony_ci err = packet_dev_mc(dev, i, 1); 371562306a36Sopenharmony_ci if (err) { 371662306a36Sopenharmony_ci po->mclist = i->next; 371762306a36Sopenharmony_ci kfree(i); 371862306a36Sopenharmony_ci } 371962306a36Sopenharmony_ci 372062306a36Sopenharmony_cidone: 372162306a36Sopenharmony_ci rtnl_unlock(); 372262306a36Sopenharmony_ci return err; 372362306a36Sopenharmony_ci} 
372462306a36Sopenharmony_ci 372562306a36Sopenharmony_cistatic int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq) 372662306a36Sopenharmony_ci{ 372762306a36Sopenharmony_ci struct packet_mclist *ml, **mlp; 372862306a36Sopenharmony_ci 372962306a36Sopenharmony_ci rtnl_lock(); 373062306a36Sopenharmony_ci 373162306a36Sopenharmony_ci for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) { 373262306a36Sopenharmony_ci if (ml->ifindex == mreq->mr_ifindex && 373362306a36Sopenharmony_ci ml->type == mreq->mr_type && 373462306a36Sopenharmony_ci ml->alen == mreq->mr_alen && 373562306a36Sopenharmony_ci memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) { 373662306a36Sopenharmony_ci if (--ml->count == 0) { 373762306a36Sopenharmony_ci struct net_device *dev; 373862306a36Sopenharmony_ci *mlp = ml->next; 373962306a36Sopenharmony_ci dev = __dev_get_by_index(sock_net(sk), ml->ifindex); 374062306a36Sopenharmony_ci if (dev) 374162306a36Sopenharmony_ci packet_dev_mc(dev, ml, -1); 374262306a36Sopenharmony_ci kfree(ml); 374362306a36Sopenharmony_ci } 374462306a36Sopenharmony_ci break; 374562306a36Sopenharmony_ci } 374662306a36Sopenharmony_ci } 374762306a36Sopenharmony_ci rtnl_unlock(); 374862306a36Sopenharmony_ci return 0; 374962306a36Sopenharmony_ci} 375062306a36Sopenharmony_ci 375162306a36Sopenharmony_cistatic void packet_flush_mclist(struct sock *sk) 375262306a36Sopenharmony_ci{ 375362306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 375462306a36Sopenharmony_ci struct packet_mclist *ml; 375562306a36Sopenharmony_ci 375662306a36Sopenharmony_ci if (!po->mclist) 375762306a36Sopenharmony_ci return; 375862306a36Sopenharmony_ci 375962306a36Sopenharmony_ci rtnl_lock(); 376062306a36Sopenharmony_ci while ((ml = po->mclist) != NULL) { 376162306a36Sopenharmony_ci struct net_device *dev; 376262306a36Sopenharmony_ci 376362306a36Sopenharmony_ci po->mclist = ml->next; 376462306a36Sopenharmony_ci dev = __dev_get_by_index(sock_net(sk), ml->ifindex); 
376562306a36Sopenharmony_ci if (dev != NULL) 376662306a36Sopenharmony_ci packet_dev_mc(dev, ml, -1); 376762306a36Sopenharmony_ci kfree(ml); 376862306a36Sopenharmony_ci } 376962306a36Sopenharmony_ci rtnl_unlock(); 377062306a36Sopenharmony_ci} 377162306a36Sopenharmony_ci 377262306a36Sopenharmony_cistatic int 377362306a36Sopenharmony_cipacket_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, 377462306a36Sopenharmony_ci unsigned int optlen) 377562306a36Sopenharmony_ci{ 377662306a36Sopenharmony_ci struct sock *sk = sock->sk; 377762306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 377862306a36Sopenharmony_ci int ret; 377962306a36Sopenharmony_ci 378062306a36Sopenharmony_ci if (level != SOL_PACKET) 378162306a36Sopenharmony_ci return -ENOPROTOOPT; 378262306a36Sopenharmony_ci 378362306a36Sopenharmony_ci switch (optname) { 378462306a36Sopenharmony_ci case PACKET_ADD_MEMBERSHIP: 378562306a36Sopenharmony_ci case PACKET_DROP_MEMBERSHIP: 378662306a36Sopenharmony_ci { 378762306a36Sopenharmony_ci struct packet_mreq_max mreq; 378862306a36Sopenharmony_ci int len = optlen; 378962306a36Sopenharmony_ci memset(&mreq, 0, sizeof(mreq)); 379062306a36Sopenharmony_ci if (len < sizeof(struct packet_mreq)) 379162306a36Sopenharmony_ci return -EINVAL; 379262306a36Sopenharmony_ci if (len > sizeof(mreq)) 379362306a36Sopenharmony_ci len = sizeof(mreq); 379462306a36Sopenharmony_ci if (copy_from_sockptr(&mreq, optval, len)) 379562306a36Sopenharmony_ci return -EFAULT; 379662306a36Sopenharmony_ci if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address))) 379762306a36Sopenharmony_ci return -EINVAL; 379862306a36Sopenharmony_ci if (optname == PACKET_ADD_MEMBERSHIP) 379962306a36Sopenharmony_ci ret = packet_mc_add(sk, &mreq); 380062306a36Sopenharmony_ci else 380162306a36Sopenharmony_ci ret = packet_mc_drop(sk, &mreq); 380262306a36Sopenharmony_ci return ret; 380362306a36Sopenharmony_ci } 380462306a36Sopenharmony_ci 380562306a36Sopenharmony_ci case PACKET_RX_RING: 
380662306a36Sopenharmony_ci case PACKET_TX_RING: 380762306a36Sopenharmony_ci { 380862306a36Sopenharmony_ci union tpacket_req_u req_u; 380962306a36Sopenharmony_ci int len; 381062306a36Sopenharmony_ci 381162306a36Sopenharmony_ci lock_sock(sk); 381262306a36Sopenharmony_ci switch (po->tp_version) { 381362306a36Sopenharmony_ci case TPACKET_V1: 381462306a36Sopenharmony_ci case TPACKET_V2: 381562306a36Sopenharmony_ci len = sizeof(req_u.req); 381662306a36Sopenharmony_ci break; 381762306a36Sopenharmony_ci case TPACKET_V3: 381862306a36Sopenharmony_ci default: 381962306a36Sopenharmony_ci len = sizeof(req_u.req3); 382062306a36Sopenharmony_ci break; 382162306a36Sopenharmony_ci } 382262306a36Sopenharmony_ci if (optlen < len) { 382362306a36Sopenharmony_ci ret = -EINVAL; 382462306a36Sopenharmony_ci } else { 382562306a36Sopenharmony_ci if (copy_from_sockptr(&req_u.req, optval, len)) 382662306a36Sopenharmony_ci ret = -EFAULT; 382762306a36Sopenharmony_ci else 382862306a36Sopenharmony_ci ret = packet_set_ring(sk, &req_u, 0, 382962306a36Sopenharmony_ci optname == PACKET_TX_RING); 383062306a36Sopenharmony_ci } 383162306a36Sopenharmony_ci release_sock(sk); 383262306a36Sopenharmony_ci return ret; 383362306a36Sopenharmony_ci } 383462306a36Sopenharmony_ci case PACKET_COPY_THRESH: 383562306a36Sopenharmony_ci { 383662306a36Sopenharmony_ci int val; 383762306a36Sopenharmony_ci 383862306a36Sopenharmony_ci if (optlen != sizeof(val)) 383962306a36Sopenharmony_ci return -EINVAL; 384062306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 384162306a36Sopenharmony_ci return -EFAULT; 384262306a36Sopenharmony_ci 384362306a36Sopenharmony_ci pkt_sk(sk)->copy_thresh = val; 384462306a36Sopenharmony_ci return 0; 384562306a36Sopenharmony_ci } 384662306a36Sopenharmony_ci case PACKET_VERSION: 384762306a36Sopenharmony_ci { 384862306a36Sopenharmony_ci int val; 384962306a36Sopenharmony_ci 385062306a36Sopenharmony_ci if (optlen != sizeof(val)) 385162306a36Sopenharmony_ci return -EINVAL; 
385262306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 385362306a36Sopenharmony_ci return -EFAULT; 385462306a36Sopenharmony_ci switch (val) { 385562306a36Sopenharmony_ci case TPACKET_V1: 385662306a36Sopenharmony_ci case TPACKET_V2: 385762306a36Sopenharmony_ci case TPACKET_V3: 385862306a36Sopenharmony_ci break; 385962306a36Sopenharmony_ci default: 386062306a36Sopenharmony_ci return -EINVAL; 386162306a36Sopenharmony_ci } 386262306a36Sopenharmony_ci lock_sock(sk); 386362306a36Sopenharmony_ci if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { 386462306a36Sopenharmony_ci ret = -EBUSY; 386562306a36Sopenharmony_ci } else { 386662306a36Sopenharmony_ci po->tp_version = val; 386762306a36Sopenharmony_ci ret = 0; 386862306a36Sopenharmony_ci } 386962306a36Sopenharmony_ci release_sock(sk); 387062306a36Sopenharmony_ci return ret; 387162306a36Sopenharmony_ci } 387262306a36Sopenharmony_ci case PACKET_RESERVE: 387362306a36Sopenharmony_ci { 387462306a36Sopenharmony_ci unsigned int val; 387562306a36Sopenharmony_ci 387662306a36Sopenharmony_ci if (optlen != sizeof(val)) 387762306a36Sopenharmony_ci return -EINVAL; 387862306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 387962306a36Sopenharmony_ci return -EFAULT; 388062306a36Sopenharmony_ci if (val > INT_MAX) 388162306a36Sopenharmony_ci return -EINVAL; 388262306a36Sopenharmony_ci lock_sock(sk); 388362306a36Sopenharmony_ci if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { 388462306a36Sopenharmony_ci ret = -EBUSY; 388562306a36Sopenharmony_ci } else { 388662306a36Sopenharmony_ci po->tp_reserve = val; 388762306a36Sopenharmony_ci ret = 0; 388862306a36Sopenharmony_ci } 388962306a36Sopenharmony_ci release_sock(sk); 389062306a36Sopenharmony_ci return ret; 389162306a36Sopenharmony_ci } 389262306a36Sopenharmony_ci case PACKET_LOSS: 389362306a36Sopenharmony_ci { 389462306a36Sopenharmony_ci unsigned int val; 389562306a36Sopenharmony_ci 389662306a36Sopenharmony_ci if (optlen != sizeof(val)) 
389762306a36Sopenharmony_ci return -EINVAL; 389862306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 389962306a36Sopenharmony_ci return -EFAULT; 390062306a36Sopenharmony_ci 390162306a36Sopenharmony_ci lock_sock(sk); 390262306a36Sopenharmony_ci if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { 390362306a36Sopenharmony_ci ret = -EBUSY; 390462306a36Sopenharmony_ci } else { 390562306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_TP_LOSS, val); 390662306a36Sopenharmony_ci ret = 0; 390762306a36Sopenharmony_ci } 390862306a36Sopenharmony_ci release_sock(sk); 390962306a36Sopenharmony_ci return ret; 391062306a36Sopenharmony_ci } 391162306a36Sopenharmony_ci case PACKET_AUXDATA: 391262306a36Sopenharmony_ci { 391362306a36Sopenharmony_ci int val; 391462306a36Sopenharmony_ci 391562306a36Sopenharmony_ci if (optlen < sizeof(val)) 391662306a36Sopenharmony_ci return -EINVAL; 391762306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 391862306a36Sopenharmony_ci return -EFAULT; 391962306a36Sopenharmony_ci 392062306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_AUXDATA, val); 392162306a36Sopenharmony_ci return 0; 392262306a36Sopenharmony_ci } 392362306a36Sopenharmony_ci case PACKET_ORIGDEV: 392462306a36Sopenharmony_ci { 392562306a36Sopenharmony_ci int val; 392662306a36Sopenharmony_ci 392762306a36Sopenharmony_ci if (optlen < sizeof(val)) 392862306a36Sopenharmony_ci return -EINVAL; 392962306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 393062306a36Sopenharmony_ci return -EFAULT; 393162306a36Sopenharmony_ci 393262306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_ORIGDEV, val); 393362306a36Sopenharmony_ci return 0; 393462306a36Sopenharmony_ci } 393562306a36Sopenharmony_ci case PACKET_VNET_HDR: 393662306a36Sopenharmony_ci case PACKET_VNET_HDR_SZ: 393762306a36Sopenharmony_ci { 393862306a36Sopenharmony_ci int val, hdr_len; 393962306a36Sopenharmony_ci 394062306a36Sopenharmony_ci if (sock->type != SOCK_RAW) 
394162306a36Sopenharmony_ci return -EINVAL; 394262306a36Sopenharmony_ci if (optlen < sizeof(val)) 394362306a36Sopenharmony_ci return -EINVAL; 394462306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 394562306a36Sopenharmony_ci return -EFAULT; 394662306a36Sopenharmony_ci 394762306a36Sopenharmony_ci if (optname == PACKET_VNET_HDR_SZ) { 394862306a36Sopenharmony_ci if (val && val != sizeof(struct virtio_net_hdr) && 394962306a36Sopenharmony_ci val != sizeof(struct virtio_net_hdr_mrg_rxbuf)) 395062306a36Sopenharmony_ci return -EINVAL; 395162306a36Sopenharmony_ci hdr_len = val; 395262306a36Sopenharmony_ci } else { 395362306a36Sopenharmony_ci hdr_len = val ? sizeof(struct virtio_net_hdr) : 0; 395462306a36Sopenharmony_ci } 395562306a36Sopenharmony_ci lock_sock(sk); 395662306a36Sopenharmony_ci if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { 395762306a36Sopenharmony_ci ret = -EBUSY; 395862306a36Sopenharmony_ci } else { 395962306a36Sopenharmony_ci WRITE_ONCE(po->vnet_hdr_sz, hdr_len); 396062306a36Sopenharmony_ci ret = 0; 396162306a36Sopenharmony_ci } 396262306a36Sopenharmony_ci release_sock(sk); 396362306a36Sopenharmony_ci return ret; 396462306a36Sopenharmony_ci } 396562306a36Sopenharmony_ci case PACKET_TIMESTAMP: 396662306a36Sopenharmony_ci { 396762306a36Sopenharmony_ci int val; 396862306a36Sopenharmony_ci 396962306a36Sopenharmony_ci if (optlen != sizeof(val)) 397062306a36Sopenharmony_ci return -EINVAL; 397162306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 397262306a36Sopenharmony_ci return -EFAULT; 397362306a36Sopenharmony_ci 397462306a36Sopenharmony_ci WRITE_ONCE(po->tp_tstamp, val); 397562306a36Sopenharmony_ci return 0; 397662306a36Sopenharmony_ci } 397762306a36Sopenharmony_ci case PACKET_FANOUT: 397862306a36Sopenharmony_ci { 397962306a36Sopenharmony_ci struct fanout_args args = { 0 }; 398062306a36Sopenharmony_ci 398162306a36Sopenharmony_ci if (optlen != sizeof(int) && optlen != sizeof(args)) 398262306a36Sopenharmony_ci return 
-EINVAL; 398362306a36Sopenharmony_ci if (copy_from_sockptr(&args, optval, optlen)) 398462306a36Sopenharmony_ci return -EFAULT; 398562306a36Sopenharmony_ci 398662306a36Sopenharmony_ci return fanout_add(sk, &args); 398762306a36Sopenharmony_ci } 398862306a36Sopenharmony_ci case PACKET_FANOUT_DATA: 398962306a36Sopenharmony_ci { 399062306a36Sopenharmony_ci /* Paired with the WRITE_ONCE() in fanout_add() */ 399162306a36Sopenharmony_ci if (!READ_ONCE(po->fanout)) 399262306a36Sopenharmony_ci return -EINVAL; 399362306a36Sopenharmony_ci 399462306a36Sopenharmony_ci return fanout_set_data(po, optval, optlen); 399562306a36Sopenharmony_ci } 399662306a36Sopenharmony_ci case PACKET_IGNORE_OUTGOING: 399762306a36Sopenharmony_ci { 399862306a36Sopenharmony_ci int val; 399962306a36Sopenharmony_ci 400062306a36Sopenharmony_ci if (optlen != sizeof(val)) 400162306a36Sopenharmony_ci return -EINVAL; 400262306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 400362306a36Sopenharmony_ci return -EFAULT; 400462306a36Sopenharmony_ci if (val < 0 || val > 1) 400562306a36Sopenharmony_ci return -EINVAL; 400662306a36Sopenharmony_ci 400762306a36Sopenharmony_ci WRITE_ONCE(po->prot_hook.ignore_outgoing, !!val); 400862306a36Sopenharmony_ci return 0; 400962306a36Sopenharmony_ci } 401062306a36Sopenharmony_ci case PACKET_TX_HAS_OFF: 401162306a36Sopenharmony_ci { 401262306a36Sopenharmony_ci unsigned int val; 401362306a36Sopenharmony_ci 401462306a36Sopenharmony_ci if (optlen != sizeof(val)) 401562306a36Sopenharmony_ci return -EINVAL; 401662306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 401762306a36Sopenharmony_ci return -EFAULT; 401862306a36Sopenharmony_ci 401962306a36Sopenharmony_ci lock_sock(sk); 402062306a36Sopenharmony_ci if (!po->rx_ring.pg_vec && !po->tx_ring.pg_vec) 402162306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_TX_HAS_OFF, val); 402262306a36Sopenharmony_ci 402362306a36Sopenharmony_ci release_sock(sk); 402462306a36Sopenharmony_ci return 0; 
402562306a36Sopenharmony_ci } 402662306a36Sopenharmony_ci case PACKET_QDISC_BYPASS: 402762306a36Sopenharmony_ci { 402862306a36Sopenharmony_ci int val; 402962306a36Sopenharmony_ci 403062306a36Sopenharmony_ci if (optlen != sizeof(val)) 403162306a36Sopenharmony_ci return -EINVAL; 403262306a36Sopenharmony_ci if (copy_from_sockptr(&val, optval, sizeof(val))) 403362306a36Sopenharmony_ci return -EFAULT; 403462306a36Sopenharmony_ci 403562306a36Sopenharmony_ci packet_sock_flag_set(po, PACKET_SOCK_QDISC_BYPASS, val); 403662306a36Sopenharmony_ci return 0; 403762306a36Sopenharmony_ci } 403862306a36Sopenharmony_ci default: 403962306a36Sopenharmony_ci return -ENOPROTOOPT; 404062306a36Sopenharmony_ci } 404162306a36Sopenharmony_ci} 404262306a36Sopenharmony_ci 404362306a36Sopenharmony_cistatic int packet_getsockopt(struct socket *sock, int level, int optname, 404462306a36Sopenharmony_ci char __user *optval, int __user *optlen) 404562306a36Sopenharmony_ci{ 404662306a36Sopenharmony_ci int len; 404762306a36Sopenharmony_ci int val, lv = sizeof(val); 404862306a36Sopenharmony_ci struct sock *sk = sock->sk; 404962306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 405062306a36Sopenharmony_ci void *data = &val; 405162306a36Sopenharmony_ci union tpacket_stats_u st; 405262306a36Sopenharmony_ci struct tpacket_rollover_stats rstats; 405362306a36Sopenharmony_ci int drops; 405462306a36Sopenharmony_ci 405562306a36Sopenharmony_ci if (level != SOL_PACKET) 405662306a36Sopenharmony_ci return -ENOPROTOOPT; 405762306a36Sopenharmony_ci 405862306a36Sopenharmony_ci if (get_user(len, optlen)) 405962306a36Sopenharmony_ci return -EFAULT; 406062306a36Sopenharmony_ci 406162306a36Sopenharmony_ci if (len < 0) 406262306a36Sopenharmony_ci return -EINVAL; 406362306a36Sopenharmony_ci 406462306a36Sopenharmony_ci switch (optname) { 406562306a36Sopenharmony_ci case PACKET_STATISTICS: 406662306a36Sopenharmony_ci spin_lock_bh(&sk->sk_receive_queue.lock); 406762306a36Sopenharmony_ci memcpy(&st, &po->stats, 
sizeof(st)); 406862306a36Sopenharmony_ci memset(&po->stats, 0, sizeof(po->stats)); 406962306a36Sopenharmony_ci spin_unlock_bh(&sk->sk_receive_queue.lock); 407062306a36Sopenharmony_ci drops = atomic_xchg(&po->tp_drops, 0); 407162306a36Sopenharmony_ci 407262306a36Sopenharmony_ci if (po->tp_version == TPACKET_V3) { 407362306a36Sopenharmony_ci lv = sizeof(struct tpacket_stats_v3); 407462306a36Sopenharmony_ci st.stats3.tp_drops = drops; 407562306a36Sopenharmony_ci st.stats3.tp_packets += drops; 407662306a36Sopenharmony_ci data = &st.stats3; 407762306a36Sopenharmony_ci } else { 407862306a36Sopenharmony_ci lv = sizeof(struct tpacket_stats); 407962306a36Sopenharmony_ci st.stats1.tp_drops = drops; 408062306a36Sopenharmony_ci st.stats1.tp_packets += drops; 408162306a36Sopenharmony_ci data = &st.stats1; 408262306a36Sopenharmony_ci } 408362306a36Sopenharmony_ci 408462306a36Sopenharmony_ci break; 408562306a36Sopenharmony_ci case PACKET_AUXDATA: 408662306a36Sopenharmony_ci val = packet_sock_flag(po, PACKET_SOCK_AUXDATA); 408762306a36Sopenharmony_ci break; 408862306a36Sopenharmony_ci case PACKET_ORIGDEV: 408962306a36Sopenharmony_ci val = packet_sock_flag(po, PACKET_SOCK_ORIGDEV); 409062306a36Sopenharmony_ci break; 409162306a36Sopenharmony_ci case PACKET_VNET_HDR: 409262306a36Sopenharmony_ci val = !!READ_ONCE(po->vnet_hdr_sz); 409362306a36Sopenharmony_ci break; 409462306a36Sopenharmony_ci case PACKET_VNET_HDR_SZ: 409562306a36Sopenharmony_ci val = READ_ONCE(po->vnet_hdr_sz); 409662306a36Sopenharmony_ci break; 409762306a36Sopenharmony_ci case PACKET_VERSION: 409862306a36Sopenharmony_ci val = po->tp_version; 409962306a36Sopenharmony_ci break; 410062306a36Sopenharmony_ci case PACKET_HDRLEN: 410162306a36Sopenharmony_ci if (len > sizeof(int)) 410262306a36Sopenharmony_ci len = sizeof(int); 410362306a36Sopenharmony_ci if (len < sizeof(int)) 410462306a36Sopenharmony_ci return -EINVAL; 410562306a36Sopenharmony_ci if (copy_from_user(&val, optval, len)) 410662306a36Sopenharmony_ci return 
-EFAULT; 410762306a36Sopenharmony_ci switch (val) { 410862306a36Sopenharmony_ci case TPACKET_V1: 410962306a36Sopenharmony_ci val = sizeof(struct tpacket_hdr); 411062306a36Sopenharmony_ci break; 411162306a36Sopenharmony_ci case TPACKET_V2: 411262306a36Sopenharmony_ci val = sizeof(struct tpacket2_hdr); 411362306a36Sopenharmony_ci break; 411462306a36Sopenharmony_ci case TPACKET_V3: 411562306a36Sopenharmony_ci val = sizeof(struct tpacket3_hdr); 411662306a36Sopenharmony_ci break; 411762306a36Sopenharmony_ci default: 411862306a36Sopenharmony_ci return -EINVAL; 411962306a36Sopenharmony_ci } 412062306a36Sopenharmony_ci break; 412162306a36Sopenharmony_ci case PACKET_RESERVE: 412262306a36Sopenharmony_ci val = po->tp_reserve; 412362306a36Sopenharmony_ci break; 412462306a36Sopenharmony_ci case PACKET_LOSS: 412562306a36Sopenharmony_ci val = packet_sock_flag(po, PACKET_SOCK_TP_LOSS); 412662306a36Sopenharmony_ci break; 412762306a36Sopenharmony_ci case PACKET_TIMESTAMP: 412862306a36Sopenharmony_ci val = READ_ONCE(po->tp_tstamp); 412962306a36Sopenharmony_ci break; 413062306a36Sopenharmony_ci case PACKET_FANOUT: 413162306a36Sopenharmony_ci val = (po->fanout ? 
413262306a36Sopenharmony_ci ((u32)po->fanout->id | 413362306a36Sopenharmony_ci ((u32)po->fanout->type << 16) | 413462306a36Sopenharmony_ci ((u32)po->fanout->flags << 24)) : 413562306a36Sopenharmony_ci 0); 413662306a36Sopenharmony_ci break; 413762306a36Sopenharmony_ci case PACKET_IGNORE_OUTGOING: 413862306a36Sopenharmony_ci val = READ_ONCE(po->prot_hook.ignore_outgoing); 413962306a36Sopenharmony_ci break; 414062306a36Sopenharmony_ci case PACKET_ROLLOVER_STATS: 414162306a36Sopenharmony_ci if (!po->rollover) 414262306a36Sopenharmony_ci return -EINVAL; 414362306a36Sopenharmony_ci rstats.tp_all = atomic_long_read(&po->rollover->num); 414462306a36Sopenharmony_ci rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); 414562306a36Sopenharmony_ci rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); 414662306a36Sopenharmony_ci data = &rstats; 414762306a36Sopenharmony_ci lv = sizeof(rstats); 414862306a36Sopenharmony_ci break; 414962306a36Sopenharmony_ci case PACKET_TX_HAS_OFF: 415062306a36Sopenharmony_ci val = packet_sock_flag(po, PACKET_SOCK_TX_HAS_OFF); 415162306a36Sopenharmony_ci break; 415262306a36Sopenharmony_ci case PACKET_QDISC_BYPASS: 415362306a36Sopenharmony_ci val = packet_sock_flag(po, PACKET_SOCK_QDISC_BYPASS); 415462306a36Sopenharmony_ci break; 415562306a36Sopenharmony_ci default: 415662306a36Sopenharmony_ci return -ENOPROTOOPT; 415762306a36Sopenharmony_ci } 415862306a36Sopenharmony_ci 415962306a36Sopenharmony_ci if (len > lv) 416062306a36Sopenharmony_ci len = lv; 416162306a36Sopenharmony_ci if (put_user(len, optlen)) 416262306a36Sopenharmony_ci return -EFAULT; 416362306a36Sopenharmony_ci if (copy_to_user(optval, data, len)) 416462306a36Sopenharmony_ci return -EFAULT; 416562306a36Sopenharmony_ci return 0; 416662306a36Sopenharmony_ci} 416762306a36Sopenharmony_ci 416862306a36Sopenharmony_cistatic int packet_notifier(struct notifier_block *this, 416962306a36Sopenharmony_ci unsigned long msg, void *ptr) 417062306a36Sopenharmony_ci{ 
417162306a36Sopenharmony_ci struct sock *sk; 417262306a36Sopenharmony_ci struct net_device *dev = netdev_notifier_info_to_dev(ptr); 417362306a36Sopenharmony_ci struct net *net = dev_net(dev); 417462306a36Sopenharmony_ci 417562306a36Sopenharmony_ci rcu_read_lock(); 417662306a36Sopenharmony_ci sk_for_each_rcu(sk, &net->packet.sklist) { 417762306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 417862306a36Sopenharmony_ci 417962306a36Sopenharmony_ci switch (msg) { 418062306a36Sopenharmony_ci case NETDEV_UNREGISTER: 418162306a36Sopenharmony_ci if (po->mclist) 418262306a36Sopenharmony_ci packet_dev_mclist_delete(dev, &po->mclist); 418362306a36Sopenharmony_ci fallthrough; 418462306a36Sopenharmony_ci 418562306a36Sopenharmony_ci case NETDEV_DOWN: 418662306a36Sopenharmony_ci if (dev->ifindex == po->ifindex) { 418762306a36Sopenharmony_ci spin_lock(&po->bind_lock); 418862306a36Sopenharmony_ci if (packet_sock_flag(po, PACKET_SOCK_RUNNING)) { 418962306a36Sopenharmony_ci __unregister_prot_hook(sk, false); 419062306a36Sopenharmony_ci sk->sk_err = ENETDOWN; 419162306a36Sopenharmony_ci if (!sock_flag(sk, SOCK_DEAD)) 419262306a36Sopenharmony_ci sk_error_report(sk); 419362306a36Sopenharmony_ci } 419462306a36Sopenharmony_ci if (msg == NETDEV_UNREGISTER) { 419562306a36Sopenharmony_ci packet_cached_dev_reset(po); 419662306a36Sopenharmony_ci WRITE_ONCE(po->ifindex, -1); 419762306a36Sopenharmony_ci netdev_put(po->prot_hook.dev, 419862306a36Sopenharmony_ci &po->prot_hook.dev_tracker); 419962306a36Sopenharmony_ci po->prot_hook.dev = NULL; 420062306a36Sopenharmony_ci } 420162306a36Sopenharmony_ci spin_unlock(&po->bind_lock); 420262306a36Sopenharmony_ci } 420362306a36Sopenharmony_ci break; 420462306a36Sopenharmony_ci case NETDEV_UP: 420562306a36Sopenharmony_ci if (dev->ifindex == po->ifindex) { 420662306a36Sopenharmony_ci spin_lock(&po->bind_lock); 420762306a36Sopenharmony_ci if (po->num) 420862306a36Sopenharmony_ci register_prot_hook(sk); 420962306a36Sopenharmony_ci 
spin_unlock(&po->bind_lock); 421062306a36Sopenharmony_ci } 421162306a36Sopenharmony_ci break; 421262306a36Sopenharmony_ci } 421362306a36Sopenharmony_ci } 421462306a36Sopenharmony_ci rcu_read_unlock(); 421562306a36Sopenharmony_ci return NOTIFY_DONE; 421662306a36Sopenharmony_ci} 421762306a36Sopenharmony_ci 421862306a36Sopenharmony_ci 421962306a36Sopenharmony_cistatic int packet_ioctl(struct socket *sock, unsigned int cmd, 422062306a36Sopenharmony_ci unsigned long arg) 422162306a36Sopenharmony_ci{ 422262306a36Sopenharmony_ci struct sock *sk = sock->sk; 422362306a36Sopenharmony_ci 422462306a36Sopenharmony_ci switch (cmd) { 422562306a36Sopenharmony_ci case SIOCOUTQ: 422662306a36Sopenharmony_ci { 422762306a36Sopenharmony_ci int amount = sk_wmem_alloc_get(sk); 422862306a36Sopenharmony_ci 422962306a36Sopenharmony_ci return put_user(amount, (int __user *)arg); 423062306a36Sopenharmony_ci } 423162306a36Sopenharmony_ci case SIOCINQ: 423262306a36Sopenharmony_ci { 423362306a36Sopenharmony_ci struct sk_buff *skb; 423462306a36Sopenharmony_ci int amount = 0; 423562306a36Sopenharmony_ci 423662306a36Sopenharmony_ci spin_lock_bh(&sk->sk_receive_queue.lock); 423762306a36Sopenharmony_ci skb = skb_peek(&sk->sk_receive_queue); 423862306a36Sopenharmony_ci if (skb) 423962306a36Sopenharmony_ci amount = skb->len; 424062306a36Sopenharmony_ci spin_unlock_bh(&sk->sk_receive_queue.lock); 424162306a36Sopenharmony_ci return put_user(amount, (int __user *)arg); 424262306a36Sopenharmony_ci } 424362306a36Sopenharmony_ci#ifdef CONFIG_INET 424462306a36Sopenharmony_ci case SIOCADDRT: 424562306a36Sopenharmony_ci case SIOCDELRT: 424662306a36Sopenharmony_ci case SIOCDARP: 424762306a36Sopenharmony_ci case SIOCGARP: 424862306a36Sopenharmony_ci case SIOCSARP: 424962306a36Sopenharmony_ci case SIOCGIFADDR: 425062306a36Sopenharmony_ci case SIOCSIFADDR: 425162306a36Sopenharmony_ci case SIOCGIFBRDADDR: 425262306a36Sopenharmony_ci case SIOCSIFBRDADDR: 425362306a36Sopenharmony_ci case SIOCGIFNETMASK: 
425462306a36Sopenharmony_ci case SIOCSIFNETMASK: 425562306a36Sopenharmony_ci case SIOCGIFDSTADDR: 425662306a36Sopenharmony_ci case SIOCSIFDSTADDR: 425762306a36Sopenharmony_ci case SIOCSIFFLAGS: 425862306a36Sopenharmony_ci return inet_dgram_ops.ioctl(sock, cmd, arg); 425962306a36Sopenharmony_ci#endif 426062306a36Sopenharmony_ci 426162306a36Sopenharmony_ci default: 426262306a36Sopenharmony_ci return -ENOIOCTLCMD; 426362306a36Sopenharmony_ci } 426462306a36Sopenharmony_ci return 0; 426562306a36Sopenharmony_ci} 426662306a36Sopenharmony_ci 426762306a36Sopenharmony_cistatic __poll_t packet_poll(struct file *file, struct socket *sock, 426862306a36Sopenharmony_ci poll_table *wait) 426962306a36Sopenharmony_ci{ 427062306a36Sopenharmony_ci struct sock *sk = sock->sk; 427162306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 427262306a36Sopenharmony_ci __poll_t mask = datagram_poll(file, sock, wait); 427362306a36Sopenharmony_ci 427462306a36Sopenharmony_ci spin_lock_bh(&sk->sk_receive_queue.lock); 427562306a36Sopenharmony_ci if (po->rx_ring.pg_vec) { 427662306a36Sopenharmony_ci if (!packet_previous_rx_frame(po, &po->rx_ring, 427762306a36Sopenharmony_ci TP_STATUS_KERNEL)) 427862306a36Sopenharmony_ci mask |= EPOLLIN | EPOLLRDNORM; 427962306a36Sopenharmony_ci } 428062306a36Sopenharmony_ci packet_rcv_try_clear_pressure(po); 428162306a36Sopenharmony_ci spin_unlock_bh(&sk->sk_receive_queue.lock); 428262306a36Sopenharmony_ci spin_lock_bh(&sk->sk_write_queue.lock); 428362306a36Sopenharmony_ci if (po->tx_ring.pg_vec) { 428462306a36Sopenharmony_ci if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE)) 428562306a36Sopenharmony_ci mask |= EPOLLOUT | EPOLLWRNORM; 428662306a36Sopenharmony_ci } 428762306a36Sopenharmony_ci spin_unlock_bh(&sk->sk_write_queue.lock); 428862306a36Sopenharmony_ci return mask; 428962306a36Sopenharmony_ci} 429062306a36Sopenharmony_ci 429162306a36Sopenharmony_ci 429262306a36Sopenharmony_ci/* Dirty? 
Well, I still did not learn better way to account 429362306a36Sopenharmony_ci * for user mmaps. 429462306a36Sopenharmony_ci */ 429562306a36Sopenharmony_ci 429662306a36Sopenharmony_cistatic void packet_mm_open(struct vm_area_struct *vma) 429762306a36Sopenharmony_ci{ 429862306a36Sopenharmony_ci struct file *file = vma->vm_file; 429962306a36Sopenharmony_ci struct socket *sock = file->private_data; 430062306a36Sopenharmony_ci struct sock *sk = sock->sk; 430162306a36Sopenharmony_ci 430262306a36Sopenharmony_ci if (sk) 430362306a36Sopenharmony_ci atomic_long_inc(&pkt_sk(sk)->mapped); 430462306a36Sopenharmony_ci} 430562306a36Sopenharmony_ci 430662306a36Sopenharmony_cistatic void packet_mm_close(struct vm_area_struct *vma) 430762306a36Sopenharmony_ci{ 430862306a36Sopenharmony_ci struct file *file = vma->vm_file; 430962306a36Sopenharmony_ci struct socket *sock = file->private_data; 431062306a36Sopenharmony_ci struct sock *sk = sock->sk; 431162306a36Sopenharmony_ci 431262306a36Sopenharmony_ci if (sk) 431362306a36Sopenharmony_ci atomic_long_dec(&pkt_sk(sk)->mapped); 431462306a36Sopenharmony_ci} 431562306a36Sopenharmony_ci 431662306a36Sopenharmony_cistatic const struct vm_operations_struct packet_mmap_ops = { 431762306a36Sopenharmony_ci .open = packet_mm_open, 431862306a36Sopenharmony_ci .close = packet_mm_close, 431962306a36Sopenharmony_ci}; 432062306a36Sopenharmony_ci 432162306a36Sopenharmony_cistatic void free_pg_vec(struct pgv *pg_vec, unsigned int order, 432262306a36Sopenharmony_ci unsigned int len) 432362306a36Sopenharmony_ci{ 432462306a36Sopenharmony_ci int i; 432562306a36Sopenharmony_ci 432662306a36Sopenharmony_ci for (i = 0; i < len; i++) { 432762306a36Sopenharmony_ci if (likely(pg_vec[i].buffer)) { 432862306a36Sopenharmony_ci if (is_vmalloc_addr(pg_vec[i].buffer)) 432962306a36Sopenharmony_ci vfree(pg_vec[i].buffer); 433062306a36Sopenharmony_ci else 433162306a36Sopenharmony_ci free_pages((unsigned long)pg_vec[i].buffer, 433262306a36Sopenharmony_ci order); 
433362306a36Sopenharmony_ci pg_vec[i].buffer = NULL; 433462306a36Sopenharmony_ci } 433562306a36Sopenharmony_ci } 433662306a36Sopenharmony_ci kfree(pg_vec); 433762306a36Sopenharmony_ci} 433862306a36Sopenharmony_ci 433962306a36Sopenharmony_cistatic char *alloc_one_pg_vec_page(unsigned long order) 434062306a36Sopenharmony_ci{ 434162306a36Sopenharmony_ci char *buffer; 434262306a36Sopenharmony_ci gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | 434362306a36Sopenharmony_ci __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY; 434462306a36Sopenharmony_ci 434562306a36Sopenharmony_ci buffer = (char *) __get_free_pages(gfp_flags, order); 434662306a36Sopenharmony_ci if (buffer) 434762306a36Sopenharmony_ci return buffer; 434862306a36Sopenharmony_ci 434962306a36Sopenharmony_ci /* __get_free_pages failed, fall back to vmalloc */ 435062306a36Sopenharmony_ci buffer = vzalloc(array_size((1 << order), PAGE_SIZE)); 435162306a36Sopenharmony_ci if (buffer) 435262306a36Sopenharmony_ci return buffer; 435362306a36Sopenharmony_ci 435462306a36Sopenharmony_ci /* vmalloc failed, lets dig into swap here */ 435562306a36Sopenharmony_ci gfp_flags &= ~__GFP_NORETRY; 435662306a36Sopenharmony_ci buffer = (char *) __get_free_pages(gfp_flags, order); 435762306a36Sopenharmony_ci if (buffer) 435862306a36Sopenharmony_ci return buffer; 435962306a36Sopenharmony_ci 436062306a36Sopenharmony_ci /* complete and utter failure */ 436162306a36Sopenharmony_ci return NULL; 436262306a36Sopenharmony_ci} 436362306a36Sopenharmony_ci 436462306a36Sopenharmony_cistatic struct pgv *alloc_pg_vec(struct tpacket_req *req, int order) 436562306a36Sopenharmony_ci{ 436662306a36Sopenharmony_ci unsigned int block_nr = req->tp_block_nr; 436762306a36Sopenharmony_ci struct pgv *pg_vec; 436862306a36Sopenharmony_ci int i; 436962306a36Sopenharmony_ci 437062306a36Sopenharmony_ci pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL | __GFP_NOWARN); 437162306a36Sopenharmony_ci if (unlikely(!pg_vec)) 437262306a36Sopenharmony_ci goto out; 
437362306a36Sopenharmony_ci 437462306a36Sopenharmony_ci for (i = 0; i < block_nr; i++) { 437562306a36Sopenharmony_ci pg_vec[i].buffer = alloc_one_pg_vec_page(order); 437662306a36Sopenharmony_ci if (unlikely(!pg_vec[i].buffer)) 437762306a36Sopenharmony_ci goto out_free_pgvec; 437862306a36Sopenharmony_ci } 437962306a36Sopenharmony_ci 438062306a36Sopenharmony_ciout: 438162306a36Sopenharmony_ci return pg_vec; 438262306a36Sopenharmony_ci 438362306a36Sopenharmony_ciout_free_pgvec: 438462306a36Sopenharmony_ci free_pg_vec(pg_vec, order, block_nr); 438562306a36Sopenharmony_ci pg_vec = NULL; 438662306a36Sopenharmony_ci goto out; 438762306a36Sopenharmony_ci} 438862306a36Sopenharmony_ci 438962306a36Sopenharmony_cistatic int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, 439062306a36Sopenharmony_ci int closing, int tx_ring) 439162306a36Sopenharmony_ci{ 439262306a36Sopenharmony_ci struct pgv *pg_vec = NULL; 439362306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 439462306a36Sopenharmony_ci unsigned long *rx_owner_map = NULL; 439562306a36Sopenharmony_ci int was_running, order = 0; 439662306a36Sopenharmony_ci struct packet_ring_buffer *rb; 439762306a36Sopenharmony_ci struct sk_buff_head *rb_queue; 439862306a36Sopenharmony_ci __be16 num; 439962306a36Sopenharmony_ci int err; 440062306a36Sopenharmony_ci /* Added to avoid minimal code churn */ 440162306a36Sopenharmony_ci struct tpacket_req *req = &req_u->req; 440262306a36Sopenharmony_ci 440362306a36Sopenharmony_ci rb = tx_ring ? &po->tx_ring : &po->rx_ring; 440462306a36Sopenharmony_ci rb_queue = tx_ring ? 
&sk->sk_write_queue : &sk->sk_receive_queue; 440562306a36Sopenharmony_ci 440662306a36Sopenharmony_ci err = -EBUSY; 440762306a36Sopenharmony_ci if (!closing) { 440862306a36Sopenharmony_ci if (atomic_long_read(&po->mapped)) 440962306a36Sopenharmony_ci goto out; 441062306a36Sopenharmony_ci if (packet_read_pending(rb)) 441162306a36Sopenharmony_ci goto out; 441262306a36Sopenharmony_ci } 441362306a36Sopenharmony_ci 441462306a36Sopenharmony_ci if (req->tp_block_nr) { 441562306a36Sopenharmony_ci unsigned int min_frame_size; 441662306a36Sopenharmony_ci 441762306a36Sopenharmony_ci /* Sanity tests and some calculations */ 441862306a36Sopenharmony_ci err = -EBUSY; 441962306a36Sopenharmony_ci if (unlikely(rb->pg_vec)) 442062306a36Sopenharmony_ci goto out; 442162306a36Sopenharmony_ci 442262306a36Sopenharmony_ci switch (po->tp_version) { 442362306a36Sopenharmony_ci case TPACKET_V1: 442462306a36Sopenharmony_ci po->tp_hdrlen = TPACKET_HDRLEN; 442562306a36Sopenharmony_ci break; 442662306a36Sopenharmony_ci case TPACKET_V2: 442762306a36Sopenharmony_ci po->tp_hdrlen = TPACKET2_HDRLEN; 442862306a36Sopenharmony_ci break; 442962306a36Sopenharmony_ci case TPACKET_V3: 443062306a36Sopenharmony_ci po->tp_hdrlen = TPACKET3_HDRLEN; 443162306a36Sopenharmony_ci break; 443262306a36Sopenharmony_ci } 443362306a36Sopenharmony_ci 443462306a36Sopenharmony_ci err = -EINVAL; 443562306a36Sopenharmony_ci if (unlikely((int)req->tp_block_size <= 0)) 443662306a36Sopenharmony_ci goto out; 443762306a36Sopenharmony_ci if (unlikely(!PAGE_ALIGNED(req->tp_block_size))) 443862306a36Sopenharmony_ci goto out; 443962306a36Sopenharmony_ci min_frame_size = po->tp_hdrlen + po->tp_reserve; 444062306a36Sopenharmony_ci if (po->tp_version >= TPACKET_V3 && 444162306a36Sopenharmony_ci req->tp_block_size < 444262306a36Sopenharmony_ci BLK_PLUS_PRIV((u64)req_u->req3.tp_sizeof_priv) + min_frame_size) 444362306a36Sopenharmony_ci goto out; 444462306a36Sopenharmony_ci if (unlikely(req->tp_frame_size < min_frame_size)) 
444562306a36Sopenharmony_ci goto out; 444662306a36Sopenharmony_ci if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1))) 444762306a36Sopenharmony_ci goto out; 444862306a36Sopenharmony_ci 444962306a36Sopenharmony_ci rb->frames_per_block = req->tp_block_size / req->tp_frame_size; 445062306a36Sopenharmony_ci if (unlikely(rb->frames_per_block == 0)) 445162306a36Sopenharmony_ci goto out; 445262306a36Sopenharmony_ci if (unlikely(rb->frames_per_block > UINT_MAX / req->tp_block_nr)) 445362306a36Sopenharmony_ci goto out; 445462306a36Sopenharmony_ci if (unlikely((rb->frames_per_block * req->tp_block_nr) != 445562306a36Sopenharmony_ci req->tp_frame_nr)) 445662306a36Sopenharmony_ci goto out; 445762306a36Sopenharmony_ci 445862306a36Sopenharmony_ci err = -ENOMEM; 445962306a36Sopenharmony_ci order = get_order(req->tp_block_size); 446062306a36Sopenharmony_ci pg_vec = alloc_pg_vec(req, order); 446162306a36Sopenharmony_ci if (unlikely(!pg_vec)) 446262306a36Sopenharmony_ci goto out; 446362306a36Sopenharmony_ci switch (po->tp_version) { 446462306a36Sopenharmony_ci case TPACKET_V3: 446562306a36Sopenharmony_ci /* Block transmit is not supported yet */ 446662306a36Sopenharmony_ci if (!tx_ring) { 446762306a36Sopenharmony_ci init_prb_bdqc(po, rb, pg_vec, req_u); 446862306a36Sopenharmony_ci } else { 446962306a36Sopenharmony_ci struct tpacket_req3 *req3 = &req_u->req3; 447062306a36Sopenharmony_ci 447162306a36Sopenharmony_ci if (req3->tp_retire_blk_tov || 447262306a36Sopenharmony_ci req3->tp_sizeof_priv || 447362306a36Sopenharmony_ci req3->tp_feature_req_word) { 447462306a36Sopenharmony_ci err = -EINVAL; 447562306a36Sopenharmony_ci goto out_free_pg_vec; 447662306a36Sopenharmony_ci } 447762306a36Sopenharmony_ci } 447862306a36Sopenharmony_ci break; 447962306a36Sopenharmony_ci default: 448062306a36Sopenharmony_ci if (!tx_ring) { 448162306a36Sopenharmony_ci rx_owner_map = bitmap_alloc(req->tp_frame_nr, 448262306a36Sopenharmony_ci GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); 
448362306a36Sopenharmony_ci if (!rx_owner_map) 448462306a36Sopenharmony_ci goto out_free_pg_vec; 448562306a36Sopenharmony_ci } 448662306a36Sopenharmony_ci break; 448762306a36Sopenharmony_ci } 448862306a36Sopenharmony_ci } 448962306a36Sopenharmony_ci /* Done */ 449062306a36Sopenharmony_ci else { 449162306a36Sopenharmony_ci err = -EINVAL; 449262306a36Sopenharmony_ci if (unlikely(req->tp_frame_nr)) 449362306a36Sopenharmony_ci goto out; 449462306a36Sopenharmony_ci } 449562306a36Sopenharmony_ci 449662306a36Sopenharmony_ci 449762306a36Sopenharmony_ci /* Detach socket from network */ 449862306a36Sopenharmony_ci spin_lock(&po->bind_lock); 449962306a36Sopenharmony_ci was_running = packet_sock_flag(po, PACKET_SOCK_RUNNING); 450062306a36Sopenharmony_ci num = po->num; 450162306a36Sopenharmony_ci if (was_running) { 450262306a36Sopenharmony_ci WRITE_ONCE(po->num, 0); 450362306a36Sopenharmony_ci __unregister_prot_hook(sk, false); 450462306a36Sopenharmony_ci } 450562306a36Sopenharmony_ci spin_unlock(&po->bind_lock); 450662306a36Sopenharmony_ci 450762306a36Sopenharmony_ci synchronize_net(); 450862306a36Sopenharmony_ci 450962306a36Sopenharmony_ci err = -EBUSY; 451062306a36Sopenharmony_ci mutex_lock(&po->pg_vec_lock); 451162306a36Sopenharmony_ci if (closing || atomic_long_read(&po->mapped) == 0) { 451262306a36Sopenharmony_ci err = 0; 451362306a36Sopenharmony_ci spin_lock_bh(&rb_queue->lock); 451462306a36Sopenharmony_ci swap(rb->pg_vec, pg_vec); 451562306a36Sopenharmony_ci if (po->tp_version <= TPACKET_V2) 451662306a36Sopenharmony_ci swap(rb->rx_owner_map, rx_owner_map); 451762306a36Sopenharmony_ci rb->frame_max = (req->tp_frame_nr - 1); 451862306a36Sopenharmony_ci rb->head = 0; 451962306a36Sopenharmony_ci rb->frame_size = req->tp_frame_size; 452062306a36Sopenharmony_ci spin_unlock_bh(&rb_queue->lock); 452162306a36Sopenharmony_ci 452262306a36Sopenharmony_ci swap(rb->pg_vec_order, order); 452362306a36Sopenharmony_ci swap(rb->pg_vec_len, req->tp_block_nr); 452462306a36Sopenharmony_ci 
452562306a36Sopenharmony_ci rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE; 452662306a36Sopenharmony_ci po->prot_hook.func = (po->rx_ring.pg_vec) ? 452762306a36Sopenharmony_ci tpacket_rcv : packet_rcv; 452862306a36Sopenharmony_ci skb_queue_purge(rb_queue); 452962306a36Sopenharmony_ci if (atomic_long_read(&po->mapped)) 453062306a36Sopenharmony_ci pr_err("packet_mmap: vma is busy: %ld\n", 453162306a36Sopenharmony_ci atomic_long_read(&po->mapped)); 453262306a36Sopenharmony_ci } 453362306a36Sopenharmony_ci mutex_unlock(&po->pg_vec_lock); 453462306a36Sopenharmony_ci 453562306a36Sopenharmony_ci spin_lock(&po->bind_lock); 453662306a36Sopenharmony_ci if (was_running) { 453762306a36Sopenharmony_ci WRITE_ONCE(po->num, num); 453862306a36Sopenharmony_ci register_prot_hook(sk); 453962306a36Sopenharmony_ci } 454062306a36Sopenharmony_ci spin_unlock(&po->bind_lock); 454162306a36Sopenharmony_ci if (pg_vec && (po->tp_version > TPACKET_V2)) { 454262306a36Sopenharmony_ci /* Because we don't support block-based V3 on tx-ring */ 454362306a36Sopenharmony_ci if (!tx_ring) 454462306a36Sopenharmony_ci prb_shutdown_retire_blk_timer(po, rb_queue); 454562306a36Sopenharmony_ci } 454662306a36Sopenharmony_ci 454762306a36Sopenharmony_ciout_free_pg_vec: 454862306a36Sopenharmony_ci if (pg_vec) { 454962306a36Sopenharmony_ci bitmap_free(rx_owner_map); 455062306a36Sopenharmony_ci free_pg_vec(pg_vec, order, req->tp_block_nr); 455162306a36Sopenharmony_ci } 455262306a36Sopenharmony_ciout: 455362306a36Sopenharmony_ci return err; 455462306a36Sopenharmony_ci} 455562306a36Sopenharmony_ci 455662306a36Sopenharmony_cistatic int packet_mmap(struct file *file, struct socket *sock, 455762306a36Sopenharmony_ci struct vm_area_struct *vma) 455862306a36Sopenharmony_ci{ 455962306a36Sopenharmony_ci struct sock *sk = sock->sk; 456062306a36Sopenharmony_ci struct packet_sock *po = pkt_sk(sk); 456162306a36Sopenharmony_ci unsigned long size, expected_size; 456262306a36Sopenharmony_ci struct packet_ring_buffer *rb; 
456362306a36Sopenharmony_ci unsigned long start; 456462306a36Sopenharmony_ci int err = -EINVAL; 456562306a36Sopenharmony_ci int i; 456662306a36Sopenharmony_ci 456762306a36Sopenharmony_ci if (vma->vm_pgoff) 456862306a36Sopenharmony_ci return -EINVAL; 456962306a36Sopenharmony_ci 457062306a36Sopenharmony_ci mutex_lock(&po->pg_vec_lock); 457162306a36Sopenharmony_ci 457262306a36Sopenharmony_ci expected_size = 0; 457362306a36Sopenharmony_ci for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { 457462306a36Sopenharmony_ci if (rb->pg_vec) { 457562306a36Sopenharmony_ci expected_size += rb->pg_vec_len 457662306a36Sopenharmony_ci * rb->pg_vec_pages 457762306a36Sopenharmony_ci * PAGE_SIZE; 457862306a36Sopenharmony_ci } 457962306a36Sopenharmony_ci } 458062306a36Sopenharmony_ci 458162306a36Sopenharmony_ci if (expected_size == 0) 458262306a36Sopenharmony_ci goto out; 458362306a36Sopenharmony_ci 458462306a36Sopenharmony_ci size = vma->vm_end - vma->vm_start; 458562306a36Sopenharmony_ci if (size != expected_size) 458662306a36Sopenharmony_ci goto out; 458762306a36Sopenharmony_ci 458862306a36Sopenharmony_ci start = vma->vm_start; 458962306a36Sopenharmony_ci for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) { 459062306a36Sopenharmony_ci if (rb->pg_vec == NULL) 459162306a36Sopenharmony_ci continue; 459262306a36Sopenharmony_ci 459362306a36Sopenharmony_ci for (i = 0; i < rb->pg_vec_len; i++) { 459462306a36Sopenharmony_ci struct page *page; 459562306a36Sopenharmony_ci void *kaddr = rb->pg_vec[i].buffer; 459662306a36Sopenharmony_ci int pg_num; 459762306a36Sopenharmony_ci 459862306a36Sopenharmony_ci for (pg_num = 0; pg_num < rb->pg_vec_pages; pg_num++) { 459962306a36Sopenharmony_ci page = pgv_to_page(kaddr); 460062306a36Sopenharmony_ci err = vm_insert_page(vma, start, page); 460162306a36Sopenharmony_ci if (unlikely(err)) 460262306a36Sopenharmony_ci goto out; 460362306a36Sopenharmony_ci start += PAGE_SIZE; 460462306a36Sopenharmony_ci kaddr += PAGE_SIZE; 460562306a36Sopenharmony_ci } 
460662306a36Sopenharmony_ci } 460762306a36Sopenharmony_ci } 460862306a36Sopenharmony_ci 460962306a36Sopenharmony_ci atomic_long_inc(&po->mapped); 461062306a36Sopenharmony_ci vma->vm_ops = &packet_mmap_ops; 461162306a36Sopenharmony_ci err = 0; 461262306a36Sopenharmony_ci 461362306a36Sopenharmony_ciout: 461462306a36Sopenharmony_ci mutex_unlock(&po->pg_vec_lock); 461562306a36Sopenharmony_ci return err; 461662306a36Sopenharmony_ci} 461762306a36Sopenharmony_ci 461862306a36Sopenharmony_cistatic const struct proto_ops packet_ops_spkt = { 461962306a36Sopenharmony_ci .family = PF_PACKET, 462062306a36Sopenharmony_ci .owner = THIS_MODULE, 462162306a36Sopenharmony_ci .release = packet_release, 462262306a36Sopenharmony_ci .bind = packet_bind_spkt, 462362306a36Sopenharmony_ci .connect = sock_no_connect, 462462306a36Sopenharmony_ci .socketpair = sock_no_socketpair, 462562306a36Sopenharmony_ci .accept = sock_no_accept, 462662306a36Sopenharmony_ci .getname = packet_getname_spkt, 462762306a36Sopenharmony_ci .poll = datagram_poll, 462862306a36Sopenharmony_ci .ioctl = packet_ioctl, 462962306a36Sopenharmony_ci .gettstamp = sock_gettstamp, 463062306a36Sopenharmony_ci .listen = sock_no_listen, 463162306a36Sopenharmony_ci .shutdown = sock_no_shutdown, 463262306a36Sopenharmony_ci .sendmsg = packet_sendmsg_spkt, 463362306a36Sopenharmony_ci .recvmsg = packet_recvmsg, 463462306a36Sopenharmony_ci .mmap = sock_no_mmap, 463562306a36Sopenharmony_ci}; 463662306a36Sopenharmony_ci 463762306a36Sopenharmony_cistatic const struct proto_ops packet_ops = { 463862306a36Sopenharmony_ci .family = PF_PACKET, 463962306a36Sopenharmony_ci .owner = THIS_MODULE, 464062306a36Sopenharmony_ci .release = packet_release, 464162306a36Sopenharmony_ci .bind = packet_bind, 464262306a36Sopenharmony_ci .connect = sock_no_connect, 464362306a36Sopenharmony_ci .socketpair = sock_no_socketpair, 464462306a36Sopenharmony_ci .accept = sock_no_accept, 464562306a36Sopenharmony_ci .getname = packet_getname, 
464662306a36Sopenharmony_ci .poll = packet_poll, 464762306a36Sopenharmony_ci .ioctl = packet_ioctl, 464862306a36Sopenharmony_ci .gettstamp = sock_gettstamp, 464962306a36Sopenharmony_ci .listen = sock_no_listen, 465062306a36Sopenharmony_ci .shutdown = sock_no_shutdown, 465162306a36Sopenharmony_ci .setsockopt = packet_setsockopt, 465262306a36Sopenharmony_ci .getsockopt = packet_getsockopt, 465362306a36Sopenharmony_ci .sendmsg = packet_sendmsg, 465462306a36Sopenharmony_ci .recvmsg = packet_recvmsg, 465562306a36Sopenharmony_ci .mmap = packet_mmap, 465662306a36Sopenharmony_ci}; 465762306a36Sopenharmony_ci 465862306a36Sopenharmony_cistatic const struct net_proto_family packet_family_ops = { 465962306a36Sopenharmony_ci .family = PF_PACKET, 466062306a36Sopenharmony_ci .create = packet_create, 466162306a36Sopenharmony_ci .owner = THIS_MODULE, 466262306a36Sopenharmony_ci}; 466362306a36Sopenharmony_ci 466462306a36Sopenharmony_cistatic struct notifier_block packet_netdev_notifier = { 466562306a36Sopenharmony_ci .notifier_call = packet_notifier, 466662306a36Sopenharmony_ci}; 466762306a36Sopenharmony_ci 466862306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS 466962306a36Sopenharmony_ci 467062306a36Sopenharmony_cistatic void *packet_seq_start(struct seq_file *seq, loff_t *pos) 467162306a36Sopenharmony_ci __acquires(RCU) 467262306a36Sopenharmony_ci{ 467362306a36Sopenharmony_ci struct net *net = seq_file_net(seq); 467462306a36Sopenharmony_ci 467562306a36Sopenharmony_ci rcu_read_lock(); 467662306a36Sopenharmony_ci return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); 467762306a36Sopenharmony_ci} 467862306a36Sopenharmony_ci 467962306a36Sopenharmony_cistatic void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) 468062306a36Sopenharmony_ci{ 468162306a36Sopenharmony_ci struct net *net = seq_file_net(seq); 468262306a36Sopenharmony_ci return seq_hlist_next_rcu(v, &net->packet.sklist, pos); 468362306a36Sopenharmony_ci} 468462306a36Sopenharmony_ci 
468562306a36Sopenharmony_cistatic void packet_seq_stop(struct seq_file *seq, void *v) 468662306a36Sopenharmony_ci __releases(RCU) 468762306a36Sopenharmony_ci{ 468862306a36Sopenharmony_ci rcu_read_unlock(); 468962306a36Sopenharmony_ci} 469062306a36Sopenharmony_ci 469162306a36Sopenharmony_cistatic int packet_seq_show(struct seq_file *seq, void *v) 469262306a36Sopenharmony_ci{ 469362306a36Sopenharmony_ci if (v == SEQ_START_TOKEN) 469462306a36Sopenharmony_ci seq_printf(seq, 469562306a36Sopenharmony_ci "%*sRefCnt Type Proto Iface R Rmem User Inode\n", 469662306a36Sopenharmony_ci IS_ENABLED(CONFIG_64BIT) ? -17 : -9, "sk"); 469762306a36Sopenharmony_ci else { 469862306a36Sopenharmony_ci struct sock *s = sk_entry(v); 469962306a36Sopenharmony_ci const struct packet_sock *po = pkt_sk(s); 470062306a36Sopenharmony_ci 470162306a36Sopenharmony_ci seq_printf(seq, 470262306a36Sopenharmony_ci "%pK %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n", 470362306a36Sopenharmony_ci s, 470462306a36Sopenharmony_ci refcount_read(&s->sk_refcnt), 470562306a36Sopenharmony_ci s->sk_type, 470662306a36Sopenharmony_ci ntohs(READ_ONCE(po->num)), 470762306a36Sopenharmony_ci READ_ONCE(po->ifindex), 470862306a36Sopenharmony_ci packet_sock_flag(po, PACKET_SOCK_RUNNING), 470962306a36Sopenharmony_ci atomic_read(&s->sk_rmem_alloc), 471062306a36Sopenharmony_ci from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), 471162306a36Sopenharmony_ci sock_i_ino(s)); 471262306a36Sopenharmony_ci } 471362306a36Sopenharmony_ci 471462306a36Sopenharmony_ci return 0; 471562306a36Sopenharmony_ci} 471662306a36Sopenharmony_ci 471762306a36Sopenharmony_cistatic const struct seq_operations packet_seq_ops = { 471862306a36Sopenharmony_ci .start = packet_seq_start, 471962306a36Sopenharmony_ci .next = packet_seq_next, 472062306a36Sopenharmony_ci .stop = packet_seq_stop, 472162306a36Sopenharmony_ci .show = packet_seq_show, 472262306a36Sopenharmony_ci}; 472362306a36Sopenharmony_ci#endif 472462306a36Sopenharmony_ci 
472562306a36Sopenharmony_cistatic int __net_init packet_net_init(struct net *net) 472662306a36Sopenharmony_ci{ 472762306a36Sopenharmony_ci mutex_init(&net->packet.sklist_lock); 472862306a36Sopenharmony_ci INIT_HLIST_HEAD(&net->packet.sklist); 472962306a36Sopenharmony_ci 473062306a36Sopenharmony_ci#ifdef CONFIG_PROC_FS 473162306a36Sopenharmony_ci if (!proc_create_net("packet", 0, net->proc_net, &packet_seq_ops, 473262306a36Sopenharmony_ci sizeof(struct seq_net_private))) 473362306a36Sopenharmony_ci return -ENOMEM; 473462306a36Sopenharmony_ci#endif /* CONFIG_PROC_FS */ 473562306a36Sopenharmony_ci 473662306a36Sopenharmony_ci return 0; 473762306a36Sopenharmony_ci} 473862306a36Sopenharmony_ci 473962306a36Sopenharmony_cistatic void __net_exit packet_net_exit(struct net *net) 474062306a36Sopenharmony_ci{ 474162306a36Sopenharmony_ci remove_proc_entry("packet", net->proc_net); 474262306a36Sopenharmony_ci WARN_ON_ONCE(!hlist_empty(&net->packet.sklist)); 474362306a36Sopenharmony_ci} 474462306a36Sopenharmony_ci 474562306a36Sopenharmony_cistatic struct pernet_operations packet_net_ops = { 474662306a36Sopenharmony_ci .init = packet_net_init, 474762306a36Sopenharmony_ci .exit = packet_net_exit, 474862306a36Sopenharmony_ci}; 474962306a36Sopenharmony_ci 475062306a36Sopenharmony_ci 475162306a36Sopenharmony_cistatic void __exit packet_exit(void) 475262306a36Sopenharmony_ci{ 475362306a36Sopenharmony_ci sock_unregister(PF_PACKET); 475462306a36Sopenharmony_ci proto_unregister(&packet_proto); 475562306a36Sopenharmony_ci unregister_netdevice_notifier(&packet_netdev_notifier); 475662306a36Sopenharmony_ci unregister_pernet_subsys(&packet_net_ops); 475762306a36Sopenharmony_ci} 475862306a36Sopenharmony_ci 475962306a36Sopenharmony_cistatic int __init packet_init(void) 476062306a36Sopenharmony_ci{ 476162306a36Sopenharmony_ci int rc; 476262306a36Sopenharmony_ci 476362306a36Sopenharmony_ci rc = register_pernet_subsys(&packet_net_ops); 476462306a36Sopenharmony_ci if (rc) 
476562306a36Sopenharmony_ci goto out; 476662306a36Sopenharmony_ci rc = register_netdevice_notifier(&packet_netdev_notifier); 476762306a36Sopenharmony_ci if (rc) 476862306a36Sopenharmony_ci goto out_pernet; 476962306a36Sopenharmony_ci rc = proto_register(&packet_proto, 0); 477062306a36Sopenharmony_ci if (rc) 477162306a36Sopenharmony_ci goto out_notifier; 477262306a36Sopenharmony_ci rc = sock_register(&packet_family_ops); 477362306a36Sopenharmony_ci if (rc) 477462306a36Sopenharmony_ci goto out_proto; 477562306a36Sopenharmony_ci 477662306a36Sopenharmony_ci return 0; 477762306a36Sopenharmony_ci 477862306a36Sopenharmony_ciout_proto: 477962306a36Sopenharmony_ci proto_unregister(&packet_proto); 478062306a36Sopenharmony_ciout_notifier: 478162306a36Sopenharmony_ci unregister_netdevice_notifier(&packet_netdev_notifier); 478262306a36Sopenharmony_ciout_pernet: 478362306a36Sopenharmony_ci unregister_pernet_subsys(&packet_net_ops); 478462306a36Sopenharmony_ciout: 478562306a36Sopenharmony_ci return rc; 478662306a36Sopenharmony_ci} 478762306a36Sopenharmony_ci 478862306a36Sopenharmony_cimodule_init(packet_init); 478962306a36Sopenharmony_cimodule_exit(packet_exit); 479062306a36Sopenharmony_ciMODULE_LICENSE("GPL"); 479162306a36Sopenharmony_ciMODULE_ALIAS_NETPROTO(PF_PACKET); 4792