1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * INET An implementation of the TCP/IP protocol suite for the LINUX
4 * operating system. INET is implemented using the BSD Socket
5 * interface as the means of communication with the user level.
6 *
7 * Generic socket support routines. Memory allocators, socket lock/release
8 * handler for protocols to use and generic option handler.
9 *
10 * Authors: Ross Biro
11 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Florian La Roche, <flla@stud.uni-sb.de>
13 * Alan Cox, <A.Cox@swansea.ac.uk>
14 *
15 * Fixes:
16 * Alan Cox : Numerous verify_area() problems
17 * Alan Cox : Connecting on a connecting socket
18 * now returns an error for tcp.
19 * Alan Cox : sock->protocol is set correctly.
20 * and is not sometimes left as 0.
21 * Alan Cox : connect handles icmp errors on a
22 * connect properly. Unfortunately there
23 * is a restart syscall nasty there. I
24 * can't match BSD without hacking the C
25 * library. Ideas urgently sought!
26 * Alan Cox : Disallow bind() to addresses that are
27 * not ours - especially broadcast ones!!
28 * Alan Cox : Socket 1024 _IS_ ok for users. (fencepost)
29 * Alan Cox : sock_wfree/sock_rfree don't destroy sockets,
30 * instead they leave that for the DESTROY timer.
31 * Alan Cox : Clean up error flag in accept
32 * Alan Cox : TCP ack handling is buggy, the DESTROY timer
33 * was buggy. Put a remove_sock() in the handler
34 * for memory when we hit 0. Also altered the timer
35 * code. The ACK stuff can wait and needs major
36 * TCP layer surgery.
37 * Alan Cox : Fixed TCP ack bug, removed remove sock
38 * and fixed timer/inet_bh race.
39 * Alan Cox : Added zapped flag for TCP
40 * Alan Cox : Move kfree_skb into skbuff.c and tidied up surplus code
41 * Alan Cox : for new sk_buff allocations wmalloc/rmalloc now call alloc_skb
42 * Alan Cox : kfree_s calls now are kfree_skbmem so we can track skb resources
43 * Alan Cox : Supports socket option broadcast now as does udp. Packet and raw need fixing.
44 * Alan Cox : Added RCVBUF,SNDBUF size setting. It suddenly occurred to me how easy it was so...
45 * Rick Sladkey : Relaxed UDP rules for matching packets.
46 * C.E.Hawkins : IFF_PROMISC/SIOCGHWADDR support
47 * Pauline Middelink : identd support
48 * Alan Cox : Fixed connect() taking signals I think.
49 * Alan Cox : SO_LINGER supported
50 * Alan Cox : Error reporting fixes
51 * Anonymous : inet_create tidied up (sk->reuse setting)
52 * Alan Cox : inet sockets don't set sk->type!
53 * Alan Cox : Split socket option code
54 * Alan Cox : Callbacks
55 * Alan Cox : Nagle flag for Charles & Johannes stuff
56 * Alex : Removed restriction on inet fioctl
57 * Alan Cox : Splitting INET from NET core
58 * Alan Cox : Fixed bogus SO_TYPE handling in getsockopt()
59 * Adam Caldwell : Missing return in SO_DONTROUTE/SO_DEBUG code
60 * Alan Cox : Split IP from generic code
61 * Alan Cox : New kfree_skbmem()
62 * Alan Cox : Make SO_DEBUG superuser only.
63 * Alan Cox : Allow anyone to clear SO_DEBUG
64 * (compatibility fix)
65 * Alan Cox : Added optimistic memory grabbing for AF_UNIX throughput.
66 * Alan Cox : Allocator for a socket is settable.
67 * Alan Cox : SO_ERROR includes soft errors.
68 * Alan Cox : Allow NULL arguments on some SO_ opts
69 * Alan Cox : Generic socket allocation to make hooks
70 * easier (suggested by Craig Metz).
71 * Michael Pall : SO_ERROR returns positive errno again
72 * Steve Whitehouse: Added default destructor to free
73 * protocol private data.
74 * Steve Whitehouse: Added various other default routines
75 * common to several socket families.
76 * Chris Evans : Call suser() check last on F_SETOWN
77 * Jay Schulist : Added SO_ATTACH_FILTER and SO_DETACH_FILTER.
78 * Andi Kleen : Add sock_kmalloc()/sock_kfree_s()
79 * Andi Kleen : Fix write_space callback
80 * Chris Evans : Security fixes - signedness again
81 * Arnaldo C. Melo : cleanups, use skb_queue_purge
82 *
83 * To Fix:
84 */
85
86 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
87
88 #include <asm/unaligned.h>
89 #include <linux/capability.h>
90 #include <linux/errno.h>
91 #include <linux/errqueue.h>
92 #include <linux/types.h>
93 #include <linux/socket.h>
94 #include <linux/in.h>
95 #include <linux/kernel.h>
96 #include <linux/module.h>
97 #include <linux/proc_fs.h>
98 #include <linux/seq_file.h>
99 #include <linux/sched.h>
100 #include <linux/sched/mm.h>
101 #include <linux/timer.h>
102 #include <linux/string.h>
103 #include <linux/sockios.h>
104 #include <linux/net.h>
105 #include <linux/mm.h>
106 #include <linux/slab.h>
107 #include <linux/interrupt.h>
108 #include <linux/poll.h>
109 #include <linux/tcp.h>
110 #include <linux/init.h>
111 #include <linux/highmem.h>
112 #include <linux/user_namespace.h>
113 #include <linux/static_key.h>
114 #include <linux/memcontrol.h>
115 #include <linux/prefetch.h>
116 #include <linux/compat.h>
117
118 #include <linux/uaccess.h>
119
120 #include <linux/netdevice.h>
121 #include <net/protocol.h>
122 #include <linux/skbuff.h>
123 #include <net/net_namespace.h>
124 #include <net/request_sock.h>
125 #include <net/sock.h>
126 #include <linux/net_tstamp.h>
127 #include <net/xfrm.h>
128 #include <linux/ipsec.h>
129 #include <net/cls_cgroup.h>
130 #include <net/netprio_cgroup.h>
131 #include <linux/sock_diag.h>
132
133 #include <linux/filter.h>
134 #include <net/sock_reuseport.h>
135 #include <net/bpf_sk_storage.h>
136
137 #include <trace/events/sock.h>
138
139 #include <net/tcp.h>
140 #include <net/busy_poll.h>
141
142 static DEFINE_MUTEX(proto_list_mutex);
143 static LIST_HEAD(proto_list);
144
145 static void sock_inuse_add(struct net *net, int val);
146
147 /**
148 * sk_ns_capable - General socket capability test
149 * @sk: Socket to use a capability on or through
150 * @user_ns: The user namespace of the capability to use
151 * @cap: The capability to use
152 *
153 * Test to see if the opener of the socket had the capability @cap when
154 * the socket was created and the current process has it in the user
155 * namespace @user_ns.
156 */
157 bool sk_ns_capable(const struct sock *sk,
158 struct user_namespace *user_ns, int cap)
159 {
160 return file_ns_capable(sk->sk_socket->file, user_ns, cap) &&
161 ns_capable(user_ns, cap);
162 }
163 EXPORT_SYMBOL(sk_ns_capable);
164
165 /**
166 * sk_capable - Socket global capability test
167 * @sk: Socket to use a capability on or through
168 * @cap: The global capability to use
169 *
170 * Test to see if the opener of the socket had the capability @cap when
171 * the socket was created and the current process has it in all user
172 * namespaces.
173 */
174 bool sk_capable(const struct sock *sk, int cap)
175 {
176 return sk_ns_capable(sk, &init_user_ns, cap);
177 }
178 EXPORT_SYMBOL(sk_capable);
179
180 /**
181 * sk_net_capable - Network namespace socket capability test
182 * @sk: Socket to use a capability on or through
183 * @cap: The capability to use
184 *
185 * Test to see if the opener of the socket had the capability @cap when the
186 * socket was created and the current process has it over the network
187 * namespace the socket is a member of.
188 */
189 bool sk_net_capable(const struct sock *sk, int cap)
190 {
191 return sk_ns_capable(sk, sock_net(sk)->user_ns, cap);
192 }
193 EXPORT_SYMBOL(sk_net_capable);
194
195 /*
196 * Each address family might have different locking rules, so we have
197 * one slock key per address family and separate keys for internal and
198 * userspace sockets.
199 */
200 static struct lock_class_key af_family_keys[AF_MAX];
201 static struct lock_class_key af_family_kern_keys[AF_MAX];
202 static struct lock_class_key af_family_slock_keys[AF_MAX];
203 static struct lock_class_key af_family_kern_slock_keys[AF_MAX];
204
205 /*
206 * Make lock validator output more readable. (We pre-construct these
207 * strings at build time, so that runtime initialization of socket
208 * locks is fast.)
209 */
210
211 #define _sock_locks(x) \
212 x "AF_UNSPEC", x "AF_UNIX" , x "AF_INET" , \
213 x "AF_AX25" , x "AF_IPX" , x "AF_APPLETALK", \
214 x "AF_NETROM", x "AF_BRIDGE" , x "AF_ATMPVC" , \
215 x "AF_X25" , x "AF_INET6" , x "AF_ROSE" , \
216 x "AF_DECnet", x "AF_NETBEUI" , x "AF_SECURITY" , \
217 x "AF_KEY" , x "AF_NETLINK" , x "AF_PACKET" , \
218 x "AF_ASH" , x "AF_ECONET" , x "AF_ATMSVC" , \
219 x "AF_RDS" , x "AF_SNA" , x "AF_IRDA" , \
220 x "AF_PPPOX" , x "AF_WANPIPE" , x "AF_LLC" , \
221 x "27" , x "28" , x "AF_CAN" , \
222 x "AF_TIPC" , x "AF_BLUETOOTH", x "IUCV" , \
223 x "AF_RXRPC" , x "AF_ISDN" , x "AF_PHONET" , \
224 x "AF_IEEE802154", x "AF_CAIF" , x "AF_ALG" , \
225 x "AF_NFC" , x "AF_VSOCK" , x "AF_KCM" , \
226 x "AF_QIPCRTR", x "AF_SMC" , x "AF_XDP" , \
227 x "AF_MAX"
228
229 static const char *const af_family_key_strings[AF_MAX+1] = {
230 _sock_locks("sk_lock-")
231 };
232 static const char *const af_family_slock_key_strings[AF_MAX+1] = {
233 _sock_locks("slock-")
234 };
235 static const char *const af_family_clock_key_strings[AF_MAX+1] = {
236 _sock_locks("clock-")
237 };
238
239 static const char *const af_family_kern_key_strings[AF_MAX+1] = {
240 _sock_locks("k-sk_lock-")
241 };
242 static const char *const af_family_kern_slock_key_strings[AF_MAX+1] = {
243 _sock_locks("k-slock-")
244 };
245 static const char *const af_family_kern_clock_key_strings[AF_MAX+1] = {
246 _sock_locks("k-clock-")
247 };
248 static const char *const af_family_rlock_key_strings[AF_MAX+1] = {
249 _sock_locks("rlock-")
250 };
251 static const char *const af_family_wlock_key_strings[AF_MAX+1] = {
252 _sock_locks("wlock-")
253 };
254 static const char *const af_family_elock_key_strings[AF_MAX+1] = {
255 _sock_locks("elock-")
256 };
257
258 /*
259 * sk_callback_lock and sk queues locking rules are per-address-family,
260 * so split the lock classes by using a per-AF key:
261 */
262 static struct lock_class_key af_callback_keys[AF_MAX];
263 static struct lock_class_key af_rlock_keys[AF_MAX];
264 static struct lock_class_key af_wlock_keys[AF_MAX];
265 static struct lock_class_key af_elock_keys[AF_MAX];
266 static struct lock_class_key af_kern_callback_keys[AF_MAX];
267
268 /* Run time adjustable parameters. */
269 __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX;
270 EXPORT_SYMBOL(sysctl_wmem_max);
271 __u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX;
272 EXPORT_SYMBOL(sysctl_rmem_max);
273 __u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX;
274 __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX;
275
276 /* Maximal space eaten by iovec or ancillary data plus some space */
277 int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512);
278 EXPORT_SYMBOL(sysctl_optmem_max);
279
280 int sysctl_tstamp_allow_data __read_mostly = 1;
281
282 DEFINE_STATIC_KEY_FALSE(memalloc_socks_key);
283 EXPORT_SYMBOL_GPL(memalloc_socks_key);
284
285 /**
286 * sk_set_memalloc - sets %SOCK_MEMALLOC
287 * @sk: socket to set it on
288 *
289 * Set %SOCK_MEMALLOC on a socket for access to emergency reserves.
290 * It's the responsibility of the admin to adjust min_free_kbytes
291 * to meet the requirements.
292 */
293 void sk_set_memalloc(struct sock *sk)
294 {
295 sock_set_flag(sk, SOCK_MEMALLOC);
296 sk->sk_allocation |= __GFP_MEMALLOC;
297 static_branch_inc(&memalloc_socks_key);
298 }
299 EXPORT_SYMBOL_GPL(sk_set_memalloc);
300
301 void sk_clear_memalloc(struct sock *sk)
302 {
303 sock_reset_flag(sk, SOCK_MEMALLOC);
304 sk->sk_allocation &= ~__GFP_MEMALLOC;
305 static_branch_dec(&memalloc_socks_key);
306
307 /*
308 * SOCK_MEMALLOC is allowed to ignore rmem limits to ensure forward
309 * progress of swapping. SOCK_MEMALLOC may be cleared while
310 * it has rmem allocations due to the last swapfile being deactivated
311 * but there is a risk that the socket is unusable due to exceeding
312 * the rmem limits. Reclaim the reserves and obey rmem limits again.
313 */
314 sk_mem_reclaim(sk);
315 }
316 EXPORT_SYMBOL_GPL(sk_clear_memalloc);
317
318 int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)
319 {
320 int ret;
321 unsigned int noreclaim_flag;
322
323 /* these should have been dropped before queueing */
324 BUG_ON(!sock_flag(sk, SOCK_MEMALLOC));
325
326 noreclaim_flag = memalloc_noreclaim_save();
327 ret = sk->sk_backlog_rcv(sk, skb);
328 memalloc_noreclaim_restore(noreclaim_flag);
329
330 return ret;
331 }
332 EXPORT_SYMBOL(__sk_backlog_rcv);
333
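/* Helper for the SO_RCVTIMEO/SO_SNDTIMEO getters below: convert a
 * jiffies-based timeout into whichever userspace timeval layout was
 * requested (32-bit compat old_timeval32, __kernel_old_timeval, or the
 * 64-bit __kernel_sock_timeval). An infinite timeout
 * (MAX_SCHEDULE_TIMEOUT) is reported to userspace as zero. Returns the
 * number of bytes written to optval.
 */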
334 static int sock_get_timeout(long timeo, void *optval, bool old_timeval)
335 {
336 struct __kernel_sock_timeval tv;
337
338 if (timeo == MAX_SCHEDULE_TIMEOUT) {
339 tv.tv_sec = 0;
340 tv.tv_usec = 0;
341 } else {
342 tv.tv_sec = timeo / HZ;
343 tv.tv_usec = ((timeo % HZ) * USEC_PER_SEC) / HZ;
344 }
345
346 if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
347 struct old_timeval32 tv32 = { tv.tv_sec, tv.tv_usec };
348 *(struct old_timeval32 *)optval = tv32;
349 return sizeof(tv32);
350 }
351
352 if (old_timeval) {
353 struct __kernel_old_timeval old_tv;
354 old_tv.tv_sec = tv.tv_sec;
355 old_tv.tv_usec = tv.tv_usec;
356 *(struct __kernel_old_timeval *)optval = old_tv;
357 return sizeof(old_tv);
358 }
359
360 *(struct __kernel_sock_timeval *)optval = tv;
361 return sizeof(tv);
362 }
363
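/* Counterpart of sock_get_timeout(): parse a user-supplied timeout in any
 * of the three timeval layouts and store it in jiffies. A zero timeout
 * means "never time out" and becomes MAX_SCHEDULE_TIMEOUT; negative
 * seconds are clamped to an immediate timeout with a rate-limited
 * warning, and out-of-range microseconds yield -EDOM.
 */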
364 static int sock_set_timeout(long *timeo_p, sockptr_t optval, int optlen,
365 bool old_timeval)
366 {
367 struct __kernel_sock_timeval tv;
368
369 if (old_timeval && in_compat_syscall() && !COMPAT_USE_64BIT_TIME) {
370 struct old_timeval32 tv32;
371
372 if (optlen < sizeof(tv32))
373 return -EINVAL;
374
375 if (copy_from_sockptr(&tv32, optval, sizeof(tv32)))
376 return -EFAULT;
377 tv.tv_sec = tv32.tv_sec;
378 tv.tv_usec = tv32.tv_usec;
379 } else if (old_timeval) {
380 struct __kernel_old_timeval old_tv;
381
382 if (optlen < sizeof(old_tv))
383 return -EINVAL;
384 if (copy_from_sockptr(&old_tv, optval, sizeof(old_tv)))
385 return -EFAULT;
386 tv.tv_sec = old_tv.tv_sec;
387 tv.tv_usec = old_tv.tv_usec;
388 } else {
389 if (optlen < sizeof(tv))
390 return -EINVAL;
391 if (copy_from_sockptr(&tv, optval, sizeof(tv)))
392 return -EFAULT;
393 }
394 if (tv.tv_usec < 0 || tv.tv_usec >= USEC_PER_SEC)
395 return -EDOM;
396
397 if (tv.tv_sec < 0) {
398 static int warned __read_mostly;
399
400 *timeo_p = 0;
401 if (warned < 10 && net_ratelimit()) {
402 warned++;
403 pr_info("%s: `%s' (pid %d) tries to set negative timeout\n",
404 __func__, current->comm, task_pid_nr(current));
405 }
406 return 0;
407 }
408 *timeo_p = MAX_SCHEDULE_TIMEOUT;
409 if (tv.tv_sec == 0 && tv.tv_usec == 0)
410 return 0;
411 if (tv.tv_sec < (MAX_SCHEDULE_TIMEOUT / HZ - 1))
412 *timeo_p = tv.tv_sec * HZ + DIV_ROUND_UP((unsigned long)tv.tv_usec, USEC_PER_SEC / HZ);
413 return 0;
414 }
415
416 static bool sock_needs_netstamp(const struct sock *sk)
417 {
418 switch (sk->sk_family) {
419 case AF_UNSPEC:
420 case AF_UNIX:
421 return false;
422 default:
423 return true;
424 }
425 }
426
427 static void sock_disable_timestamp(struct sock *sk, unsigned long flags)
428 {
429 if (sk->sk_flags & flags) {
430 sk->sk_flags &= ~flags;
431 if (sock_needs_netstamp(sk) &&
432 !(sk->sk_flags & SK_FLAGS_TIMESTAMP))
433 net_disable_timestamp();
434 }
435 }
436
437
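/* Queue an skb on sk_receive_queue: charge it against the receive buffer,
 * force a refcounted dst (the skb may outlive the current RCU section),
 * append it under the queue lock and wake the reader via sk_data_ready()
 * unless the socket is dead. A full receive buffer returns -ENOMEM,
 * failed memory accounting returns -ENOBUFS, and both paths bump
 * sk_drops.
 */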
438 int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
439 {
440 unsigned long flags;
441 struct sk_buff_head *list = &sk->sk_receive_queue;
442
443 if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) {
444 atomic_inc(&sk->sk_drops);
445 trace_sock_rcvqueue_full(sk, skb);
446 return -ENOMEM;
447 }
448
449 if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
450 atomic_inc(&sk->sk_drops);
451 return -ENOBUFS;
452 }
453
454 skb->dev = NULL;
455 skb_set_owner_r(skb, sk);
456
457 /* We escape from the RCU-protected region, so make sure we don't leak
458 * a non-refcounted dst.
459 */
460 skb_dst_force(skb);
461
462 spin_lock_irqsave(&list->lock, flags);
463 sock_skb_set_dropcount(sk, skb);
464 __skb_queue_tail(list, skb);
465 spin_unlock_irqrestore(&list->lock, flags);
466
467 if (!sock_flag(sk, SOCK_DEAD))
468 sk->sk_data_ready(sk);
469 return 0;
470 }
471 EXPORT_SYMBOL(__sock_queue_rcv_skb);
472
473 int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
474 {
475 int err;
476
477 err = sk_filter(sk, skb);
478 if (err)
479 return err;
480
481 return __sock_queue_rcv_skb(sk, skb);
482 }
483 EXPORT_SYMBOL(sock_queue_rcv_skb);
484
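/* Deliver an skb to a socket from softirq/protocol code: run the
 * (trim-capable) socket filter, then either process the skb immediately
 * via sk_backlog_rcv() when no user context owns the socket, or park it
 * on the backlog to be replayed at release_sock() time. Drops the
 * caller's socket reference when @refcounted is true.
 */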
485 int __sk_receive_skb(struct sock *sk, struct sk_buff *skb,
486 const int nested, unsigned int trim_cap, bool refcounted)
487 {
488 int rc = NET_RX_SUCCESS;
489
490 if (sk_filter_trim_cap(sk, skb, trim_cap))
491 goto discard_and_relse;
492
493 skb->dev = NULL;
494
495 if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) {
496 atomic_inc(&sk->sk_drops);
497 goto discard_and_relse;
498 }
499 if (nested)
500 bh_lock_sock_nested(sk);
501 else
502 bh_lock_sock(sk);
503 if (!sock_owned_by_user(sk)) {
504 /*
505 * trylock + unlock semantics:
506 */
507 mutex_acquire(&sk->sk_lock.dep_map, 0, 1, _RET_IP_);
508
509 rc = sk_backlog_rcv(sk, skb);
510
511 mutex_release(&sk->sk_lock.dep_map, _RET_IP_);
512 } else if (sk_add_backlog(sk, skb, READ_ONCE(sk->sk_rcvbuf))) {
513 bh_unlock_sock(sk);
514 atomic_inc(&sk->sk_drops);
515 goto discard_and_relse;
516 }
517
518 bh_unlock_sock(sk);
519 out:
520 if (refcounted)
521 sock_put(sk);
522 return rc;
523 discard_and_relse:
524 kfree_skb(skb);
525 goto out;
526 }
527 EXPORT_SYMBOL(__sk_receive_skb);
528
529 struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
530 {
531 struct dst_entry *dst = __sk_dst_get(sk);
532
533 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
534 sk_tx_queue_clear(sk);
535 WRITE_ONCE(sk->sk_dst_pending_confirm, 0);
536 RCU_INIT_POINTER(sk->sk_dst_cache, NULL);
537 dst_release(dst);
538 return NULL;
539 }
540
541 return dst;
542 }
543 EXPORT_SYMBOL(__sk_dst_check);
544
545 struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
546 {
547 struct dst_entry *dst = sk_dst_get(sk);
548
549 if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
550 sk_dst_reset(sk);
551 dst_release(dst);
552 return NULL;
553 }
554
555 return dst;
556 }
557 EXPORT_SYMBOL(sk_dst_check);
558
559 static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
560 {
561 int ret = -ENOPROTOOPT;
562 #ifdef CONFIG_NETDEVICES
563 struct net *net = sock_net(sk);
564
565 /* Sorry... */
566 ret = -EPERM;
567 if (sk->sk_bound_dev_if && !ns_capable(net->user_ns, CAP_NET_RAW))
568 goto out;
569
570 ret = -EINVAL;
571 if (ifindex < 0)
572 goto out;
573
574 sk->sk_bound_dev_if = ifindex;
575 if (sk->sk_prot->rehash)
576 sk->sk_prot->rehash(sk);
577 sk_dst_reset(sk);
578
579 ret = 0;
580
581 out:
582 #endif
583
584 return ret;
585 }
586
587 int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
588 {
589 int ret;
590
591 if (lock_sk)
592 lock_sock(sk);
593 ret = sock_bindtoindex_locked(sk, ifindex);
594 if (lock_sk)
595 release_sock(sk);
596
597 return ret;
598 }
599 EXPORT_SYMBOL(sock_bindtoindex);
600
601 static int sock_setbindtodevice(struct sock *sk, sockptr_t optval, int optlen)
602 {
603 int ret = -ENOPROTOOPT;
604 #ifdef CONFIG_NETDEVICES
605 struct net *net = sock_net(sk);
606 char devname[IFNAMSIZ];
607 int index;
608
609 ret = -EINVAL;
610 if (optlen < 0)
611 goto out;
612
613 /* Bind this socket to a particular device like "eth0",
614 * as specified in the passed interface name. If the
615 * name is "" or the option length is zero the socket
616 * is not bound.
617 */
618 if (optlen > IFNAMSIZ - 1)
619 optlen = IFNAMSIZ - 1;
620 memset(devname, 0, sizeof(devname));
621
622 ret = -EFAULT;
623 if (copy_from_sockptr(devname, optval, optlen))
624 goto out;
625
626 index = 0;
627 if (devname[0] != '\0') {
628 struct net_device *dev;
629
630 rcu_read_lock();
631 dev = dev_get_by_name_rcu(net, devname);
632 if (dev)
633 index = dev->ifindex;
634 rcu_read_unlock();
635 ret = -ENODEV;
636 if (!dev)
637 goto out;
638 }
639
640 return sock_bindtoindex(sk, index, true);
641 out:
642 #endif
643
644 return ret;
645 }
646
647 static int sock_getbindtodevice(struct sock *sk, char __user *optval,
648 int __user *optlen, int len)
649 {
650 int ret = -ENOPROTOOPT;
651 #ifdef CONFIG_NETDEVICES
652 struct net *net = sock_net(sk);
653 char devname[IFNAMSIZ];
654
655 if (sk->sk_bound_dev_if == 0) {
656 len = 0;
657 goto zero;
658 }
659
660 ret = -EINVAL;
661 if (len < IFNAMSIZ)
662 goto out;
663
664 ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
665 if (ret)
666 goto out;
667
668 len = strlen(devname) + 1;
669
670 ret = -EFAULT;
671 if (copy_to_user(optval, devname, len))
672 goto out;
673
674 zero:
675 ret = -EFAULT;
676 if (put_user(len, optlen))
677 goto out;
678
679 ret = 0;
680
681 out:
682 #endif
683
684 return ret;
685 }
686
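/* Report whether multicast sent through this socket should also be looped
 * back locally. Returns false when we are already inside a nested
 * transmit (to stop loops) and defaults to true when there is no socket.
 */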
687 bool sk_mc_loop(struct sock *sk)
688 {
689 if (dev_recursion_level())
690 return false;
691 if (!sk)
692 return true;
693 /* IPV6_ADDRFORM can change sk->sk_family under us. */
694 switch (READ_ONCE(sk->sk_family)) {
695 case AF_INET:
696 return inet_sk(sk)->mc_loop;
697 #if IS_ENABLED(CONFIG_IPV6)
698 case AF_INET6:
699 return inet6_sk(sk)->mc_loop;
700 #endif
701 }
702 WARN_ON_ONCE(1);
703 return true;
704 }
705 EXPORT_SYMBOL(sk_mc_loop);
706
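/* The sock_set_*()/sock_no_*() helpers below are kernel-internal
 * equivalents of common SOL_SOCKET options; they take and release the
 * socket lock themselves, so in-kernel callers don't have to go through
 * sock_setsockopt(). Illustrative use only, assuming "srv" is a
 * kernel-owned struct socket *:
 *
 *	sock_set_reuseaddr(srv->sk);
 *	sock_set_sndtimeo(srv->sk, 5);	(a five second send timeout)
 */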
707 void sock_set_reuseaddr(struct sock *sk)
708 {
709 lock_sock(sk);
710 sk->sk_reuse = SK_CAN_REUSE;
711 release_sock(sk);
712 }
713 EXPORT_SYMBOL(sock_set_reuseaddr);
714
715 void sock_set_reuseport(struct sock *sk)
716 {
717 lock_sock(sk);
718 sk->sk_reuseport = true;
719 release_sock(sk);
720 }
721 EXPORT_SYMBOL(sock_set_reuseport);
722
723 void sock_no_linger(struct sock *sk)
724 {
725 lock_sock(sk);
726 sk->sk_lingertime = 0;
727 sock_set_flag(sk, SOCK_LINGER);
728 release_sock(sk);
729 }
730 EXPORT_SYMBOL(sock_no_linger);
731
732 void sock_set_priority(struct sock *sk, u32 priority)
733 {
734 lock_sock(sk);
735 sk->sk_priority = priority;
736 release_sock(sk);
737 }
738 EXPORT_SYMBOL(sock_set_priority);
739
740 void sock_set_sndtimeo(struct sock *sk, s64 secs)
741 {
742 lock_sock(sk);
743 if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
744 sk->sk_sndtimeo = secs * HZ;
745 else
746 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
747 release_sock(sk);
748 }
749 EXPORT_SYMBOL(sock_set_sndtimeo);
750
751 static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
752 {
753 if (val) {
754 sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
755 sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
756 sock_set_flag(sk, SOCK_RCVTSTAMP);
757 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
758 } else {
759 sock_reset_flag(sk, SOCK_RCVTSTAMP);
760 sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
761 }
762 }
763
764 void sock_enable_timestamps(struct sock *sk)
765 {
766 lock_sock(sk);
767 __sock_set_timestamps(sk, true, false, true);
768 release_sock(sk);
769 }
770 EXPORT_SYMBOL(sock_enable_timestamps);
771
772 void sock_set_keepalive(struct sock *sk)
773 {
774 lock_sock(sk);
775 if (sk->sk_prot->keepalive)
776 sk->sk_prot->keepalive(sk, true);
777 sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
778 release_sock(sk);
779 }
780 EXPORT_SYMBOL(sock_set_keepalive);
781
782 static void __sock_set_rcvbuf(struct sock *sk, int val)
783 {
784 /* Ensure val * 2 fits into an int, to prevent max_t() from treating it
785 * as a negative value.
786 */
787 val = min_t(int, val, INT_MAX / 2);
788 sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
789
790 /* We double it on the way in to account for "struct sk_buff" etc.
791 * overhead. Applications assume that the SO_RCVBUF setting they make
792 * will allow that much actual data to be received on that socket.
793 *
794 * Applications are unaware that "struct sk_buff" and other overheads
795 * allocate from the receive buffer during socket buffer allocation.
796 *
797 * And after considering the possible alternatives, returning the value
798 * we actually used in getsockopt is the most desirable behavior.
799 */
800 WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
801 }
802
803 void sock_set_rcvbuf(struct sock *sk, int val)
804 {
805 lock_sock(sk);
806 __sock_set_rcvbuf(sk, val);
807 release_sock(sk);
808 }
809 EXPORT_SYMBOL(sock_set_rcvbuf);
810
811 static void __sock_set_mark(struct sock *sk, u32 val)
812 {
813 if (val != sk->sk_mark) {
814 sk->sk_mark = val;
815 sk_dst_reset(sk);
816 }
817 }
818
819 void sock_set_mark(struct sock *sk, u32 val)
820 {
821 lock_sock(sk);
822 __sock_set_mark(sk, val);
823 release_sock(sk);
824 }
825 EXPORT_SYMBOL(sock_set_mark);
826
827 /*
828 * This is meant for all protocols to use and covers goings on
829 * at the socket level. Everything here is generic.
830 */
831
832 int sock_setsockopt(struct socket *sock, int level, int optname,
833 sockptr_t optval, unsigned int optlen)
834 {
835 struct sock_txtime sk_txtime;
836 struct sock *sk = sock->sk;
837 int val;
838 int valbool;
839 struct linger ling;
840 int ret = 0;
841
842 /*
843 * Options without arguments
844 */
845
846 if (optname == SO_BINDTODEVICE)
847 return sock_setbindtodevice(sk, optval, optlen);
848
849 if (optlen < sizeof(int))
850 return -EINVAL;
851
852 if (copy_from_sockptr(&val, optval, sizeof(val)))
853 return -EFAULT;
854
855 valbool = val ? 1 : 0;
856
857 lock_sock(sk);
858
859 switch (optname) {
860 case SO_DEBUG:
861 if (val && !capable(CAP_NET_ADMIN))
862 ret = -EACCES;
863 else
864 sock_valbool_flag(sk, SOCK_DBG, valbool);
865 break;
866 case SO_REUSEADDR:
867 sk->sk_reuse = (valbool ? SK_CAN_REUSE : SK_NO_REUSE);
868 break;
869 case SO_REUSEPORT:
870 sk->sk_reuseport = valbool;
871 break;
872 case SO_TYPE:
873 case SO_PROTOCOL:
874 case SO_DOMAIN:
875 case SO_ERROR:
876 ret = -ENOPROTOOPT;
877 break;
878 case SO_DONTROUTE:
879 sock_valbool_flag(sk, SOCK_LOCALROUTE, valbool);
880 sk_dst_reset(sk);
881 break;
882 case SO_BROADCAST:
883 sock_valbool_flag(sk, SOCK_BROADCAST, valbool);
884 break;
885 case SO_SNDBUF:
886 /* Don't return an error on this; BSD doesn't, and if you think
887 * about it, this is right. Otherwise apps have to
888 * play 'guess the biggest size' games. RCVBUF/SNDBUF
889 * are treated in BSD as hints.
890 */
891 val = min_t(u32, val, READ_ONCE(sysctl_wmem_max));
892 set_sndbuf:
893 /* Ensure val * 2 fits into an int, to prevent max_t()
894 * from treating it as a negative value.
895 */
896 val = min_t(int, val, INT_MAX / 2);
897 sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
898 WRITE_ONCE(sk->sk_sndbuf,
899 max_t(int, val * 2, SOCK_MIN_SNDBUF));
900 /* Wake up sending tasks if we upped the value. */
901 sk->sk_write_space(sk);
902 break;
903
904 case SO_SNDBUFFORCE:
905 if (!capable(CAP_NET_ADMIN)) {
906 ret = -EPERM;
907 break;
908 }
909
910 /* No negative values (to prevent underflow, as val will be
911 * multiplied by 2).
912 */
913 if (val < 0)
914 val = 0;
915 goto set_sndbuf;
916
917 case SO_RCVBUF:
918 /* Don't return an error on this; BSD doesn't, and if you think
919 * about it, this is right. Otherwise apps have to
920 * play 'guess the biggest size' games. RCVBUF/SNDBUF
921 * are treated in BSD as hints.
922 */
923 __sock_set_rcvbuf(sk, min_t(u32, val, READ_ONCE(sysctl_rmem_max)));
924 break;
925
926 case SO_RCVBUFFORCE:
927 if (!capable(CAP_NET_ADMIN)) {
928 ret = -EPERM;
929 break;
930 }
931
932 /* No negative values (to prevent underflow, as val will be
933 * multiplied by 2).
934 */
935 __sock_set_rcvbuf(sk, max(val, 0));
936 break;
937
938 case SO_KEEPALIVE:
939 if (sk->sk_prot->keepalive)
940 sk->sk_prot->keepalive(sk, valbool);
941 sock_valbool_flag(sk, SOCK_KEEPOPEN, valbool);
942 break;
943
944 case SO_OOBINLINE:
945 sock_valbool_flag(sk, SOCK_URGINLINE, valbool);
946 break;
947
948 case SO_NO_CHECK:
949 sk->sk_no_check_tx = valbool;
950 break;
951
952 case SO_PRIORITY:
953 if ((val >= 0 && val <= 6) ||
954 ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
955 sk->sk_priority = val;
956 else
957 ret = -EPERM;
958 break;
959
960 case SO_LINGER:
961 if (optlen < sizeof(ling)) {
962 ret = -EINVAL; /* 1003.1g */
963 break;
964 }
965 if (copy_from_sockptr(&ling, optval, sizeof(ling))) {
966 ret = -EFAULT;
967 break;
968 }
969 if (!ling.l_onoff)
970 sock_reset_flag(sk, SOCK_LINGER);
971 else {
972 #if (BITS_PER_LONG == 32)
973 if ((unsigned int)ling.l_linger >= MAX_SCHEDULE_TIMEOUT/HZ)
974 sk->sk_lingertime = MAX_SCHEDULE_TIMEOUT;
975 else
976 #endif
977 sk->sk_lingertime = (unsigned int)ling.l_linger * HZ;
978 sock_set_flag(sk, SOCK_LINGER);
979 }
980 break;
981
982 case SO_BSDCOMPAT:
983 break;
984
985 case SO_PASSCRED:
986 if (valbool)
987 set_bit(SOCK_PASSCRED, &sock->flags);
988 else
989 clear_bit(SOCK_PASSCRED, &sock->flags);
990 break;
991
992 case SO_TIMESTAMP_OLD:
993 __sock_set_timestamps(sk, valbool, false, false);
994 break;
995 case SO_TIMESTAMP_NEW:
996 __sock_set_timestamps(sk, valbool, true, false);
997 break;
998 case SO_TIMESTAMPNS_OLD:
999 __sock_set_timestamps(sk, valbool, false, true);
1000 break;
1001 case SO_TIMESTAMPNS_NEW:
1002 __sock_set_timestamps(sk, valbool, true, true);
1003 break;
1004 case SO_TIMESTAMPING_NEW:
1005 case SO_TIMESTAMPING_OLD:
1006 if (val & ~SOF_TIMESTAMPING_MASK) {
1007 ret = -EINVAL;
1008 break;
1009 }
1010
1011 if (val & SOF_TIMESTAMPING_OPT_ID &&
1012 !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) {
1013 if (sk->sk_protocol == IPPROTO_TCP &&
1014 sk->sk_type == SOCK_STREAM) {
1015 if ((1 << sk->sk_state) &
1016 (TCPF_CLOSE | TCPF_LISTEN)) {
1017 ret = -EINVAL;
1018 break;
1019 }
1020 sk->sk_tskey = tcp_sk(sk)->snd_una;
1021 } else {
1022 sk->sk_tskey = 0;
1023 }
1024 }
1025
1026 if (val & SOF_TIMESTAMPING_OPT_STATS &&
1027 !(val & SOF_TIMESTAMPING_OPT_TSONLY)) {
1028 ret = -EINVAL;
1029 break;
1030 }
1031
1032 sk->sk_tsflags = val;
1033 sock_valbool_flag(sk, SOCK_TSTAMP_NEW, optname == SO_TIMESTAMPING_NEW);
1034
1035 if (val & SOF_TIMESTAMPING_RX_SOFTWARE)
1036 sock_enable_timestamp(sk,
1037 SOCK_TIMESTAMPING_RX_SOFTWARE);
1038 else
1039 sock_disable_timestamp(sk,
1040 (1UL << SOCK_TIMESTAMPING_RX_SOFTWARE));
1041 break;
1042
1043 case SO_RCVLOWAT:
1044 if (val < 0)
1045 val = INT_MAX;
1046 if (sock->ops->set_rcvlowat)
1047 ret = sock->ops->set_rcvlowat(sk, val);
1048 else
1049 WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
1050 break;
1051
1052 case SO_RCVTIMEO_OLD:
1053 case SO_RCVTIMEO_NEW:
1054 ret = sock_set_timeout(&sk->sk_rcvtimeo, optval,
1055 optlen, optname == SO_RCVTIMEO_OLD);
1056 break;
1057
1058 case SO_SNDTIMEO_OLD:
1059 case SO_SNDTIMEO_NEW:
1060 ret = sock_set_timeout(&sk->sk_sndtimeo, optval,
1061 optlen, optname == SO_SNDTIMEO_OLD);
1062 break;
1063
1064 case SO_ATTACH_FILTER: {
1065 struct sock_fprog fprog;
1066
1067 ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1068 if (!ret)
1069 ret = sk_attach_filter(&fprog, sk);
1070 break;
1071 }
1072 case SO_ATTACH_BPF:
1073 ret = -EINVAL;
1074 if (optlen == sizeof(u32)) {
1075 u32 ufd;
1076
1077 ret = -EFAULT;
1078 if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1079 break;
1080
1081 ret = sk_attach_bpf(ufd, sk);
1082 }
1083 break;
1084
1085 case SO_ATTACH_REUSEPORT_CBPF: {
1086 struct sock_fprog fprog;
1087
1088 ret = copy_bpf_fprog_from_user(&fprog, optval, optlen);
1089 if (!ret)
1090 ret = sk_reuseport_attach_filter(&fprog, sk);
1091 break;
1092 }
1093 case SO_ATTACH_REUSEPORT_EBPF:
1094 ret = -EINVAL;
1095 if (optlen == sizeof(u32)) {
1096 u32 ufd;
1097
1098 ret = -EFAULT;
1099 if (copy_from_sockptr(&ufd, optval, sizeof(ufd)))
1100 break;
1101
1102 ret = sk_reuseport_attach_bpf(ufd, sk);
1103 }
1104 break;
1105
1106 case SO_DETACH_REUSEPORT_BPF:
1107 ret = reuseport_detach_prog(sk);
1108 break;
1109
1110 case SO_DETACH_FILTER:
1111 ret = sk_detach_filter(sk);
1112 break;
1113
1114 case SO_LOCK_FILTER:
1115 if (sock_flag(sk, SOCK_FILTER_LOCKED) && !valbool)
1116 ret = -EPERM;
1117 else
1118 sock_valbool_flag(sk, SOCK_FILTER_LOCKED, valbool);
1119 break;
1120
1121 case SO_PASSSEC:
1122 if (valbool)
1123 set_bit(SOCK_PASSSEC, &sock->flags);
1124 else
1125 clear_bit(SOCK_PASSSEC, &sock->flags);
1126 break;
1127 case SO_MARK:
1128 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1129 ret = -EPERM;
1130 break;
1131 }
1132
1133 __sock_set_mark(sk, val);
1134 break;
1135
1136 case SO_RXQ_OVFL:
1137 sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
1138 break;
1139
1140 case SO_WIFI_STATUS:
1141 sock_valbool_flag(sk, SOCK_WIFI_STATUS, valbool);
1142 break;
1143
1144 case SO_PEEK_OFF:
1145 if (sock->ops->set_peek_off)
1146 ret = sock->ops->set_peek_off(sk, val);
1147 else
1148 ret = -EOPNOTSUPP;
1149 break;
1150
1151 case SO_NOFCS:
1152 sock_valbool_flag(sk, SOCK_NOFCS, valbool);
1153 break;
1154
1155 case SO_SELECT_ERR_QUEUE:
1156 sock_valbool_flag(sk, SOCK_SELECT_ERR_QUEUE, valbool);
1157 break;
1158
1159 #ifdef CONFIG_NET_RX_BUSY_POLL
1160 case SO_BUSY_POLL:
1161 /* allow unprivileged users to decrease the value */
1162 if ((val > sk->sk_ll_usec) && !capable(CAP_NET_ADMIN))
1163 ret = -EPERM;
1164 else {
1165 if (val < 0)
1166 ret = -EINVAL;
1167 else
1168 WRITE_ONCE(sk->sk_ll_usec, val);
1169 }
1170 break;
1171 #endif
1172
1173 case SO_MAX_PACING_RATE:
1174 {
1175 unsigned long ulval = (val == ~0U) ? ~0UL : (unsigned int)val;
1176
1177 if (sizeof(ulval) != sizeof(val) &&
1178 optlen >= sizeof(ulval) &&
1179 copy_from_sockptr(&ulval, optval, sizeof(ulval))) {
1180 ret = -EFAULT;
1181 break;
1182 }
1183 if (ulval != ~0UL)
1184 cmpxchg(&sk->sk_pacing_status,
1185 SK_PACING_NONE,
1186 SK_PACING_NEEDED);
1187 /* Pairs with the READ_ONCE() in sock_getsockopt() */
1188 WRITE_ONCE(sk->sk_max_pacing_rate, ulval);
1189 sk->sk_pacing_rate = min(sk->sk_pacing_rate, ulval);
1190 break;
1191 }
1192 case SO_INCOMING_CPU:
1193 WRITE_ONCE(sk->sk_incoming_cpu, val);
1194 break;
1195
1196 case SO_CNX_ADVICE:
1197 if (val == 1)
1198 dst_negative_advice(sk);
1199 break;
1200
1201 case SO_ZEROCOPY:
1202 if (sk->sk_family == PF_INET || sk->sk_family == PF_INET6) {
1203 if (!((sk->sk_type == SOCK_STREAM &&
1204 sk->sk_protocol == IPPROTO_TCP) ||
1205 (sk->sk_type == SOCK_DGRAM &&
1206 sk->sk_protocol == IPPROTO_UDP)))
1207 ret = -ENOTSUPP;
1208 } else if (sk->sk_family != PF_RDS) {
1209 ret = -ENOTSUPP;
1210 }
1211 if (!ret) {
1212 if (val < 0 || val > 1)
1213 ret = -EINVAL;
1214 else
1215 sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool);
1216 }
1217 break;
1218
1219 case SO_TXTIME:
1220 if (optlen != sizeof(struct sock_txtime)) {
1221 ret = -EINVAL;
1222 break;
1223 } else if (copy_from_sockptr(&sk_txtime, optval,
1224 sizeof(struct sock_txtime))) {
1225 ret = -EFAULT;
1226 break;
1227 } else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
1228 ret = -EINVAL;
1229 break;
1230 }
1231 /* CLOCK_MONOTONIC is only used by sch_fq, and this packet
1232 * scheduler has enough safeguards.
1233 */
1234 if (sk_txtime.clockid != CLOCK_MONOTONIC &&
1235 !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
1236 ret = -EPERM;
1237 break;
1238 }
1239 sock_valbool_flag(sk, SOCK_TXTIME, true);
1240 sk->sk_clockid = sk_txtime.clockid;
1241 sk->sk_txtime_deadline_mode =
1242 !!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
1243 sk->sk_txtime_report_errors =
1244 !!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
1245 break;
1246
1247 case SO_BINDTOIFINDEX:
1248 ret = sock_bindtoindex_locked(sk, val);
1249 break;
1250
1251 default:
1252 ret = -ENOPROTOOPT;
1253 break;
1254 }
1255 release_sock(sk);
1256 return ret;
1257 }
1258 EXPORT_SYMBOL(sock_setsockopt);
1259
1260 static const struct cred *sk_get_peer_cred(struct sock *sk)
1261 {
1262 const struct cred *cred;
1263
1264 spin_lock(&sk->sk_peer_lock);
1265 cred = get_cred(sk->sk_peer_cred);
1266 spin_unlock(&sk->sk_peer_lock);
1267
1268 return cred;
1269 }
1270
1271 static void cred_to_ucred(struct pid *pid, const struct cred *cred,
1272 struct ucred *ucred)
1273 {
1274 ucred->pid = pid_vnr(pid);
1275 ucred->uid = ucred->gid = -1;
1276 if (cred) {
1277 struct user_namespace *current_ns = current_user_ns();
1278
1279 ucred->uid = from_kuid_munged(current_ns, cred->euid);
1280 ucred->gid = from_kgid_munged(current_ns, cred->egid);
1281 }
1282 }
1283
1284 static int groups_to_user(gid_t __user *dst, const struct group_info *src)
1285 {
1286 struct user_namespace *user_ns = current_user_ns();
1287 int i;
1288
1289 for (i = 0; i < src->ngroups; i++)
1290 if (put_user(from_kgid_munged(user_ns, src->gid[i]), dst + i))
1291 return -EFAULT;
1292
1293 return 0;
1294 }
1295
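/* getsockopt() at the SOL_SOCKET level. Each option stages its result in
 * the union below; lv is the natural size of that result, len is what
 * userspace asked for, and the smaller of the two is copied out, with the
 * actual length reported back through optlen.
 */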
1296 int sock_getsockopt(struct socket *sock, int level, int optname,
1297 char __user *optval, int __user *optlen)
1298 {
1299 struct sock *sk = sock->sk;
1300
1301 union {
1302 int val;
1303 u64 val64;
1304 unsigned long ulval;
1305 struct linger ling;
1306 struct old_timeval32 tm32;
1307 struct __kernel_old_timeval tm;
1308 struct __kernel_sock_timeval stm;
1309 struct sock_txtime txtime;
1310 } v;
1311
1312 int lv = sizeof(int);
1313 int len;
1314
1315 if (get_user(len, optlen))
1316 return -EFAULT;
1317 if (len < 0)
1318 return -EINVAL;
1319
1320 memset(&v, 0, sizeof(v));
1321
1322 switch (optname) {
1323 case SO_DEBUG:
1324 v.val = sock_flag(sk, SOCK_DBG);
1325 break;
1326
1327 case SO_DONTROUTE:
1328 v.val = sock_flag(sk, SOCK_LOCALROUTE);
1329 break;
1330
1331 case SO_BROADCAST:
1332 v.val = sock_flag(sk, SOCK_BROADCAST);
1333 break;
1334
1335 case SO_SNDBUF:
1336 v.val = READ_ONCE(sk->sk_sndbuf);
1337 break;
1338
1339 case SO_RCVBUF:
1340 v.val = READ_ONCE(sk->sk_rcvbuf);
1341 break;
1342
1343 case SO_REUSEADDR:
1344 v.val = sk->sk_reuse;
1345 break;
1346
1347 case SO_REUSEPORT:
1348 v.val = sk->sk_reuseport;
1349 break;
1350
1351 case SO_KEEPALIVE:
1352 v.val = sock_flag(sk, SOCK_KEEPOPEN);
1353 break;
1354
1355 case SO_TYPE:
1356 v.val = sk->sk_type;
1357 break;
1358
1359 case SO_PROTOCOL:
1360 v.val = sk->sk_protocol;
1361 break;
1362
1363 case SO_DOMAIN:
1364 v.val = sk->sk_family;
1365 break;
1366
1367 case SO_ERROR:
1368 v.val = -sock_error(sk);
1369 if (v.val == 0)
1370 v.val = xchg(&sk->sk_err_soft, 0);
1371 break;
1372
1373 case SO_OOBINLINE:
1374 v.val = sock_flag(sk, SOCK_URGINLINE);
1375 break;
1376
1377 case SO_NO_CHECK:
1378 v.val = sk->sk_no_check_tx;
1379 break;
1380
1381 case SO_PRIORITY:
1382 v.val = sk->sk_priority;
1383 break;
1384
1385 case SO_LINGER:
1386 lv = sizeof(v.ling);
1387 v.ling.l_onoff = sock_flag(sk, SOCK_LINGER);
1388 v.ling.l_linger = sk->sk_lingertime / HZ;
1389 break;
1390
1391 case SO_BSDCOMPAT:
1392 break;
1393
1394 case SO_TIMESTAMP_OLD:
1395 v.val = sock_flag(sk, SOCK_RCVTSTAMP) &&
1396 !sock_flag(sk, SOCK_TSTAMP_NEW) &&
1397 !sock_flag(sk, SOCK_RCVTSTAMPNS);
1398 break;
1399
1400 case SO_TIMESTAMPNS_OLD:
1401 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && !sock_flag(sk, SOCK_TSTAMP_NEW);
1402 break;
1403
1404 case SO_TIMESTAMP_NEW:
1405 v.val = sock_flag(sk, SOCK_RCVTSTAMP) && sock_flag(sk, SOCK_TSTAMP_NEW);
1406 break;
1407
1408 case SO_TIMESTAMPNS_NEW:
1409 v.val = sock_flag(sk, SOCK_RCVTSTAMPNS) && sock_flag(sk, SOCK_TSTAMP_NEW);
1410 break;
1411
1412 case SO_TIMESTAMPING_OLD:
1413 v.val = sk->sk_tsflags;
1414 break;
1415
1416 case SO_RCVTIMEO_OLD:
1417 case SO_RCVTIMEO_NEW:
1418 lv = sock_get_timeout(sk->sk_rcvtimeo, &v, SO_RCVTIMEO_OLD == optname);
1419 break;
1420
1421 case SO_SNDTIMEO_OLD:
1422 case SO_SNDTIMEO_NEW:
1423 lv = sock_get_timeout(sk->sk_sndtimeo, &v, SO_SNDTIMEO_OLD == optname);
1424 break;
1425
1426 case SO_RCVLOWAT:
1427 v.val = READ_ONCE(sk->sk_rcvlowat);
1428 break;
1429
1430 case SO_SNDLOWAT:
1431 v.val = 1;
1432 break;
1433
1434 case SO_PASSCRED:
1435 v.val = !!test_bit(SOCK_PASSCRED, &sock->flags);
1436 break;
1437
1438 case SO_PEERCRED:
1439 {
1440 struct ucred peercred;
1441 if (len > sizeof(peercred))
1442 len = sizeof(peercred);
1443
1444 spin_lock(&sk->sk_peer_lock);
1445 cred_to_ucred(sk->sk_peer_pid, sk->sk_peer_cred, &peercred);
1446 spin_unlock(&sk->sk_peer_lock);
1447
1448 if (copy_to_user(optval, &peercred, len))
1449 return -EFAULT;
1450 goto lenout;
1451 }
1452
1453 case SO_PEERGROUPS:
1454 {
1455 const struct cred *cred;
1456 int ret, n;
1457
1458 cred = sk_get_peer_cred(sk);
1459 if (!cred)
1460 return -ENODATA;
1461
1462 n = cred->group_info->ngroups;
1463 if (len < n * sizeof(gid_t)) {
1464 len = n * sizeof(gid_t);
1465 put_cred(cred);
1466 return put_user(len, optlen) ? -EFAULT : -ERANGE;
1467 }
1468 len = n * sizeof(gid_t);
1469
1470 ret = groups_to_user((gid_t __user *)optval, cred->group_info);
1471 put_cred(cred);
1472 if (ret)
1473 return ret;
1474 goto lenout;
1475 }
1476
1477 case SO_PEERNAME:
1478 {
1479 char address[128];
1480
1481 lv = sock->ops->getname(sock, (struct sockaddr *)address, 2);
1482 if (lv < 0)
1483 return -ENOTCONN;
1484 if (lv < len)
1485 return -EINVAL;
1486 if (copy_to_user(optval, address, len))
1487 return -EFAULT;
1488 goto lenout;
1489 }
1490
1491 /* Dubious BSD thing... Probably nobody even uses it, but
1492 * the UNIX standard wants it for whatever reason... -DaveM
1493 */
1494 case SO_ACCEPTCONN:
1495 v.val = sk->sk_state == TCP_LISTEN;
1496 break;
1497
1498 case SO_PASSSEC:
1499 v.val = !!test_bit(SOCK_PASSSEC, &sock->flags);
1500 break;
1501
1502 case SO_PEERSEC:
1503 return security_socket_getpeersec_stream(sock, optval, optlen, len);
1504
1505 case SO_MARK:
1506 v.val = sk->sk_mark;
1507 break;
1508
1509 case SO_RXQ_OVFL:
1510 v.val = sock_flag(sk, SOCK_RXQ_OVFL);
1511 break;
1512
1513 case SO_WIFI_STATUS:
1514 v.val = sock_flag(sk, SOCK_WIFI_STATUS);
1515 break;
1516
1517 case SO_PEEK_OFF:
1518 if (!sock->ops->set_peek_off)
1519 return -EOPNOTSUPP;
1520
1521 v.val = READ_ONCE(sk->sk_peek_off);
1522 break;
1523 case SO_NOFCS:
1524 v.val = sock_flag(sk, SOCK_NOFCS);
1525 break;
1526
1527 case SO_BINDTODEVICE:
1528 return sock_getbindtodevice(sk, optval, optlen, len);
1529
1530 case SO_GET_FILTER:
1531 len = sk_get_filter(sk, (struct sock_filter __user *)optval, len);
1532 if (len < 0)
1533 return len;
1534
1535 goto lenout;
1536
1537 case SO_LOCK_FILTER:
1538 v.val = sock_flag(sk, SOCK_FILTER_LOCKED);
1539 break;
1540
1541 case SO_BPF_EXTENSIONS:
1542 v.val = bpf_tell_extensions();
1543 break;
1544
1545 case SO_SELECT_ERR_QUEUE:
1546 v.val = sock_flag(sk, SOCK_SELECT_ERR_QUEUE);
1547 break;
1548
1549 #ifdef CONFIG_NET_RX_BUSY_POLL
1550 case SO_BUSY_POLL:
1551 v.val = READ_ONCE(sk->sk_ll_usec);
1552 break;
1553 #endif
1554
1555 case SO_MAX_PACING_RATE:
1556 /* The READ_ONCE() pairs with the WRITE_ONCE() in sock_setsockopt() */
1557 if (sizeof(v.ulval) != sizeof(v.val) && len >= sizeof(v.ulval)) {
1558 lv = sizeof(v.ulval);
1559 v.ulval = READ_ONCE(sk->sk_max_pacing_rate);
1560 } else {
1561 /* 32bit version */
1562 v.val = min_t(unsigned long, ~0U,
1563 READ_ONCE(sk->sk_max_pacing_rate));
1564 }
1565 break;
1566
1567 case SO_INCOMING_CPU:
1568 v.val = READ_ONCE(sk->sk_incoming_cpu);
1569 break;
1570
1571 case SO_MEMINFO:
1572 {
1573 u32 meminfo[SK_MEMINFO_VARS];
1574
1575 sk_get_meminfo(sk, meminfo);
1576
1577 len = min_t(unsigned int, len, sizeof(meminfo));
1578 if (copy_to_user(optval, &meminfo, len))
1579 return -EFAULT;
1580
1581 goto lenout;
1582 }
1583
1584 #ifdef CONFIG_NET_RX_BUSY_POLL
1585 case SO_INCOMING_NAPI_ID:
1586 v.val = READ_ONCE(sk->sk_napi_id);
1587
1588 /* aggregate non-NAPI IDs down to 0 */
1589 if (v.val < MIN_NAPI_ID)
1590 v.val = 0;
1591
1592 break;
1593 #endif
1594
1595 case SO_COOKIE:
1596 lv = sizeof(u64);
1597 if (len < lv)
1598 return -EINVAL;
1599 v.val64 = sock_gen_cookie(sk);
1600 break;
1601
1602 case SO_ZEROCOPY:
1603 v.val = sock_flag(sk, SOCK_ZEROCOPY);
1604 break;
1605
1606 case SO_TXTIME:
1607 lv = sizeof(v.txtime);
1608 v.txtime.clockid = sk->sk_clockid;
1609 v.txtime.flags |= sk->sk_txtime_deadline_mode ?
1610 SOF_TXTIME_DEADLINE_MODE : 0;
1611 v.txtime.flags |= sk->sk_txtime_report_errors ?
1612 SOF_TXTIME_REPORT_ERRORS : 0;
1613 break;
1614
1615 case SO_BINDTOIFINDEX:
1616 v.val = sk->sk_bound_dev_if;
1617 break;
1618
1619 default:
1620 /* We implement the SO_SNDLOWAT etc to not be settable
1621 * (1003.1g 7).
1622 */
1623 return -ENOPROTOOPT;
1624 }
1625
1626 if (len > lv)
1627 len = lv;
1628 if (copy_to_user(optval, &v, len))
1629 return -EFAULT;
1630 lenout:
1631 if (put_user(len, optlen))
1632 return -EFAULT;
1633 return 0;
1634 }
1635
1636 /*
1637 * Initialize an sk_lock.
1638 *
1639 * (We also register the sk_lock with the lock validator.)
1640 */
1641 static inline void sock_lock_init(struct sock *sk)
1642 {
1643 if (sk->sk_kern_sock)
1644 sock_lock_init_class_and_name(
1645 sk,
1646 af_family_kern_slock_key_strings[sk->sk_family],
1647 af_family_kern_slock_keys + sk->sk_family,
1648 af_family_kern_key_strings[sk->sk_family],
1649 af_family_kern_keys + sk->sk_family);
1650 else
1651 sock_lock_init_class_and_name(
1652 sk,
1653 af_family_slock_key_strings[sk->sk_family],
1654 af_family_slock_keys + sk->sk_family,
1655 af_family_key_strings[sk->sk_family],
1656 af_family_keys + sk->sk_family);
1657 }
1658
1659 /*
1660 * Copy all fields from osk to nsk but nsk->sk_refcnt must not change yet,
1661 * even temporarily, because of RCU lookups. sk_node should also be left as-is.
1662 * We must not copy fields between sk_dontcopy_begin and sk_dontcopy_end.
1663 */
1664 static void sock_copy(struct sock *nsk, const struct sock *osk)
1665 {
1666 const struct proto *prot = READ_ONCE(osk->sk_prot);
1667 #ifdef CONFIG_SECURITY_NETWORK
1668 void *sptr = nsk->sk_security;
1669 #endif
1670 memcpy(nsk, osk, offsetof(struct sock, sk_dontcopy_begin));
1671
1672 memcpy(&nsk->sk_dontcopy_end, &osk->sk_dontcopy_end,
1673 prot->obj_size - offsetof(struct sock, sk_dontcopy_end));
1674
1675 #ifdef CONFIG_SECURITY_NETWORK
1676 nsk->sk_security = sptr;
1677 security_sk_clone(osk, nsk);
1678 #endif
1679 }
1680
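/* Allocate the protocol-private struct sock, either from the protocol's
 * dedicated slab cache or via kmalloc(). For slab allocations __GFP_ZERO
 * is masked off and sk_prot_clear_nulls() is used instead when
 * init-on-alloc is in effect, so the fields RCU lookups depend on are
 * preserved. Also runs the LSM allocation hook and takes a reference on
 * the protocol module.
 */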
1681 static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority,
1682 int family)
1683 {
1684 struct sock *sk;
1685 struct kmem_cache *slab;
1686
1687 slab = prot->slab;
1688 if (slab != NULL) {
1689 sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO);
1690 if (!sk)
1691 return sk;
1692 if (want_init_on_alloc(priority))
1693 sk_prot_clear_nulls(sk, prot->obj_size);
1694 } else
1695 sk = kmalloc(prot->obj_size, priority);
1696
1697 if (sk != NULL) {
1698 if (security_sk_alloc(sk, family, priority))
1699 goto out_free;
1700
1701 if (!try_module_get(prot->owner))
1702 goto out_free_sec;
1703 sk_tx_queue_clear(sk);
1704 }
1705
1706 return sk;
1707
1708 out_free_sec:
1709 security_sk_free(sk);
1710 out_free:
1711 if (slab != NULL)
1712 kmem_cache_free(slab, sk);
1713 else
1714 kfree(sk);
1715 return NULL;
1716 }
1717
1718 static void sk_prot_free(struct proto *prot, struct sock *sk)
1719 {
1720 struct kmem_cache *slab;
1721 struct module *owner;
1722
1723 owner = prot->owner;
1724 slab = prot->slab;
1725
1726 cgroup_sk_free(&sk->sk_cgrp_data);
1727 mem_cgroup_sk_free(sk);
1728 security_sk_free(sk);
1729 if (slab != NULL)
1730 kmem_cache_free(slab, sk);
1731 else
1732 kfree(sk);
1733 module_put(owner);
1734 }
1735
1736 /**
1737 * sk_alloc - All socket objects are allocated here
1738 * @net: the applicable net namespace
1739 * @family: protocol family
1740 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1741 * @prot: struct proto associated with this new sock instance
1742 * @kern: is this to be a kernel socket?
1743 */
1744 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
1745 struct proto *prot, int kern)
1746 {
1747 struct sock *sk;
1748
1749 sk = sk_prot_alloc(prot, priority | __GFP_ZERO, family);
1750 if (sk) {
1751 sk->sk_family = family;
1752 /*
1753 * See comment in struct sock definition to understand
1754 * why we need sk_prot_creator -acme
1755 */
1756 sk->sk_prot = sk->sk_prot_creator = prot;
1757 sk->sk_kern_sock = kern;
1758 sock_lock_init(sk);
1759 sk->sk_net_refcnt = kern ? 0 : 1;
1760 if (likely(sk->sk_net_refcnt)) {
1761 get_net(net);
1762 sock_inuse_add(net, 1);
1763 }
1764
1765 sock_net_set(sk, net);
1766 refcount_set(&sk->sk_wmem_alloc, 1);
1767
1768 mem_cgroup_sk_alloc(sk);
1769 cgroup_sk_alloc(&sk->sk_cgrp_data);
1770 sock_update_classid(&sk->sk_cgrp_data);
1771 sock_update_netprioidx(&sk->sk_cgrp_data);
1772 sk_tx_queue_clear(sk);
1773 }
1774
1775 return sk;
1776 }
1777 EXPORT_SYMBOL(sk_alloc);
1778
1779 /* Sockets having SOCK_RCU_FREE will call this function after one RCU
1780 * grace period. This is the case for UDP sockets and TCP listeners.
1781 */
1782 static void __sk_destruct(struct rcu_head *head)
1783 {
1784 struct sock *sk = container_of(head, struct sock, sk_rcu);
1785 struct sk_filter *filter;
1786
1787 if (sk->sk_destruct)
1788 sk->sk_destruct(sk);
1789
1790 filter = rcu_dereference_check(sk->sk_filter,
1791 refcount_read(&sk->sk_wmem_alloc) == 0);
1792 if (filter) {
1793 sk_filter_uncharge(sk, filter);
1794 RCU_INIT_POINTER(sk->sk_filter, NULL);
1795 }
1796
1797 sock_disable_timestamp(sk, SK_FLAGS_TIMESTAMP);
1798
1799 #ifdef CONFIG_BPF_SYSCALL
1800 bpf_sk_storage_free(sk);
1801 #endif
1802
1803 if (atomic_read(&sk->sk_omem_alloc))
1804 pr_debug("%s: optmem leakage (%d bytes) detected\n",
1805 __func__, atomic_read(&sk->sk_omem_alloc));
1806
1807 if (sk->sk_frag.page) {
1808 put_page(sk->sk_frag.page);
1809 sk->sk_frag.page = NULL;
1810 }
1811
1812 /* We do not need to acquire sk->sk_peer_lock, we are the last user. */
1813 put_cred(sk->sk_peer_cred);
1814 put_pid(sk->sk_peer_pid);
1815
1816 if (likely(sk->sk_net_refcnt))
1817 put_net(sock_net(sk));
1818 sk_prot_free(sk->sk_prot_creator, sk);
1819 }
1820
1821 void sk_destruct(struct sock *sk)
1822 {
1823 bool use_call_rcu = sock_flag(sk, SOCK_RCU_FREE);
1824
1825 if (rcu_access_pointer(sk->sk_reuseport_cb)) {
1826 reuseport_detach_sock(sk);
1827 use_call_rcu = true;
1828 }
1829
1830 if (use_call_rcu)
1831 call_rcu(&sk->sk_rcu, __sk_destruct);
1832 else
1833 __sk_destruct(&sk->sk_rcu);
1834 }
1835
1836 static void __sk_free(struct sock *sk)
1837 {
1838 if (likely(sk->sk_net_refcnt))
1839 sock_inuse_add(sock_net(sk), -1);
1840
1841 if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk)))
1842 sock_diag_broadcast_destroy(sk);
1843 else
1844 sk_destruct(sk);
1845 }
1846
1847 void sk_free(struct sock *sk)
1848 {
1849 /*
1850 * We subtract one from sk_wmem_alloc and can tell whether
1851 * some packets are still in some tx queue.
1852 * If the count is not zero, sock_wfree() will call __sk_free(sk) later.
1853 */
1854 if (refcount_dec_and_test(&sk->sk_wmem_alloc))
1855 __sk_free(sk);
1856 }
1857 EXPORT_SYMBOL(sk_free);
1858
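/* Common queue and lock initialization: set up the receive, write and
 * error queues and give their spinlocks (plus sk_callback_lock) per
 * address-family lockdep classes, so lock validator reports stay
 * readable.
 */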
1859 static void sk_init_common(struct sock *sk)
1860 {
1861 skb_queue_head_init(&sk->sk_receive_queue);
1862 skb_queue_head_init(&sk->sk_write_queue);
1863 skb_queue_head_init(&sk->sk_error_queue);
1864
1865 rwlock_init(&sk->sk_callback_lock);
1866 lockdep_set_class_and_name(&sk->sk_receive_queue.lock,
1867 af_rlock_keys + sk->sk_family,
1868 af_family_rlock_key_strings[sk->sk_family]);
1869 lockdep_set_class_and_name(&sk->sk_write_queue.lock,
1870 af_wlock_keys + sk->sk_family,
1871 af_family_wlock_key_strings[sk->sk_family]);
1872 lockdep_set_class_and_name(&sk->sk_error_queue.lock,
1873 af_elock_keys + sk->sk_family,
1874 af_family_elock_key_strings[sk->sk_family]);
1875 lockdep_set_class_and_name(&sk->sk_callback_lock,
1876 af_callback_keys + sk->sk_family,
1877 af_family_clock_key_strings[sk->sk_family]);
1878 }
1879
1880 /**
1881 * sk_clone_lock - clone a socket, and lock its clone
1882 * @sk: the socket to clone
1883 * @priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
1884 *
1885 * Caller must unlock socket even in error path (bh_unlock_sock(newsk))
1886 */
1887 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
1888 {
1889 struct proto *prot = READ_ONCE(sk->sk_prot);
1890 struct sk_filter *filter;
1891 bool is_charged = true;
1892 struct sock *newsk;
1893
1894 newsk = sk_prot_alloc(prot, priority, sk->sk_family);
1895 if (!newsk)
1896 goto out;
1897
1898 sock_copy(newsk, sk);
1899
1900 newsk->sk_prot_creator = prot;
1901
1902 /* SANITY */
1903 if (likely(newsk->sk_net_refcnt)) {
1904 get_net(sock_net(newsk));
1905 sock_inuse_add(sock_net(newsk), 1);
1906 }
1907 sk_node_init(&newsk->sk_node);
1908 sock_lock_init(newsk);
1909 bh_lock_sock(newsk);
1910 newsk->sk_backlog.head = newsk->sk_backlog.tail = NULL;
1911 newsk->sk_backlog.len = 0;
1912
1913 atomic_set(&newsk->sk_rmem_alloc, 0);
1914
1915 /* sk_wmem_alloc set to one (see sk_free() and sock_wfree()) */
1916 refcount_set(&newsk->sk_wmem_alloc, 1);
1917
1918 atomic_set(&newsk->sk_omem_alloc, 0);
1919 sk_init_common(newsk);
1920
1921 newsk->sk_dst_cache = NULL;
1922 newsk->sk_dst_pending_confirm = 0;
1923 newsk->sk_wmem_queued = 0;
1924 newsk->sk_forward_alloc = 0;
1925 atomic_set(&newsk->sk_drops, 0);
1926 newsk->sk_send_head = NULL;
1927 newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
1928 atomic_set(&newsk->sk_zckey, 0);
1929
1930 sock_reset_flag(newsk, SOCK_DONE);
1931
1932 /* sk->sk_memcg will be populated at accept() time */
1933 newsk->sk_memcg = NULL;
1934
1935 cgroup_sk_clone(&newsk->sk_cgrp_data);
1936
1937 rcu_read_lock();
1938 filter = rcu_dereference(sk->sk_filter);
1939 if (filter != NULL)
1940 /* Though it's an empty new sock, the charging may fail
1941 * if sysctl_optmem_max was changed between the creation of
1942 * the original socket and the cloning.
1943 */
1944 is_charged = sk_filter_charge(newsk, filter);
1945 RCU_INIT_POINTER(newsk->sk_filter, filter);
1946 rcu_read_unlock();
1947
1948 if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
1949 /* We need to make sure that we don't uncharge the new
1950 * socket if we couldn't charge it in the first place
1951 * as otherwise we uncharge the parent's filter.
1952 */
1953 if (!is_charged)
1954 RCU_INIT_POINTER(newsk->sk_filter, NULL);
1955 sk_free_unlock_clone(newsk);
1956 newsk = NULL;
1957 goto out;
1958 }
1959 RCU_INIT_POINTER(newsk->sk_reuseport_cb, NULL);
1960
1961 if (bpf_sk_storage_clone(sk, newsk)) {
1962 sk_free_unlock_clone(newsk);
1963 newsk = NULL;
1964 goto out;
1965 }
1966
1967 /* Clear sk_user_data if parent had the pointer tagged
1968 * as not suitable for copying when cloning.
1969 */
1970 if (sk_user_data_is_nocopy(newsk))
1971 newsk->sk_user_data = NULL;
1972
1973 newsk->sk_err = 0;
1974 newsk->sk_err_soft = 0;
1975 newsk->sk_priority = 0;
1976 newsk->sk_incoming_cpu = raw_smp_processor_id();
1977
1978 /* Before updating sk_refcnt, we must commit prior changes to memory
1979 * (Documentation/RCU/rculist_nulls.rst for details)
1980 */
1981 smp_wmb();
1982 refcount_set(&newsk->sk_refcnt, 2);
1983
1984 /* Increment the counter in the same struct proto as the master
1985 * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
1986 * is the same as sk->sk_prot->socks, as this field was copied
1987 * with memcpy).
1988 *
1989 * This _changes_ the previous behaviour, where
1990 * tcp_create_openreq_child always was incrementing the
1991 * equivalent to tcp_prot->socks (inet_sock_nr), so this has
1992 * to be taken into account in all callers. -acme
1993 */
1994 sk_refcnt_debug_inc(newsk);
1995 sk_set_socket(newsk, NULL);
1996 sk_tx_queue_clear(newsk);
1997 RCU_INIT_POINTER(newsk->sk_wq, NULL);
1998
1999 if (newsk->sk_prot->sockets_allocated)
2000 sk_sockets_allocated_inc(newsk);
2001
2002 if (sock_needs_netstamp(sk) && newsk->sk_flags & SK_FLAGS_TIMESTAMP)
2003 net_enable_timestamp();
2004 out:
2005 return newsk;
2006 }
2007 EXPORT_SYMBOL_GPL(sk_clone_lock);
2008
2009 void sk_free_unlock_clone(struct sock *sk)
2010 {
2011 /* It is still a raw copy of the parent, so invalidate
2012 * the destructor and do a plain sk_free() */
2013 sk->sk_destruct = NULL;
2014 bh_unlock_sock(sk);
2015 sk_free(sk);
2016 }
2017 EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
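
/* Example (illustrative sketch, not part of this file): a typical caller of
 * sk_clone_lock() is responsible for unlocking the returned socket, per the
 * kernel-doc above; the protocol-private initialisation step is hypothetical.
 *
 *	struct sock *newsk = sk_clone_lock(sk, GFP_ATOMIC);
 *
 *	if (!newsk)
 *		return NULL;
 *	// ... initialise protocol-private fields of newsk here ...
 *	bh_unlock_sock(newsk);
 *	return newsk;
 */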
2018
2019 void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
2020 {
2021 u32 max_segs = 1;
2022
2023 sk->sk_route_caps = dst->dev->features | sk->sk_route_forced_caps;
2024 if (sk->sk_route_caps & NETIF_F_GSO)
2025 sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE;
2026 sk->sk_route_caps &= ~sk->sk_route_nocaps;
2027 if (sk_can_gso(sk)) {
2028 if (dst->header_len && !xfrm_dst_offload_ok(dst)) {
2029 sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
2030 } else {
2031 sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
2032 sk->sk_gso_max_size = dst->dev->gso_max_size;
2033 max_segs = max_t(u32, dst->dev->gso_max_segs, 1);
2034 }
2035 }
2036 sk->sk_gso_max_segs = max_segs;
2037 sk_dst_set(sk, dst);
2038 }
2039 EXPORT_SYMBOL_GPL(sk_setup_caps);
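
/* Example (illustrative sketch, not part of this file): connection-oriented
 * protocols typically call sk_setup_caps() once a route has been resolved in
 * their connect() path; the route-lookup arguments are elided here.
 *
 *	rt = ip_route_connect(fl4, daddr, saddr, ...);
 *	if (IS_ERR(rt))
 *		return PTR_ERR(rt);
 *	sk_setup_caps(sk, &rt->dst);
 */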
2040
2041 /*
2042 * Simple resource managers for sockets.
2043 */
2044
2045
2046 /*
2047 * Write buffer destructor automatically called from kfree_skb.
2048 */
2049 void sock_wfree(struct sk_buff *skb)
2050 {
2051 struct sock *sk = skb->sk;
2052 unsigned int len = skb->truesize;
2053
2054 if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
2055 /*
2056 * Keep a reference on sk_wmem_alloc; it will be released
2057 * after sk_write_space() call
2058 */
2059 WARN_ON(refcount_sub_and_test(len - 1, &sk->sk_wmem_alloc));
2060 sk->sk_write_space(sk);
2061 len = 1;
2062 }
2063 /*
2064 * if sk_wmem_alloc reaches 0, we must finish what sk_free()
2065 * could not do because of in-flight packets
2066 */
2067 if (refcount_sub_and_test(len, &sk->sk_wmem_alloc))
2068 __sk_free(sk);
2069 }
2070 EXPORT_SYMBOL(sock_wfree);
2071
2072 /* This variant of sock_wfree() is used by TCP,
2073 * since it sets SOCK_USE_WRITE_QUEUE.
2074 */
2075 void __sock_wfree(struct sk_buff *skb)
2076 {
2077 struct sock *sk = skb->sk;
2078
2079 if (refcount_sub_and_test(skb->truesize, &sk->sk_wmem_alloc))
2080 __sk_free(sk);
2081 }
2082
2083 void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
2084 {
2085 skb_orphan(skb);
2086 skb->sk = sk;
2087 #ifdef CONFIG_INET
2088 if (unlikely(!sk_fullsock(sk))) {
2089 skb->destructor = sock_edemux;
2090 sock_hold(sk);
2091 return;
2092 }
2093 #endif
2094 skb->destructor = sock_wfree;
2095 skb_set_hash_from_sk(skb, sk);
2096 /*
2097 * We used to take a refcount on sk, but the following operation
2098 * is enough to guarantee sk_free() won't free this sock until
2099 * all in-flight packets are completed
2100 */
2101 refcount_add(skb->truesize, &sk->sk_wmem_alloc);
2102 }
2103 EXPORT_SYMBOL(skb_set_owner_w);
2104
2105 static bool can_skb_orphan_partial(const struct sk_buff *skb)
2106 {
2107 #ifdef CONFIG_TLS_DEVICE
2108 /* Drivers depend on in-order delivery for crypto offload,
2109 * partial orphan breaks out-of-order-OK logic.
2110 */
2111 if (skb->decrypted)
2112 return false;
2113 #endif
2114 return (skb->destructor == sock_wfree ||
2115 (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree));
2116 }
2117
2118 /* This helper is used by netem, as it can hold packets in its
2119 * delay queue. We want to allow the owner socket to send more
2120 * packets, as if they were already TX completed by a typical driver.
2121 * But we also want to keep skb->sk set because some packet schedulers
2122 * rely on it (sch_fq for example).
2123 */
2124 void skb_orphan_partial(struct sk_buff *skb)
2125 {
2126 if (skb_is_tcp_pure_ack(skb))
2127 return;
2128
2129 if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk))
2130 return;
2131
2132 skb_orphan(skb);
2133 }
2134 EXPORT_SYMBOL(skb_orphan_partial);
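
/* Example (illustrative sketch, not part of this file): a delaying qdisc can
 * call this helper from its enqueue path so the owning socket is not throttled
 * by packets parked in the delay queue, while skb->sk stays set for sk-aware
 * schedulers. delay_qdisc_enqueue() is a made-up, simplified name; see
 * sch_netem for a real user.
 *
 *	static int delay_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 *	{
 *		skb_orphan_partial(skb);
 *		return qdisc_enqueue_tail(skb, sch);
 *	}
 */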
2135
2136 /*
2137 * Read buffer destructor automatically called from kfree_skb.
2138 */
2139 void sock_rfree(struct sk_buff *skb)
2140 {
2141 struct sock *sk = skb->sk;
2142 unsigned int len = skb->truesize;
2143
2144 atomic_sub(len, &sk->sk_rmem_alloc);
2145 sk_mem_uncharge(sk, len);
2146 }
2147 EXPORT_SYMBOL(sock_rfree);
2148
2149 /*
2150 * Buffer destructor for skbs that are not used directly in read or write
2151 * path, e.g. for error handler skbs. Automatically called from kfree_skb.
2152 */
2153 void sock_efree(struct sk_buff *skb)
2154 {
2155 sock_put(skb->sk);
2156 }
2157 EXPORT_SYMBOL(sock_efree);
2158
2159 /* Buffer destructor for prefetch/receive path where reference count may
2160 * not be held, e.g. for listen sockets.
2161 */
2162 #ifdef CONFIG_INET
2163 void sock_pfree(struct sk_buff *skb)
2164 {
2165 if (sk_is_refcounted(skb->sk))
2166 sock_gen_put(skb->sk);
2167 }
2168 EXPORT_SYMBOL(sock_pfree);
2169 #endif /* CONFIG_INET */
2170
2171 kuid_t sock_i_uid(struct sock *sk)
2172 {
2173 kuid_t uid;
2174
2175 read_lock_bh(&sk->sk_callback_lock);
2176 uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : GLOBAL_ROOT_UID;
2177 read_unlock_bh(&sk->sk_callback_lock);
2178 return uid;
2179 }
2180 EXPORT_SYMBOL(sock_i_uid);
2181
2182 unsigned long __sock_i_ino(struct sock *sk)
2183 {
2184 unsigned long ino;
2185
2186 read_lock(&sk->sk_callback_lock);
2187 ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
2188 read_unlock(&sk->sk_callback_lock);
2189 return ino;
2190 }
2191 EXPORT_SYMBOL(__sock_i_ino);
2192
2193 unsigned long sock_i_ino(struct sock *sk)
2194 {
2195 unsigned long ino;
2196
2197 local_bh_disable();
2198 ino = __sock_i_ino(sk);
2199 local_bh_enable();
2200 return ino;
2201 }
2202 EXPORT_SYMBOL(sock_i_ino);
2203
2204 /*
2205 * Allocate an skb from the socket's send buffer.
2206 */
2207 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
2208 gfp_t priority)
2209 {
2210 if (force ||
2211 refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf)) {
2212 struct sk_buff *skb = alloc_skb(size, priority);
2213
2214 if (skb) {
2215 skb_set_owner_w(skb, sk);
2216 return skb;
2217 }
2218 }
2219 return NULL;
2220 }
2221 EXPORT_SYMBOL(sock_wmalloc);
2222
2223 static void sock_ofree(struct sk_buff *skb)
2224 {
2225 struct sock *sk = skb->sk;
2226
2227 atomic_sub(skb->truesize, &sk->sk_omem_alloc);
2228 }
2229
2230 struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size,
2231 gfp_t priority)
2232 {
2233 struct sk_buff *skb;
2234
2235 /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */
2236 if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) >
2237 READ_ONCE(sysctl_optmem_max))
2238 return NULL;
2239
2240 skb = alloc_skb(size, priority);
2241 if (!skb)
2242 return NULL;
2243
2244 atomic_add(skb->truesize, &sk->sk_omem_alloc);
2245 skb->sk = sk;
2246 skb->destructor = sock_ofree;
2247 return skb;
2248 }
2249
2250 /*
2251 * Allocate a memory block from the socket's option memory buffer.
2252 */
2253 void *sock_kmalloc(struct sock *sk, int size, gfp_t priority)
2254 {
2255 int optmem_max = READ_ONCE(sysctl_optmem_max);
2256
2257 if ((unsigned int)size <= optmem_max &&
2258 atomic_read(&sk->sk_omem_alloc) + size < optmem_max) {
2259 void *mem;
2260 /* First do the add, to avoid the race if kmalloc
2261 * might sleep.
2262 */
2263 atomic_add(size, &sk->sk_omem_alloc);
2264 mem = kmalloc(size, priority);
2265 if (mem)
2266 return mem;
2267 atomic_sub(size, &sk->sk_omem_alloc);
2268 }
2269 return NULL;
2270 }
2271 EXPORT_SYMBOL(sock_kmalloc);
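
/* Example (illustrative sketch, not part of this file): option memory is
 * normally allocated and released in matching pairs so sk_omem_alloc stays
 * balanced; a setsockopt() handler might do something like the following,
 * where struct my_opt is a hypothetical per-socket option blob.
 *
 *	struct my_opt *opt;
 *
 *	opt = sock_kmalloc(sk, sizeof(*opt), GFP_KERNEL);
 *	if (!opt)
 *		return -ENOBUFS;
 *	// ... fill in *opt from optval ...
 *	// later, on error or when the option is cleared:
 *	sock_kfree_s(sk, opt, sizeof(*opt));
 */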
2272
2273 /* Free an option memory block. Note, we actually want the inline
2274 * here as this allows gcc to detect the nullify and fold away the
2275 * condition entirely.
2276 */
2277 static inline void __sock_kfree_s(struct sock *sk, void *mem, int size,
2278 const bool nullify)
2279 {
2280 if (WARN_ON_ONCE(!mem))
2281 return;
2282 if (nullify)
2283 kfree_sensitive(mem);
2284 else
2285 kfree(mem);
2286 atomic_sub(size, &sk->sk_omem_alloc);
2287 }
2288
2289 void sock_kfree_s(struct sock *sk, void *mem, int size)
2290 {
2291 __sock_kfree_s(sk, mem, size, false);
2292 }
2293 EXPORT_SYMBOL(sock_kfree_s);
2294
2295 void sock_kzfree_s(struct sock *sk, void *mem, int size)
2296 {
2297 __sock_kfree_s(sk, mem, size, true);
2298 }
2299 EXPORT_SYMBOL(sock_kzfree_s);
2300
2301 /* It is almost wait_for_tcp_memory minus release_sock/lock_sock.
2302 I think these locks should be removed for datagram sockets.
2303 */
2304 static long sock_wait_for_wmem(struct sock *sk, long timeo)
2305 {
2306 DEFINE_WAIT(wait);
2307
2308 sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2309 for (;;) {
2310 if (!timeo)
2311 break;
2312 if (signal_pending(current))
2313 break;
2314 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2315 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2316 if (refcount_read(&sk->sk_wmem_alloc) < READ_ONCE(sk->sk_sndbuf))
2317 break;
2318 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2319 break;
2320 if (READ_ONCE(sk->sk_err))
2321 break;
2322 timeo = schedule_timeout(timeo);
2323 }
2324 finish_wait(sk_sleep(sk), &wait);
2325 return timeo;
2326 }
2327
2328
2329 /*
2330 * Generic send/receive buffer handlers
2331 */
2332
2333 struct sk_buff *sock_alloc_send_pskb(struct sock *sk, unsigned long header_len,
2334 unsigned long data_len, int noblock,
2335 int *errcode, int max_page_order)
2336 {
2337 struct sk_buff *skb;
2338 long timeo;
2339 int err;
2340
2341 timeo = sock_sndtimeo(sk, noblock);
2342 for (;;) {
2343 err = sock_error(sk);
2344 if (err != 0)
2345 goto failure;
2346
2347 err = -EPIPE;
2348 if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN)
2349 goto failure;
2350
2351 if (sk_wmem_alloc_get(sk) < READ_ONCE(sk->sk_sndbuf))
2352 break;
2353
2354 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2355 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
2356 err = -EAGAIN;
2357 if (!timeo)
2358 goto failure;
2359 if (signal_pending(current))
2360 goto interrupted;
2361 timeo = sock_wait_for_wmem(sk, timeo);
2362 }
2363 skb = alloc_skb_with_frags(header_len, data_len, max_page_order,
2364 errcode, sk->sk_allocation);
2365 if (skb)
2366 skb_set_owner_w(skb, sk);
2367 return skb;
2368
2369 interrupted:
2370 err = sock_intr_errno(timeo);
2371 failure:
2372 *errcode = err;
2373 return NULL;
2374 }
2375 EXPORT_SYMBOL(sock_alloc_send_pskb);
2376
2377 struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
2378 int noblock, int *errcode)
2379 {
2380 return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
2381 }
2382 EXPORT_SYMBOL(sock_alloc_send_skb);
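
/* Example (illustrative sketch, not part of this file): a datagram sendmsg()
 * implementation typically blocks on sndbuf space via this helper; hlen is a
 * hypothetical headroom reservation.
 *
 *	skb = sock_alloc_send_skb(sk, hlen + len,
 *				  msg->msg_flags & MSG_DONTWAIT, &err);
 *	if (!skb)
 *		goto out_err;
 *	skb_reserve(skb, hlen);
 *	err = memcpy_from_msg(skb_put(skb, len), msg, len);
 */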
2383
2384 int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
2385 struct sockcm_cookie *sockc)
2386 {
2387 u32 tsflags;
2388
2389 switch (cmsg->cmsg_type) {
2390 case SO_MARK:
2391 if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2392 return -EPERM;
2393 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2394 return -EINVAL;
2395 sockc->mark = *(u32 *)CMSG_DATA(cmsg);
2396 break;
2397 case SO_TIMESTAMPING_OLD:
2398 case SO_TIMESTAMPING_NEW:
2399 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u32)))
2400 return -EINVAL;
2401
2402 tsflags = *(u32 *)CMSG_DATA(cmsg);
2403 if (tsflags & ~SOF_TIMESTAMPING_TX_RECORD_MASK)
2404 return -EINVAL;
2405
2406 sockc->tsflags &= ~SOF_TIMESTAMPING_TX_RECORD_MASK;
2407 sockc->tsflags |= tsflags;
2408 break;
2409 case SCM_TXTIME:
2410 if (!sock_flag(sk, SOCK_TXTIME))
2411 return -EINVAL;
2412 if (cmsg->cmsg_len != CMSG_LEN(sizeof(u64)))
2413 return -EINVAL;
2414 sockc->transmit_time = get_unaligned((u64 *)CMSG_DATA(cmsg));
2415 break;
2416 /* SCM_RIGHTS and SCM_CREDENTIALS are semantically in SOL_UNIX. */
2417 case SCM_RIGHTS:
2418 case SCM_CREDENTIALS:
2419 break;
2420 default:
2421 return -EINVAL;
2422 }
2423 return 0;
2424 }
2425 EXPORT_SYMBOL(__sock_cmsg_send);
2426
2427 int sock_cmsg_send(struct sock *sk, struct msghdr *msg,
2428 struct sockcm_cookie *sockc)
2429 {
2430 struct cmsghdr *cmsg;
2431 int ret;
2432
2433 for_each_cmsghdr(cmsg, msg) {
2434 if (!CMSG_OK(msg, cmsg))
2435 return -EINVAL;
2436 if (cmsg->cmsg_level != SOL_SOCKET)
2437 continue;
2438 ret = __sock_cmsg_send(sk, msg, cmsg, sockc);
2439 if (ret)
2440 return ret;
2441 }
2442 return 0;
2443 }
2444 EXPORT_SYMBOL(sock_cmsg_send);
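
/* Example (illustrative sketch, not part of this file): sendmsg() handlers
 * usually seed a sockcm_cookie from socket defaults and then let
 * sock_cmsg_send() override it from SOL_SOCKET control messages.
 *
 *	struct sockcm_cookie sockc;
 *
 *	sockcm_init(&sockc, sk);
 *	if (msg->msg_controllen) {
 *		err = sock_cmsg_send(sk, msg, &sockc);
 *		if (unlikely(err))
 *			return err;
 *	}
 *	// sockc.mark / sockc.tsflags / sockc.transmit_time now reflect the cmsgs
 */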
2445
2446 static void sk_enter_memory_pressure(struct sock *sk)
2447 {
2448 if (!sk->sk_prot->enter_memory_pressure)
2449 return;
2450
2451 sk->sk_prot->enter_memory_pressure(sk);
2452 }
2453
2454 static void sk_leave_memory_pressure(struct sock *sk)
2455 {
2456 if (sk->sk_prot->leave_memory_pressure) {
2457 sk->sk_prot->leave_memory_pressure(sk);
2458 } else {
2459 unsigned long *memory_pressure = sk->sk_prot->memory_pressure;
2460
2461 if (memory_pressure && READ_ONCE(*memory_pressure))
2462 WRITE_ONCE(*memory_pressure, 0);
2463 }
2464 }
2465
2466 DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key);
2467
2468 /**
2469 * skb_page_frag_refill - check that a page_frag contains enough room
2470 * @sz: minimum size of the fragment we want to get
2471 * @pfrag: pointer to page_frag
2472 * @gfp: priority for memory allocation
2473 *
2474 * Note: While this allocator tries to use high order pages, there is
2475 * no guarantee that allocations succeed. Therefore, @sz MUST be
2476 * less than or equal to PAGE_SIZE.
2477 */
2478 bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t gfp)
2479 {
2480 if (pfrag->page) {
2481 if (page_ref_count(pfrag->page) == 1) {
2482 pfrag->offset = 0;
2483 return true;
2484 }
2485 if (pfrag->offset + sz <= pfrag->size)
2486 return true;
2487 put_page(pfrag->page);
2488 }
2489
2490 pfrag->offset = 0;
2491 if (SKB_FRAG_PAGE_ORDER &&
2492 !static_branch_unlikely(&net_high_order_alloc_disable_key)) {
2493 /* Avoid direct reclaim but allow kswapd to wake */
2494 pfrag->page = alloc_pages((gfp & ~__GFP_DIRECT_RECLAIM) |
2495 __GFP_COMP | __GFP_NOWARN |
2496 __GFP_NORETRY,
2497 SKB_FRAG_PAGE_ORDER);
2498 if (likely(pfrag->page)) {
2499 pfrag->size = PAGE_SIZE << SKB_FRAG_PAGE_ORDER;
2500 return true;
2501 }
2502 }
2503 pfrag->page = alloc_page(gfp);
2504 if (likely(pfrag->page)) {
2505 pfrag->size = PAGE_SIZE;
2506 return true;
2507 }
2508 return false;
2509 }
2510 EXPORT_SYMBOL(skb_page_frag_refill);
2511
2512 bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag)
2513 {
2514 if (likely(skb_page_frag_refill(32U, pfrag, sk->sk_allocation)))
2515 return true;
2516
2517 sk_enter_memory_pressure(sk);
2518 sk_stream_moderate_sndbuf(sk);
2519 return false;
2520 }
2521 EXPORT_SYMBOL(sk_page_frag_refill);
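
/* Example (illustrative sketch, not part of this file): stream protocols use
 * the per-socket (or per-task) page_frag as a staging area when copying user
 * data into page fragments; wait_for_memory is a hypothetical label in the
 * caller's send loop.
 *
 *	struct page_frag *pfrag = sk_page_frag(sk);
 *
 *	if (!sk_page_frag_refill(sk, pfrag))
 *		goto wait_for_memory;
 *	copy = min_t(int, copy, pfrag->size - pfrag->offset);
 *	if (copy_from_iter(page_address(pfrag->page) + pfrag->offset,
 *			   copy, &msg->msg_iter) != copy)
 *		return -EFAULT;
 *	pfrag->offset += copy;
 */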
2522
2523 static void __lock_sock(struct sock *sk)
2524 __releases(&sk->sk_lock.slock)
2525 __acquires(&sk->sk_lock.slock)
2526 {
2527 DEFINE_WAIT(wait);
2528
2529 for (;;) {
2530 prepare_to_wait_exclusive(&sk->sk_lock.wq, &wait,
2531 TASK_UNINTERRUPTIBLE);
2532 spin_unlock_bh(&sk->sk_lock.slock);
2533 schedule();
2534 spin_lock_bh(&sk->sk_lock.slock);
2535 if (!sock_owned_by_user(sk))
2536 break;
2537 }
2538 finish_wait(&sk->sk_lock.wq, &wait);
2539 }
2540
2541 void __release_sock(struct sock *sk)
2542 __releases(&sk->sk_lock.slock)
2543 __acquires(&sk->sk_lock.slock)
2544 {
2545 struct sk_buff *skb, *next;
2546
2547 while ((skb = sk->sk_backlog.head) != NULL) {
2548 sk->sk_backlog.head = sk->sk_backlog.tail = NULL;
2549
2550 spin_unlock_bh(&sk->sk_lock.slock);
2551
2552 do {
2553 next = skb->next;
2554 prefetch(next);
2555 WARN_ON_ONCE(skb_dst_is_noref(skb));
2556 skb_mark_not_on_list(skb);
2557 sk_backlog_rcv(sk, skb);
2558
2559 cond_resched();
2560
2561 skb = next;
2562 } while (skb != NULL);
2563
2564 spin_lock_bh(&sk->sk_lock.slock);
2565 }
2566
2567 /*
2568 * Doing the zeroing here guarantees we cannot loop forever
2569 * while a wild producer attempts to flood us.
2570 */
2571 sk->sk_backlog.len = 0;
2572 }
2573
2574 void __sk_flush_backlog(struct sock *sk)
2575 {
2576 spin_lock_bh(&sk->sk_lock.slock);
2577 __release_sock(sk);
2578 spin_unlock_bh(&sk->sk_lock.slock);
2579 }
2580
2581 /**
2582 * sk_wait_data - wait for data to arrive at sk_receive_queue
2583 * @sk: sock to wait on
2584 * @timeo: for how long
2585 * @skb: last skb seen on sk_receive_queue
2586 *
2587 * Socket state, including sk->sk_err, is now changed only under the socket
2588 * lock, hence we may omit checks after joining the wait queue.
2589 * We check the receive queue before schedule() only as an optimization;
2590 * it is very likely that release_sock() added new data.
2591 */
2592 int sk_wait_data(struct sock *sk, long *timeo, const struct sk_buff *skb)
2593 {
2594 DEFINE_WAIT_FUNC(wait, woken_wake_function);
2595 int rc;
2596
2597 add_wait_queue(sk_sleep(sk), &wait);
2598 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2599 rc = sk_wait_event(sk, timeo, skb_peek_tail(&sk->sk_receive_queue) != skb, &wait);
2600 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2601 remove_wait_queue(sk_sleep(sk), &wait);
2602 return rc;
2603 }
2604 EXPORT_SYMBOL(sk_wait_data);
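
/* Example (illustrative sketch, not part of this file): a blocking recvmsg()
 * typically calls sk_wait_data() with the socket lock held, passing the last
 * skb it has already seen (or NULL).
 *
 *	long timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 *
 *	lock_sock(sk);
 *	while (!(skb = skb_peek(&sk->sk_receive_queue))) {
 *		if (!timeo || signal_pending(current))
 *			break;	// caller then returns -EAGAIN/-ERESTARTSYS
 *		sk_wait_data(sk, &timeo, NULL);
 *	}
 *	// ... consume skb ...
 *	release_sock(sk);
 */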
2605
2606 /**
2607 * __sk_mem_raise_allocated - increase memory_allocated
2608 * @sk: socket
2609 * @size: memory size to allocate
2610 * @amt: pages to allocate
2611 * @kind: allocation type
2612 *
2613 * Similar to __sk_mem_schedule(), but does not update sk_forward_alloc
2614 */
2615 int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
2616 {
2617 struct proto *prot = sk->sk_prot;
2618 long allocated = sk_memory_allocated_add(sk, amt);
2619 bool charged = true;
2620
2621 if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
2622 !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
2623 goto suppress_allocation;
2624
2625 /* Under limit. */
2626 if (allocated <= sk_prot_mem_limits(sk, 0)) {
2627 sk_leave_memory_pressure(sk);
2628 return 1;
2629 }
2630
2631 /* Under pressure. */
2632 if (allocated > sk_prot_mem_limits(sk, 1))
2633 sk_enter_memory_pressure(sk);
2634
2635 /* Over hard limit. */
2636 if (allocated > sk_prot_mem_limits(sk, 2))
2637 goto suppress_allocation;
2638
2639 /* guarantee minimum buffer size under pressure */
2640 if (kind == SK_MEM_RECV) {
2641 if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot))
2642 return 1;
2643
2644 } else { /* SK_MEM_SEND */
2645 int wmem0 = sk_get_wmem0(sk, prot);
2646
2647 if (sk->sk_type == SOCK_STREAM) {
2648 if (sk->sk_wmem_queued < wmem0)
2649 return 1;
2650 } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) {
2651 return 1;
2652 }
2653 }
2654
2655 if (sk_has_memory_pressure(sk)) {
2656 u64 alloc;
2657
2658 if (!sk_under_memory_pressure(sk))
2659 return 1;
2660 alloc = sk_sockets_allocated_read_positive(sk);
2661 if (sk_prot_mem_limits(sk, 2) > alloc *
2662 sk_mem_pages(sk->sk_wmem_queued +
2663 atomic_read(&sk->sk_rmem_alloc) +
2664 sk->sk_forward_alloc))
2665 return 1;
2666 }
2667
2668 suppress_allocation:
2669
2670 if (kind == SK_MEM_SEND && sk->sk_type == SOCK_STREAM) {
2671 sk_stream_moderate_sndbuf(sk);
2672
2673 /* Fail only if the socket is _under_ its sndbuf.
2674 * In this case we cannot block, so we have to fail.
2675 */
2676 if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
2677 return 1;
2678 }
2679
2680 if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
2681 trace_sock_exceed_buf_limit(sk, prot, allocated, kind);
2682
2683 sk_memory_allocated_sub(sk, amt);
2684
2685 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2686 mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
2687
2688 return 0;
2689 }
2690 EXPORT_SYMBOL(__sk_mem_raise_allocated);
2691
2692 /**
2693 * __sk_mem_schedule - increase sk_forward_alloc and memory_allocated
2694 * @sk: socket
2695 * @size: memory size to allocate
2696 * @kind: allocation type
2697 *
2698 * If kind is SK_MEM_SEND, it means wmem allocation. Otherwise it means
2699 * rmem allocation. This function assumes that protocols which have
2700 * memory_pressure use sk_wmem_queued as write buffer accounting.
2701 */
2702 int __sk_mem_schedule(struct sock *sk, int size, int kind)
2703 {
2704 int ret, amt = sk_mem_pages(size);
2705
2706 sk->sk_forward_alloc += amt << SK_MEM_QUANTUM_SHIFT;
2707 ret = __sk_mem_raise_allocated(sk, size, amt, kind);
2708 if (!ret)
2709 sk->sk_forward_alloc -= amt << SK_MEM_QUANTUM_SHIFT;
2710 return ret;
2711 }
2712 EXPORT_SYMBOL(__sk_mem_schedule);
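
/* Example (illustrative sketch, not part of this file): protocols normally go
 * through the sk_rmem_schedule()/sk_wmem_schedule() wrappers, which only fall
 * back to __sk_mem_schedule() when sk_forward_alloc is too small.
 *
 *	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
 *		// global/memcg limits hit: drop the skb or prune queues
 *		return -ENOBUFS;
 *	}
 *	skb_set_owner_r(skb, sk);	// charges sk_rmem_alloc and forward_alloc
 */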
2713
2714 /**
2715 * __sk_mem_reduce_allocated - reclaim memory_allocated
2716 * @sk: socket
2717 * @amount: number of quanta
2718 *
2719 * Similar to __sk_mem_reclaim(), but does not update sk_forward_alloc
2720 */
2721 void __sk_mem_reduce_allocated(struct sock *sk, int amount)
2722 {
2723 sk_memory_allocated_sub(sk, amount);
2724
2725 if (mem_cgroup_sockets_enabled && sk->sk_memcg)
2726 mem_cgroup_uncharge_skmem(sk->sk_memcg, amount);
2727
2728 if (sk_under_global_memory_pressure(sk) &&
2729 (sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)))
2730 sk_leave_memory_pressure(sk);
2731 }
2732 EXPORT_SYMBOL(__sk_mem_reduce_allocated);
2733
2734 /**
2735 * __sk_mem_reclaim - reclaim sk_forward_alloc and memory_allocated
2736 * @sk: socket
2737 * @amount: number of bytes (rounded down to a SK_MEM_QUANTUM multiple)
2738 */
2739 void __sk_mem_reclaim(struct sock *sk, int amount)
2740 {
2741 amount >>= SK_MEM_QUANTUM_SHIFT;
2742 sk->sk_forward_alloc -= amount << SK_MEM_QUANTUM_SHIFT;
2743 __sk_mem_reduce_allocated(sk, amount);
2744 }
2745 EXPORT_SYMBOL(__sk_mem_reclaim);
2746
2747 int sk_set_peek_off(struct sock *sk, int val)
2748 {
2749 WRITE_ONCE(sk->sk_peek_off, val);
2750 return 0;
2751 }
2752 EXPORT_SYMBOL_GPL(sk_set_peek_off);
2753
2754 /*
2755 * Set of default routines for initialising struct proto_ops when
2756 * the protocol does not support a particular function. In certain
2757 * cases where it makes no sense for a protocol to have a "do nothing"
2758 * function, some default processing is provided.
2759 */
2760
2761 int sock_no_bind(struct socket *sock, struct sockaddr *saddr, int len)
2762 {
2763 return -EOPNOTSUPP;
2764 }
2765 EXPORT_SYMBOL(sock_no_bind);
2766
2767 int sock_no_connect(struct socket *sock, struct sockaddr *saddr,
2768 int len, int flags)
2769 {
2770 return -EOPNOTSUPP;
2771 }
2772 EXPORT_SYMBOL(sock_no_connect);
2773
2774 int sock_no_socketpair(struct socket *sock1, struct socket *sock2)
2775 {
2776 return -EOPNOTSUPP;
2777 }
2778 EXPORT_SYMBOL(sock_no_socketpair);
2779
2780 int sock_no_accept(struct socket *sock, struct socket *newsock, int flags,
2781 bool kern)
2782 {
2783 return -EOPNOTSUPP;
2784 }
2785 EXPORT_SYMBOL(sock_no_accept);
2786
2787 int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
2788 int peer)
2789 {
2790 return -EOPNOTSUPP;
2791 }
2792 EXPORT_SYMBOL(sock_no_getname);
2793
2794 int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2795 {
2796 return -EOPNOTSUPP;
2797 }
2798 EXPORT_SYMBOL(sock_no_ioctl);
2799
2800 int sock_no_listen(struct socket *sock, int backlog)
2801 {
2802 return -EOPNOTSUPP;
2803 }
2804 EXPORT_SYMBOL(sock_no_listen);
2805
2806 int sock_no_shutdown(struct socket *sock, int how)
2807 {
2808 return -EOPNOTSUPP;
2809 }
2810 EXPORT_SYMBOL(sock_no_shutdown);
2811
2812 int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len)
2813 {
2814 return -EOPNOTSUPP;
2815 }
2816 EXPORT_SYMBOL(sock_no_sendmsg);
2817
2818 int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len)
2819 {
2820 return -EOPNOTSUPP;
2821 }
2822 EXPORT_SYMBOL(sock_no_sendmsg_locked);
2823
2824 int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len,
2825 int flags)
2826 {
2827 return -EOPNOTSUPP;
2828 }
2829 EXPORT_SYMBOL(sock_no_recvmsg);
2830
2831 int sock_no_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
2832 {
2833 /* Mirror missing mmap method error code */
2834 return -ENODEV;
2835 }
2836 EXPORT_SYMBOL(sock_no_mmap);
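
/* Example (illustrative sketch, not part of this file): address families plug
 * these stubs into their struct proto_ops for operations they do not support;
 * PF_MYPROTO, my_bind and my_dgram_ops are hypothetical names.
 *
 *	static const struct proto_ops my_dgram_ops = {
 *		.family		= PF_MYPROTO,
 *		.owner		= THIS_MODULE,
 *		.bind		= my_bind,
 *		.connect	= sock_no_connect,
 *		.socketpair	= sock_no_socketpair,
 *		.accept		= sock_no_accept,
 *		.listen		= sock_no_listen,
 *		.shutdown	= sock_no_shutdown,
 *		.mmap		= sock_no_mmap,
 *		// remaining ops (sendmsg, recvmsg, poll, ...) filled in as needed
 *	};
 */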
2837
2838 /*
2839 * When a file is received (via SCM_RIGHTS, etc), we must bump the
2840 * various sock-based usage counts.
2841 */
2842 void __receive_sock(struct file *file)
2843 {
2844 struct socket *sock;
2845 int error;
2846
2847 /*
2848 * The resulting value of "error" is ignored here since we only
2849 * need to take action when the file is a socket and testing
2850 * "sock" for NULL is sufficient.
2851 */
2852 sock = sock_from_file(file, &error);
2853 if (sock) {
2854 sock_update_netprioidx(&sock->sk->sk_cgrp_data);
2855 sock_update_classid(&sock->sk->sk_cgrp_data);
2856 }
2857 }
2858
2859 ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
2860 {
2861 ssize_t res;
2862 struct msghdr msg = {.msg_flags = flags};
2863 struct kvec iov;
2864 char *kaddr = kmap(page);
2865 iov.iov_base = kaddr + offset;
2866 iov.iov_len = size;
2867 res = kernel_sendmsg(sock, &msg, &iov, 1, size);
2868 kunmap(page);
2869 return res;
2870 }
2871 EXPORT_SYMBOL(sock_no_sendpage);
2872
2873 ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page,
2874 int offset, size_t size, int flags)
2875 {
2876 ssize_t res;
2877 struct msghdr msg = {.msg_flags = flags};
2878 struct kvec iov;
2879 char *kaddr = kmap(page);
2880
2881 iov.iov_base = kaddr + offset;
2882 iov.iov_len = size;
2883 res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size);
2884 kunmap(page);
2885 return res;
2886 }
2887 EXPORT_SYMBOL(sock_no_sendpage_locked);
2888
2889 /*
2890 * Default Socket Callbacks
2891 */
2892
2893 static void sock_def_wakeup(struct sock *sk)
2894 {
2895 struct socket_wq *wq;
2896
2897 rcu_read_lock();
2898 wq = rcu_dereference(sk->sk_wq);
2899 if (skwq_has_sleeper(wq))
2900 wake_up_interruptible_all(&wq->wait);
2901 rcu_read_unlock();
2902 }
2903
2904 static void sock_def_error_report(struct sock *sk)
2905 {
2906 struct socket_wq *wq;
2907
2908 rcu_read_lock();
2909 wq = rcu_dereference(sk->sk_wq);
2910 if (skwq_has_sleeper(wq))
2911 wake_up_interruptible_poll(&wq->wait, EPOLLERR);
2912 sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
2913 rcu_read_unlock();
2914 }
2915
2916 void sock_def_readable(struct sock *sk)
2917 {
2918 struct socket_wq *wq;
2919
2920 rcu_read_lock();
2921 wq = rcu_dereference(sk->sk_wq);
2922 if (skwq_has_sleeper(wq))
2923 wake_up_interruptible_sync_poll(&wq->wait, EPOLLIN | EPOLLPRI |
2924 EPOLLRDNORM | EPOLLRDBAND);
2925 sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
2926 rcu_read_unlock();
2927 }
2928
2929 static void sock_def_write_space(struct sock *sk)
2930 {
2931 struct socket_wq *wq;
2932
2933 rcu_read_lock();
2934
2935 /* Do not wake up a writer until he can make "significant"
2936 * progress. --DaveM
2937 */
2938 if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
2939 wq = rcu_dereference(sk->sk_wq);
2940 if (skwq_has_sleeper(wq))
2941 wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
2942 EPOLLWRNORM | EPOLLWRBAND);
2943
2944 /* Should agree with poll, otherwise some programs break */
2945 if (sock_writeable(sk))
2946 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
2947 }
2948
2949 rcu_read_unlock();
2950 }
2951
2952 static void sock_def_destruct(struct sock *sk)
2953 {
2954 }
2955
2956 void sk_send_sigurg(struct sock *sk)
2957 {
2958 if (sk->sk_socket && sk->sk_socket->file)
2959 if (send_sigurg(&sk->sk_socket->file->f_owner))
2960 sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI);
2961 }
2962 EXPORT_SYMBOL(sk_send_sigurg);
2963
2964 void sk_reset_timer(struct sock *sk, struct timer_list* timer,
2965 unsigned long expires)
2966 {
2967 if (!mod_timer(timer, expires))
2968 sock_hold(sk);
2969 }
2970 EXPORT_SYMBOL(sk_reset_timer);
2971
2972 void sk_stop_timer(struct sock *sk, struct timer_list* timer)
2973 {
2974 if (del_timer(timer))
2975 __sock_put(sk);
2976 }
2977 EXPORT_SYMBOL(sk_stop_timer);
2978
2979 void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
2980 {
2981 if (del_timer_sync(timer))
2982 __sock_put(sk);
2983 }
2984 EXPORT_SYMBOL(sk_stop_timer_sync);
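
/* Example (illustrative sketch, not part of this file): protocol timers pair
 * sk_reset_timer() with sk_stop_timer() so the sock reference taken for a
 * pending timer is dropped exactly once; my_sk() and retrans_timer are
 * hypothetical names, and the timer callback is expected to sock_put() when
 * it fires.
 *
 *	sk_reset_timer(sk, &my_sk(sk)->retrans_timer, jiffies + timeout);
 *	...
 *	sk_stop_timer(sk, &my_sk(sk)->retrans_timer);
 */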
2985
2986 void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
2987 {
2988 sk_init_common(sk);
2989 sk->sk_send_head = NULL;
2990
2991 timer_setup(&sk->sk_timer, NULL, 0);
2992
2993 sk->sk_allocation = GFP_KERNEL;
2994 sk->sk_rcvbuf = READ_ONCE(sysctl_rmem_default);
2995 sk->sk_sndbuf = READ_ONCE(sysctl_wmem_default);
2996 sk->sk_state = TCP_CLOSE;
2997 sk_set_socket(sk, sock);
2998
2999 sock_set_flag(sk, SOCK_ZAPPED);
3000
3001 if (sock) {
3002 sk->sk_type = sock->type;
3003 RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
3004 sock->sk = sk;
3005 } else {
3006 RCU_INIT_POINTER(sk->sk_wq, NULL);
3007 }
3008 sk->sk_uid = uid;
3009
3010 rwlock_init(&sk->sk_callback_lock);
3011 if (sk->sk_kern_sock)
3012 lockdep_set_class_and_name(
3013 &sk->sk_callback_lock,
3014 af_kern_callback_keys + sk->sk_family,
3015 af_family_kern_clock_key_strings[sk->sk_family]);
3016 else
3017 lockdep_set_class_and_name(
3018 &sk->sk_callback_lock,
3019 af_callback_keys + sk->sk_family,
3020 af_family_clock_key_strings[sk->sk_family]);
3021
3022 sk->sk_state_change = sock_def_wakeup;
3023 sk->sk_data_ready = sock_def_readable;
3024 sk->sk_write_space = sock_def_write_space;
3025 sk->sk_error_report = sock_def_error_report;
3026 sk->sk_destruct = sock_def_destruct;
3027
3028 sk->sk_frag.page = NULL;
3029 sk->sk_frag.offset = 0;
3030 sk->sk_peek_off = -1;
3031
3032 sk->sk_peer_pid = NULL;
3033 sk->sk_peer_cred = NULL;
3034 spin_lock_init(&sk->sk_peer_lock);
3035
3036 sk->sk_write_pending = 0;
3037 sk->sk_rcvlowat = 1;
3038 sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
3039 sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
3040
3041 sk->sk_stamp = SK_DEFAULT_STAMP;
3042 #if BITS_PER_LONG==32
3043 seqlock_init(&sk->sk_stamp_seq);
3044 #endif
3045 atomic_set(&sk->sk_zckey, 0);
3046
3047 #ifdef CONFIG_NET_RX_BUSY_POLL
3048 sk->sk_napi_id = 0;
3049 sk->sk_ll_usec = READ_ONCE(sysctl_net_busy_read);
3050 #endif
3051
3052 sk->sk_max_pacing_rate = ~0UL;
3053 sk->sk_pacing_rate = ~0UL;
3054 WRITE_ONCE(sk->sk_pacing_shift, 10);
3055 sk->sk_incoming_cpu = -1;
3056
3057 sk_rx_queue_clear(sk);
3058 /*
3059 * Before updating sk_refcnt, we must commit prior changes to memory
3060 * (Documentation/RCU/rculist_nulls.rst for details)
3061 */
3062 smp_wmb();
3063 refcount_set(&sk->sk_refcnt, 1);
3064 atomic_set(&sk->sk_drops, 0);
3065 }
3066 EXPORT_SYMBOL(sock_init_data_uid);
3067
3068 void sock_init_data(struct socket *sock, struct sock *sk)
3069 {
3070 kuid_t uid = sock ?
3071 SOCK_INODE(sock)->i_uid :
3072 make_kuid(sock_net(sk)->user_ns, 0);
3073
3074 sock_init_data_uid(sock, sk, uid);
3075 }
3076 EXPORT_SYMBOL(sock_init_data);
3077
3078 void lock_sock_nested(struct sock *sk, int subclass)
3079 {
3080 might_sleep();
3081 spin_lock_bh(&sk->sk_lock.slock);
3082 if (sk->sk_lock.owned)
3083 __lock_sock(sk);
3084 sk->sk_lock.owned = 1;
3085 spin_unlock(&sk->sk_lock.slock);
3086 /*
3087 * The sk_lock has mutex_lock() semantics here:
3088 */
3089 mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_);
3090 local_bh_enable();
3091 }
3092 EXPORT_SYMBOL(lock_sock_nested);
3093
3094 void release_sock(struct sock *sk)
3095 {
3096 spin_lock_bh(&sk->sk_lock.slock);
3097 if (sk->sk_backlog.tail)
3098 __release_sock(sk);
3099
3100 /* Warning : release_cb() might need to release sk ownership,
3101 * i.e. call sock_release_ownership(sk) before us.
3102 */
3103 if (sk->sk_prot->release_cb)
3104 sk->sk_prot->release_cb(sk);
3105
3106 sock_release_ownership(sk);
3107 if (waitqueue_active(&sk->sk_lock.wq))
3108 wake_up(&sk->sk_lock.wq);
3109 spin_unlock_bh(&sk->sk_lock.slock);
3110 }
3111 EXPORT_SYMBOL(release_sock);
3112
3113 /**
3114 * lock_sock_fast - fast version of lock_sock
3115 * @sk: socket
3116 *
3117 * This version should be used for very small sections, where the process
3118 * won't block. Returns false if the fast path is taken:
3119 *
3120 * sk_lock.slock locked, owned = 0, BH disabled
3121 *
3122 * Returns true if the slow path is taken:
3123 *
3124 * sk_lock.slock unlocked, owned = 1, BH enabled
3125 */
3126 bool lock_sock_fast(struct sock *sk)
3127 {
3128 might_sleep();
3129 spin_lock_bh(&sk->sk_lock.slock);
3130
3131 if (!sk->sk_lock.owned)
3132 /*
3133 * Note : We must disable BH
3134 */
3135 return false;
3136
3137 __lock_sock(sk);
3138 sk->sk_lock.owned = 1;
3139 spin_unlock(&sk->sk_lock.slock);
3140 /*
3141 * The sk_lock has mutex_lock() semantics here:
3142 */
3143 mutex_acquire(&sk->sk_lock.dep_map, 0, 0, _RET_IP_);
3144 local_bh_enable();
3145 return true;
3146 }
3147 EXPORT_SYMBOL(lock_sock_fast);
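
/* Example (illustrative sketch, not part of this file): callers must pass the
 * return value back to unlock_sock_fast() so the matching unlock path is used.
 *
 *	bool slow = lock_sock_fast(sk);
 *
 *	// short, non-blocking critical section on sk state
 *	unlock_sock_fast(sk, slow);
 */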
3148
3149 int sock_gettstamp(struct socket *sock, void __user *userstamp,
3150 bool timeval, bool time32)
3151 {
3152 struct sock *sk = sock->sk;
3153 struct timespec64 ts;
3154
3155 sock_enable_timestamp(sk, SOCK_TIMESTAMP);
3156 ts = ktime_to_timespec64(sock_read_timestamp(sk));
3157 if (ts.tv_sec == -1)
3158 return -ENOENT;
3159 if (ts.tv_sec == 0) {
3160 ktime_t kt = ktime_get_real();
3161 sock_write_timestamp(sk, kt);
3162 ts = ktime_to_timespec64(kt);
3163 }
3164
3165 if (timeval)
3166 ts.tv_nsec /= 1000;
3167
3168 #ifdef CONFIG_COMPAT_32BIT_TIME
3169 if (time32)
3170 return put_old_timespec32(&ts, userstamp);
3171 #endif
3172 #ifdef CONFIG_SPARC64
3173 /* beware of padding in sparc64 timeval */
3174 if (timeval && !in_compat_syscall()) {
3175 struct __kernel_old_timeval __user tv = {
3176 .tv_sec = ts.tv_sec,
3177 .tv_usec = ts.tv_nsec,
3178 };
3179 if (copy_to_user(userstamp, &tv, sizeof(tv)))
3180 return -EFAULT;
3181 return 0;
3182 }
3183 #endif
3184 return put_timespec64(&ts, userstamp);
3185 }
3186 EXPORT_SYMBOL(sock_gettstamp);
3187
3188 void sock_enable_timestamp(struct sock *sk, enum sock_flags flag)
3189 {
3190 if (!sock_flag(sk, flag)) {
3191 unsigned long previous_flags = sk->sk_flags;
3192
3193 sock_set_flag(sk, flag);
3194 /*
3195 * we just set one of the two flags which require net
3196 * time stamping, but time stamping might have been on
3197 * already because of the other one
3198 */
3199 if (sock_needs_netstamp(sk) &&
3200 !(previous_flags & SK_FLAGS_TIMESTAMP))
3201 net_enable_timestamp();
3202 }
3203 }
3204
3205 int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len,
3206 int level, int type)
3207 {
3208 struct sock_exterr_skb *serr;
3209 struct sk_buff *skb;
3210 int copied, err;
3211
3212 err = -EAGAIN;
3213 skb = sock_dequeue_err_skb(sk);
3214 if (skb == NULL)
3215 goto out;
3216
3217 copied = skb->len;
3218 if (copied > len) {
3219 msg->msg_flags |= MSG_TRUNC;
3220 copied = len;
3221 }
3222 err = skb_copy_datagram_msg(skb, 0, msg, copied);
3223 if (err)
3224 goto out_free_skb;
3225
3226 sock_recv_timestamp(msg, sk, skb);
3227
3228 serr = SKB_EXT_ERR(skb);
3229 put_cmsg(msg, level, type, sizeof(serr->ee), &serr->ee);
3230
3231 msg->msg_flags |= MSG_ERRQUEUE;
3232 err = copied;
3233
3234 out_free_skb:
3235 kfree_skb(skb);
3236 out:
3237 return err;
3238 }
3239 EXPORT_SYMBOL(sock_recv_errqueue);
3240
3241 /*
3242 * Get a socket option on a socket.
3243 *
3244 * FIX: POSIX 1003.1g is very ambiguous here. It states that
3245 * asynchronous errors should be reported by getsockopt. We assume
3246 * this means if you specify SO_ERROR (otherwise what's the point of it).
3247 */
3248 int sock_common_getsockopt(struct socket *sock, int level, int optname,
3249 char __user *optval, int __user *optlen)
3250 {
3251 struct sock *sk = sock->sk;
3252
3253 /* IPV6_ADDRFORM can change sk->sk_prot under us. */
3254 return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen);
3255 }
3256 EXPORT_SYMBOL(sock_common_getsockopt);
3257
3258 int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
3259 int flags)
3260 {
3261 struct sock *sk = sock->sk;
3262 int addr_len = 0;
3263 int err;
3264
3265 err = sk->sk_prot->recvmsg(sk, msg, size, flags & MSG_DONTWAIT,
3266 flags & ~MSG_DONTWAIT, &addr_len);
3267 if (err >= 0)
3268 msg->msg_namelen = addr_len;
3269 return err;
3270 }
3271 EXPORT_SYMBOL(sock_common_recvmsg);
3272
3273 /*
3274 * Set socket options on an inet socket.
3275 */
3276 int sock_common_setsockopt(struct socket *sock, int level, int optname,
3277 sockptr_t optval, unsigned int optlen)
3278 {
3279 struct sock *sk = sock->sk;
3280
3281 /* IPV6_ADDRFORM can change sk->sk_prot under us. */
3282 return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen);
3283 }
3284 EXPORT_SYMBOL(sock_common_setsockopt);
3285
3286 void sk_common_release(struct sock *sk)
3287 {
3288 if (sk->sk_prot->destroy)
3289 sk->sk_prot->destroy(sk);
3290
3291 /*
3292 * Observation: when sk_common_release is called, processes have
3293 * no access to the socket, but the network stack still does.
3294 * Step one, detach it from networking:
3295 *
3296 * A. Remove from hash tables.
3297 */
3298
3299 sk->sk_prot->unhash(sk);
3300
3301 /*
3302 * At this point the socket cannot receive new packets, but it is possible
3303 * that some packets are in flight because some CPU runs the receiver and
3304 * did the hash table lookup before we unhashed the socket. They will reach
3305 * the receive queue and will be purged by the socket destructor.
3306 *
3307 * Also we still have packets pending on the receive queue and probably
3308 * our own packets waiting in device queues. sock_destroy will drain the
3309 * receive queue, but transmitted packets will delay socket destruction
3310 * until the last reference is released.
3311 */
3312
3313 sock_orphan(sk);
3314
3315 xfrm_sk_free_policy(sk);
3316
3317 sk_refcnt_debug_release(sk);
3318
3319 sock_put(sk);
3320 }
3321 EXPORT_SYMBOL(sk_common_release);
3322
3323 void sk_get_meminfo(const struct sock *sk, u32 *mem)
3324 {
3325 memset(mem, 0, sizeof(*mem) * SK_MEMINFO_VARS);
3326
3327 mem[SK_MEMINFO_RMEM_ALLOC] = sk_rmem_alloc_get(sk);
3328 mem[SK_MEMINFO_RCVBUF] = READ_ONCE(sk->sk_rcvbuf);
3329 mem[SK_MEMINFO_WMEM_ALLOC] = sk_wmem_alloc_get(sk);
3330 mem[SK_MEMINFO_SNDBUF] = READ_ONCE(sk->sk_sndbuf);
3331 mem[SK_MEMINFO_FWD_ALLOC] = sk->sk_forward_alloc;
3332 mem[SK_MEMINFO_WMEM_QUEUED] = READ_ONCE(sk->sk_wmem_queued);
3333 mem[SK_MEMINFO_OPTMEM] = atomic_read(&sk->sk_omem_alloc);
3334 mem[SK_MEMINFO_BACKLOG] = READ_ONCE(sk->sk_backlog.len);
3335 mem[SK_MEMINFO_DROPS] = atomic_read(&sk->sk_drops);
3336 }
3337
3338 #ifdef CONFIG_PROC_FS
3339 #define PROTO_INUSE_NR 64 /* should be enough for the first time */
3340 struct prot_inuse {
3341 int val[PROTO_INUSE_NR];
3342 };
3343
3344 static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR);
3345
3346 void sock_prot_inuse_add(struct net *net, struct proto *prot, int val)
3347 {
3348 __this_cpu_add(net->core.prot_inuse->val[prot->inuse_idx], val);
3349 }
3350 EXPORT_SYMBOL_GPL(sock_prot_inuse_add);
3351
3352 int sock_prot_inuse_get(struct net *net, struct proto *prot)
3353 {
3354 int cpu, idx = prot->inuse_idx;
3355 int res = 0;
3356
3357 for_each_possible_cpu(cpu)
3358 res += per_cpu_ptr(net->core.prot_inuse, cpu)->val[idx];
3359
3360 return res >= 0 ? res : 0;
3361 }
3362 EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
3363
3364 static void sock_inuse_add(struct net *net, int val)
3365 {
3366 this_cpu_add(*net->core.sock_inuse, val);
3367 }
3368
3369 int sock_inuse_get(struct net *net)
3370 {
3371 int cpu, res = 0;
3372
3373 for_each_possible_cpu(cpu)
3374 res += *per_cpu_ptr(net->core.sock_inuse, cpu);
3375
3376 return res;
3377 }
3378
3379 EXPORT_SYMBOL_GPL(sock_inuse_get);
3380
3381 static int __net_init sock_inuse_init_net(struct net *net)
3382 {
3383 net->core.prot_inuse = alloc_percpu(struct prot_inuse);
3384 if (net->core.prot_inuse == NULL)
3385 return -ENOMEM;
3386
3387 net->core.sock_inuse = alloc_percpu(int);
3388 if (net->core.sock_inuse == NULL)
3389 goto out;
3390
3391 return 0;
3392
3393 out:
3394 free_percpu(net->core.prot_inuse);
3395 return -ENOMEM;
3396 }
3397
3398 static void __net_exit sock_inuse_exit_net(struct net *net)
3399 {
3400 free_percpu(net->core.prot_inuse);
3401 free_percpu(net->core.sock_inuse);
3402 }
3403
3404 static struct pernet_operations net_inuse_ops = {
3405 .init = sock_inuse_init_net,
3406 .exit = sock_inuse_exit_net,
3407 };
3408
3409 static __init int net_inuse_init(void)
3410 {
3411 if (register_pernet_subsys(&net_inuse_ops))
3412 panic("Cannot initialize net inuse counters");
3413
3414 return 0;
3415 }
3416
3417 core_initcall(net_inuse_init);
3418
3419 static int assign_proto_idx(struct proto *prot)
3420 {
3421 prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR);
3422
3423 if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) {
3424 pr_err("PROTO_INUSE_NR exhausted\n");
3425 return -ENOSPC;
3426 }
3427
3428 set_bit(prot->inuse_idx, proto_inuse_idx);
3429 return 0;
3430 }
3431
3432 static void release_proto_idx(struct proto *prot)
3433 {
3434 if (prot->inuse_idx != PROTO_INUSE_NR - 1)
3435 clear_bit(prot->inuse_idx, proto_inuse_idx);
3436 }
3437 #else
3438 static inline int assign_proto_idx(struct proto *prot)
3439 {
3440 return 0;
3441 }
3442
3443 static inline void release_proto_idx(struct proto *prot)
3444 {
3445 }
3446
3447 static void sock_inuse_add(struct net *net, int val)
3448 {
3449 }
3450 #endif
3451
3452 static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
3453 {
3454 if (!twsk_prot)
3455 return;
3456 kfree(twsk_prot->twsk_slab_name);
3457 twsk_prot->twsk_slab_name = NULL;
3458 kmem_cache_destroy(twsk_prot->twsk_slab);
3459 twsk_prot->twsk_slab = NULL;
3460 }
3461
3462 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
3463 {
3464 if (!rsk_prot)
3465 return;
3466 kfree(rsk_prot->slab_name);
3467 rsk_prot->slab_name = NULL;
3468 kmem_cache_destroy(rsk_prot->slab);
3469 rsk_prot->slab = NULL;
3470 }
3471
3472 static int req_prot_init(const struct proto *prot)
3473 {
3474 struct request_sock_ops *rsk_prot = prot->rsk_prot;
3475
3476 if (!rsk_prot)
3477 return 0;
3478
3479 rsk_prot->slab_name = kasprintf(GFP_KERNEL, "request_sock_%s",
3480 prot->name);
3481 if (!rsk_prot->slab_name)
3482 return -ENOMEM;
3483
3484 rsk_prot->slab = kmem_cache_create(rsk_prot->slab_name,
3485 rsk_prot->obj_size, 0,
3486 SLAB_ACCOUNT | prot->slab_flags,
3487 NULL);
3488
3489 if (!rsk_prot->slab) {
3490 pr_crit("%s: Can't create request sock SLAB cache!\n",
3491 prot->name);
3492 return -ENOMEM;
3493 }
3494 return 0;
3495 }
3496
3497 int proto_register(struct proto *prot, int alloc_slab)
3498 {
3499 int ret = -ENOBUFS;
3500
3501 if (alloc_slab) {
3502 prot->slab = kmem_cache_create_usercopy(prot->name,
3503 prot->obj_size, 0,
3504 SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT |
3505 prot->slab_flags,
3506 prot->useroffset, prot->usersize,
3507 NULL);
3508
3509 if (prot->slab == NULL) {
3510 pr_crit("%s: Can't create sock SLAB cache!\n",
3511 prot->name);
3512 goto out;
3513 }
3514
3515 if (req_prot_init(prot))
3516 goto out_free_request_sock_slab;
3517
3518 if (prot->twsk_prot != NULL) {
3519 prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
3520
3521 if (prot->twsk_prot->twsk_slab_name == NULL)
3522 goto out_free_request_sock_slab;
3523
3524 prot->twsk_prot->twsk_slab =
3525 kmem_cache_create(prot->twsk_prot->twsk_slab_name,
3526 prot->twsk_prot->twsk_obj_size,
3527 0,
3528 SLAB_ACCOUNT |
3529 prot->slab_flags,
3530 NULL);
3531 if (prot->twsk_prot->twsk_slab == NULL)
3532 goto out_free_timewait_sock_slab;
3533 }
3534 }
3535
3536 mutex_lock(&proto_list_mutex);
3537 ret = assign_proto_idx(prot);
3538 if (ret) {
3539 mutex_unlock(&proto_list_mutex);
3540 goto out_free_timewait_sock_slab;
3541 }
3542 list_add(&prot->node, &proto_list);
3543 mutex_unlock(&proto_list_mutex);
3544 return ret;
3545
3546 out_free_timewait_sock_slab:
3547 if (alloc_slab && prot->twsk_prot)
3548 tw_prot_cleanup(prot->twsk_prot);
3549 out_free_request_sock_slab:
3550 if (alloc_slab) {
3551 req_prot_cleanup(prot->rsk_prot);
3552
3553 kmem_cache_destroy(prot->slab);
3554 prot->slab = NULL;
3555 }
3556 out:
3557 return ret;
3558 }
3559 EXPORT_SYMBOL(proto_register);
3560
3561 void proto_unregister(struct proto *prot)
3562 {
3563 mutex_lock(&proto_list_mutex);
3564 release_proto_idx(prot);
3565 list_del(&prot->node);
3566 mutex_unlock(&proto_list_mutex);
3567
3568 kmem_cache_destroy(prot->slab);
3569 prot->slab = NULL;
3570
3571 req_prot_cleanup(prot->rsk_prot);
3572 tw_prot_cleanup(prot->twsk_prot);
3573 }
3574 EXPORT_SYMBOL(proto_unregister);
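
/* Example (illustrative sketch, not part of this file): a protocol module
 * registers its struct proto before its address family and tears both down in
 * reverse order; my_proto, my_family_ops and the init/exit names are
 * hypothetical.
 *
 *	static int __init my_proto_init(void)
 *	{
 *		int err = proto_register(&my_proto, 1);	// 1 => allocate a slab
 *
 *		if (err)
 *			return err;
 *		err = sock_register(&my_family_ops);
 *		if (err)
 *			proto_unregister(&my_proto);
 *		return err;
 *	}
 *
 *	static void __exit my_proto_exit(void)
 *	{
 *		sock_unregister(my_family_ops.family);
 *		proto_unregister(&my_proto);
 *	}
 */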
3575
3576 int sock_load_diag_module(int family, int protocol)
3577 {
3578 if (!protocol) {
3579 if (!sock_is_registered(family))
3580 return -ENOENT;
3581
3582 return request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
3583 NETLINK_SOCK_DIAG, family);
3584 }
3585
3586 #ifdef CONFIG_INET
3587 if (family == AF_INET &&
3588 protocol != IPPROTO_RAW &&
3589 protocol < MAX_INET_PROTOS &&
3590 !rcu_access_pointer(inet_protos[protocol]))
3591 return -ENOENT;
3592 #endif
3593
3594 return request_module("net-pf-%d-proto-%d-type-%d-%d", PF_NETLINK,
3595 NETLINK_SOCK_DIAG, family, protocol);
3596 }
3597 EXPORT_SYMBOL(sock_load_diag_module);
3598
3599 #ifdef CONFIG_PROC_FS
3600 static void *proto_seq_start(struct seq_file *seq, loff_t *pos)
3601 __acquires(proto_list_mutex)
3602 {
3603 mutex_lock(&proto_list_mutex);
3604 return seq_list_start_head(&proto_list, *pos);
3605 }
3606
3607 static void *proto_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3608 {
3609 return seq_list_next(v, &proto_list, pos);
3610 }
3611
3612 static void proto_seq_stop(struct seq_file *seq, void *v)
3613 __releases(proto_list_mutex)
3614 {
3615 mutex_unlock(&proto_list_mutex);
3616 }
3617
3618 static char proto_method_implemented(const void *method)
3619 {
3620 return method == NULL ? 'n' : 'y';
3621 }
3622 static long sock_prot_memory_allocated(struct proto *proto)
3623 {
3624 return proto->memory_allocated != NULL ? proto_memory_allocated(proto) : -1L;
3625 }
3626
3627 static const char *sock_prot_memory_pressure(struct proto *proto)
3628 {
3629 return proto->memory_pressure != NULL ?
3630 proto_memory_pressure(proto) ? "yes" : "no" : "NI";
3631 }
3632
3633 static void proto_seq_printf(struct seq_file *seq, struct proto *proto)
3634 {
3635
3636 seq_printf(seq, "%-9s %4u %6d %6ld %-3s %6u %-3s %-10s "
3637 "%2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c %2c\n",
3638 proto->name,
3639 proto->obj_size,
3640 sock_prot_inuse_get(seq_file_net(seq), proto),
3641 sock_prot_memory_allocated(proto),
3642 sock_prot_memory_pressure(proto),
3643 proto->max_header,
3644 proto->slab == NULL ? "no" : "yes",
3645 module_name(proto->owner),
3646 proto_method_implemented(proto->close),
3647 proto_method_implemented(proto->connect),
3648 proto_method_implemented(proto->disconnect),
3649 proto_method_implemented(proto->accept),
3650 proto_method_implemented(proto->ioctl),
3651 proto_method_implemented(proto->init),
3652 proto_method_implemented(proto->destroy),
3653 proto_method_implemented(proto->shutdown),
3654 proto_method_implemented(proto->setsockopt),
3655 proto_method_implemented(proto->getsockopt),
3656 proto_method_implemented(proto->sendmsg),
3657 proto_method_implemented(proto->recvmsg),
3658 proto_method_implemented(proto->sendpage),
3659 proto_method_implemented(proto->bind),
3660 proto_method_implemented(proto->backlog_rcv),
3661 proto_method_implemented(proto->hash),
3662 proto_method_implemented(proto->unhash),
3663 proto_method_implemented(proto->get_port),
3664 proto_method_implemented(proto->enter_memory_pressure));
3665 }
3666
3667 static int proto_seq_show(struct seq_file *seq, void *v)
3668 {
3669 if (v == &proto_list)
3670 seq_printf(seq, "%-9s %-4s %-8s %-6s %-5s %-7s %-4s %-10s %s",
3671 "protocol",
3672 "size",
3673 "sockets",
3674 "memory",
3675 "press",
3676 "maxhdr",
3677 "slab",
3678 "module",
3679 "cl co di ac io in de sh ss gs se re sp bi br ha uh gp em\n");
3680 else
3681 proto_seq_printf(seq, list_entry(v, struct proto, node));
3682 return 0;
3683 }
3684
3685 static const struct seq_operations proto_seq_ops = {
3686 .start = proto_seq_start,
3687 .next = proto_seq_next,
3688 .stop = proto_seq_stop,
3689 .show = proto_seq_show,
3690 };
3691
3692 static __net_init int proto_init_net(struct net *net)
3693 {
3694 if (!proc_create_net("protocols", 0444, net->proc_net, &proto_seq_ops,
3695 sizeof(struct seq_net_private)))
3696 return -ENOMEM;
3697
3698 return 0;
3699 }
3700
3701 static __net_exit void proto_exit_net(struct net *net)
3702 {
3703 remove_proc_entry("protocols", net->proc_net);
3704 }
3705
3706
3707 static __net_initdata struct pernet_operations proto_net_ops = {
3708 .init = proto_init_net,
3709 .exit = proto_exit_net,
3710 };
3711
3712 static int __init proto_init(void)
3713 {
3714 return register_pernet_subsys(&proto_net_ops);
3715 }
3716
3717 subsys_initcall(proto_init);
3718
3719 #endif /* PROC_FS */
3720
3721 #ifdef CONFIG_NET_RX_BUSY_POLL
3722 bool sk_busy_loop_end(void *p, unsigned long start_time)
3723 {
3724 struct sock *sk = p;
3725
3726 return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
3727 sk_busy_loop_timeout(sk, start_time);
3728 }
3729 EXPORT_SYMBOL(sk_busy_loop_end);
3730 #endif /* CONFIG_NET_RX_BUSY_POLL */
3731
3732 int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
3733 {
3734 if (!sk->sk_prot->bind_add)
3735 return -EOPNOTSUPP;
3736 return sk->sk_prot->bind_add(sk, addr, addr_len);
3737 }
3738 EXPORT_SYMBOL(sock_bind_add);
3739