// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *	ip6_flowlabel.c		IPv6 flowlabel manager.
 *
 *	Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 */

#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/pid_namespace.h>
#include <linux/jump_label_ratelimit.h>

#include <net/net_namespace.h>
#include <net/sock.h>

#include <net/ipv6.h>
#include <net/rawv6.h>
#include <net/transp_v6.h>

#include <linux/uaccess.h>

#define FL_MIN_LINGER	6	/* Minimal linger. Set to the 6s specified
				   in the old IPv6 RFC; a reasonable value.
				 */
#define FL_MAX_LINGER	150	/* Maximal linger timeout */

/* FL hash table */

#define FL_MAX_PER_SOCK	32
#define FL_MAX_SIZE	4096
#define FL_HASH_MASK	255
#define FL_HASH(l)	(ntohl(l)&FL_HASH_MASK)

static atomic_t fl_size = ATOMIC_INIT(0);
static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1];

static void ip6_fl_gc(struct timer_list *unused);
static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc);

/* FL hash table lock: it protects only the GC */

static DEFINE_SPINLOCK(ip6_fl_lock);

/* Lock for the per-socket flowlabel lists */

static DEFINE_SPINLOCK(ip6_sk_fl_lock);

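/*
 * Deferred static key: enabled while any exclusive/owned flowlabel (or one
 * carrying options) exists, so fast paths can skip per-socket label lookups
 * when there is nothing to find.  Incremented in fl_create(), decremented
 * (rate-limited by the HZ timeout) in fl_free().
 */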
DEFINE_STATIC_KEY_DEFERRED_FALSE(ipv6_flowlabel_exclusive, HZ);
EXPORT_SYMBOL(ipv6_flowlabel_exclusive);

#define for_each_fl_rcu(hash, fl)				\
	for (fl = rcu_dereference_bh(fl_ht[(hash)]);		\
	     fl != NULL;					\
	     fl = rcu_dereference_bh(fl->next))
#define for_each_fl_continue_rcu(fl)				\
	for (fl = rcu_dereference_bh(fl->next);			\
	     fl != NULL;					\
	     fl = rcu_dereference_bh(fl->next))

#define for_each_sk_fl_rcu(np, sfl)				\
	for (sfl = rcu_dereference_bh(np->ipv6_fl_list);	\
	     sfl != NULL;					\
	     sfl = rcu_dereference_bh(sfl->next))

static inline struct ip6_flowlabel *__fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	for_each_fl_rcu(FL_HASH(label), fl) {
		if (fl->label == label && net_eq(fl->fl_net, net))
			return fl;
	}
	return NULL;
}

static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label)
{
	struct ip6_flowlabel *fl;

	rcu_read_lock_bh();
	fl = __fl_lookup(net, label);
	if (fl && !atomic_inc_not_zero(&fl->users))
		fl = NULL;
	rcu_read_unlock_bh();
	return fl;
}

static bool fl_shared_exclusive(struct ip6_flowlabel *fl)
{
	return fl->share == IPV6_FL_S_EXCL ||
	       fl->share == IPV6_FL_S_PROCESS ||
	       fl->share == IPV6_FL_S_USER;
}

static void fl_free_rcu(struct rcu_head *head)
{
	struct ip6_flowlabel *fl = container_of(head, struct ip6_flowlabel, rcu);

	if (fl->share == IPV6_FL_S_PROCESS)
		put_pid(fl->owner.pid);
	kfree(fl->opt);
	kfree(fl);
}

static void fl_free(struct ip6_flowlabel *fl)
{
	if (!fl)
		return;

	if (fl_shared_exclusive(fl) || fl->opt)
		static_branch_slow_dec_deferred(&ipv6_flowlabel_exclusive);

	call_rcu(&fl->rcu, fl_free_rcu);
}

static void fl_release(struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_fl_lock);

	fl->lastuse = jiffies;
	if (atomic_dec_and_test(&fl->users)) {
		unsigned long ttd = fl->lastuse + fl->linger;

		if (time_after(ttd, fl->expires))
			fl->expires = ttd;
		ttd = fl->expires;
		if (fl->opt && fl->share == IPV6_FL_S_EXCL) {
			struct ipv6_txoptions *opt = fl->opt;

			fl->opt = NULL;
			kfree(opt);
		}
		if (!timer_pending(&ip6_fl_gc_timer) ||
		    time_after(ip6_fl_gc_timer.expires, ttd))
			mod_timer(&ip6_fl_gc_timer, ttd);
	}
	spin_unlock_bh(&ip6_fl_lock);
}

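/*
 * Garbage collector: frees labels that have no users and whose expiry time
 * has passed, then re-arms itself for the earliest remaining expiry.
 */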
static void ip6_fl_gc(struct timer_list *unused)
{
	int i;
	unsigned long now = jiffies;
	unsigned long sched = 0;

	spin_lock(&ip6_fl_lock);

	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (atomic_read(&fl->users) == 0) {
				unsigned long ttd = fl->lastuse + fl->linger;

				if (time_after(ttd, fl->expires))
					fl->expires = ttd;
				ttd = fl->expires;
				if (time_after_eq(now, ttd)) {
					*flp = fl->next;
					fl_free(fl);
					atomic_dec(&fl_size);
					continue;
				}
				if (!sched || time_before(ttd, sched))
					sched = ttd;
			}
			flp = &fl->next;
		}
	}
	if (!sched && atomic_read(&fl_size))
		sched = now + FL_MAX_LINGER;
	if (sched)
		mod_timer(&ip6_fl_gc_timer, sched);
	spin_unlock(&ip6_fl_lock);
}

static void __net_exit ip6_fl_purge(struct net *net)
{
	int i;

	spin_lock_bh(&ip6_fl_lock);
	for (i = 0; i <= FL_HASH_MASK; i++) {
		struct ip6_flowlabel *fl;
		struct ip6_flowlabel __rcu **flp;

		flp = &fl_ht[i];
		while ((fl = rcu_dereference_protected(*flp,
						       lockdep_is_held(&ip6_fl_lock))) != NULL) {
			if (net_eq(fl->fl_net, net) &&
			    atomic_read(&fl->users) == 0) {
				*flp = fl->next;
				fl_free(fl);
				atomic_dec(&fl_size);
				continue;
			}
			flp = &fl->next;
		}
	}
	spin_unlock_bh(&ip6_fl_lock);
}

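/*
 * Insert @fl into the hash table.  If @label is zero, a free label is
 * chosen at random.  Returns NULL on success; if the requested label is
 * already in use, the existing entry is returned instead (with a user
 * reference taken) and @fl is left untouched for the caller to resolve.
 */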
static struct ip6_flowlabel *fl_intern(struct net *net,
				       struct ip6_flowlabel *fl, __be32 label)
{
	struct ip6_flowlabel *lfl;

	fl->label = label & IPV6_FLOWLABEL_MASK;

	spin_lock_bh(&ip6_fl_lock);
	if (label == 0) {
		for (;;) {
			fl->label = htonl(prandom_u32()) & IPV6_FLOWLABEL_MASK;
			if (fl->label) {
				lfl = __fl_lookup(net, fl->label);
				if (!lfl)
					break;
			}
		}
	} else {
		/*
		 * We dropped the ip6_fl_lock, so this entry could have
		 * reappeared and we need to recheck for it.
		 *
		 * OTOH there is no need to search the active socket first,
		 * as is done in ipv6_flowlabel_opt - the sock is locked, so
		 * a new entry with the same label can only appear on
		 * another sock.
		 */
		lfl = __fl_lookup(net, fl->label);
		if (lfl) {
			atomic_inc(&lfl->users);
			spin_unlock_bh(&ip6_fl_lock);
			return lfl;
		}
	}

	fl->lastuse = jiffies;
	fl->next = fl_ht[FL_HASH(fl->label)];
	rcu_assign_pointer(fl_ht[FL_HASH(fl->label)], fl);
	atomic_inc(&fl_size);
	spin_unlock_bh(&ip6_fl_lock);
	return NULL;
}

/* Socket flowlabel lists */

struct ip6_flowlabel *__fl6_sock_lookup(struct sock *sk, __be32 label)
{
	struct ipv6_fl_socklist *sfl;
	struct ipv6_pinfo *np = inet6_sk(sk);

	label &= IPV6_FLOWLABEL_MASK;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		struct ip6_flowlabel *fl = sfl->fl;

		if (fl->label == label && atomic_inc_not_zero(&fl->users)) {
			fl->lastuse = jiffies;
			rcu_read_unlock_bh();
			return fl;
		}
	}
	rcu_read_unlock_bh();
	return NULL;
}
EXPORT_SYMBOL_GPL(__fl6_sock_lookup);

void fl6_free_socklist(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (!rcu_access_pointer(np->ipv6_fl_list))
		return;

	spin_lock_bh(&ip6_sk_fl_lock);
	while ((sfl = rcu_dereference_protected(np->ipv6_fl_list,
						lockdep_is_held(&ip6_sk_fl_lock))) != NULL) {
		np->ipv6_fl_list = sfl->next;
		spin_unlock_bh(&ip6_sk_fl_lock);

		fl_release(sfl->fl);
		kfree_rcu(sfl, rcu);

		spin_lock_bh(&ip6_sk_fl_lock);
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
}

/* Service routines */

/*
 * This is the only difficult place: a flowlabel enforces identical headers
 * up to and including the routing header, but the user may still supply
 * options following the rthdr.
 */

struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space,
					 struct ip6_flowlabel *fl,
					 struct ipv6_txoptions *fopt)
{
	struct ipv6_txoptions *fl_opt = fl->opt;

	if (!fopt || fopt->opt_flen == 0)
		return fl_opt;

	if (fl_opt) {
		opt_space->hopopt = fl_opt->hopopt;
		opt_space->dst0opt = fl_opt->dst0opt;
		opt_space->srcrt = fl_opt->srcrt;
		opt_space->opt_nflen = fl_opt->opt_nflen;
	} else {
		if (fopt->opt_nflen == 0)
			return fopt;
		opt_space->hopopt = NULL;
		opt_space->dst0opt = NULL;
		opt_space->srcrt = NULL;
		opt_space->opt_nflen = 0;
	}
	opt_space->dst1opt = fopt->dst1opt;
	opt_space->opt_flen = fopt->opt_flen;
	opt_space->tot_len = fopt->tot_len;
	return opt_space;
}
EXPORT_SYMBOL_GPL(fl6_merge_options);

static unsigned long check_linger(unsigned long ttl)
{
	if (ttl < FL_MIN_LINGER)
		return FL_MIN_LINGER*HZ;
	if (ttl > FL_MAX_LINGER && !capable(CAP_NET_ADMIN))
		return 0;
	return ttl*HZ;
}

static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned long expires)
{
	linger = check_linger(linger);
	if (!linger)
		return -EPERM;
	expires = check_linger(expires);
	if (!expires)
		return -EPERM;

	spin_lock_bh(&ip6_fl_lock);
	fl->lastuse = jiffies;
	if (time_before(fl->linger, linger))
		fl->linger = linger;
	if (time_before(expires, fl->linger))
		expires = fl->linger;
	if (time_before(fl->expires, fl->lastuse + expires))
		fl->expires = fl->lastuse + expires;
	spin_unlock_bh(&ip6_fl_lock);

	return 0;
}

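/*
 * Build a new label from a userspace in6_flowlabel_req.  Any ancillary
 * data appended to the request is parsed as cmsgs via
 * ip6_datagram_send_ctl(); only non-fragmentable options (hop-by-hop,
 * dst0 and the routing header) may be attached to a label, so a request
 * carrying opt_flen (options after the rthdr) is rejected with -EINVAL.
 */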
static struct ip6_flowlabel *
fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq,
	  sockptr_t optval, int optlen, int *err_p)
{
	struct ip6_flowlabel *fl = NULL;
	int olen;
	int addr_type;
	int err;

	olen = optlen - CMSG_ALIGN(sizeof(*freq));
	err = -EINVAL;
	if (olen > 64 * 1024)
		goto done;

	err = -ENOMEM;
	fl = kzalloc(sizeof(*fl), GFP_KERNEL);
	if (!fl)
		goto done;

	if (olen > 0) {
		struct msghdr msg;
		struct flowi6 flowi6;
		struct ipcm6_cookie ipc6;

		err = -ENOMEM;
		fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL);
		if (!fl->opt)
			goto done;

		memset(fl->opt, 0, sizeof(*fl->opt));
		fl->opt->tot_len = sizeof(*fl->opt) + olen;
		err = -EFAULT;
		if (copy_from_sockptr_offset(fl->opt + 1, optval,
				CMSG_ALIGN(sizeof(*freq)), olen))
			goto done;

		msg.msg_controllen = olen;
		msg.msg_control = (void *)(fl->opt + 1);
		memset(&flowi6, 0, sizeof(flowi6));

		ipc6.opt = fl->opt;
		err = ip6_datagram_send_ctl(net, sk, &msg, &flowi6, &ipc6);
		if (err)
			goto done;
		err = -EINVAL;
		if (fl->opt->opt_flen)
			goto done;
		if (fl->opt->opt_nflen == 0) {
			kfree(fl->opt);
			fl->opt = NULL;
		}
	}

	fl->fl_net = net;
	fl->expires = jiffies;
	err = fl6_renew(fl, freq->flr_linger, freq->flr_expires);
	if (err)
		goto done;
	fl->share = freq->flr_share;
	addr_type = ipv6_addr_type(&freq->flr_dst);
	if ((addr_type & IPV6_ADDR_MAPPED) ||
	    addr_type == IPV6_ADDR_ANY) {
		err = -EINVAL;
		goto done;
	}
	fl->dst = freq->flr_dst;
	atomic_set(&fl->users, 1);
	switch (fl->share) {
	case IPV6_FL_S_EXCL:
	case IPV6_FL_S_ANY:
		break;
	case IPV6_FL_S_PROCESS:
		fl->owner.pid = get_task_pid(current, PIDTYPE_PID);
		break;
	case IPV6_FL_S_USER:
		fl->owner.uid = current_euid();
		break;
	default:
		err = -EINVAL;
		goto done;
	}
	if (fl_shared_exclusive(fl) || fl->opt) {
		WRITE_ONCE(sock_net(sk)->ipv6.flowlabel_has_excl, 1);
		static_branch_deferred_inc(&ipv6_flowlabel_exclusive);
	}
	return fl;

done:
	if (fl) {
		kfree(fl->opt);
		kfree(fl);
	}
	*err_p = err;
	return NULL;
}

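/*
 * Admission control: a socket may hold at most FL_MAX_PER_SOCK labels and
 * the table at most FL_MAX_SIZE in total; the limits tighten as the table
 * fills and are relaxed for CAP_NET_ADMIN.
 */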
static int mem_check(struct sock *sk)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;
	int room = FL_MAX_SIZE - atomic_read(&fl_size);
	int count = 0;

	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)
		return 0;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl)
		count++;
	rcu_read_unlock_bh();

	if (room <= 0 ||
	    ((count >= FL_MAX_PER_SOCK ||
	      (count > 0 && room < FL_MAX_SIZE/2) || room < FL_MAX_SIZE/4) &&
	     !capable(CAP_NET_ADMIN)))
		return -ENOBUFS;

	return 0;
}

static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,
		struct ip6_flowlabel *fl)
{
	spin_lock_bh(&ip6_sk_fl_lock);
	sfl->fl = fl;
	sfl->next = np->ipv6_fl_list;
	rcu_assign_pointer(np->ipv6_fl_list, sfl);
	spin_unlock_bh(&ip6_sk_fl_lock);
}

int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq,
			   int flags)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist *sfl;

	if (flags & IPV6_FL_F_REMOTE) {
		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK;
		return 0;
	}

	if (np->repflow) {
		freq->flr_label = np->flow_label;
		return 0;
	}

	rcu_read_lock_bh();

	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) {
			spin_lock_bh(&ip6_fl_lock);
			freq->flr_label = sfl->fl->label;
			freq->flr_dst = sfl->fl->dst;
			freq->flr_share = sfl->fl->share;
			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ;
			freq->flr_linger = sfl->fl->linger / HZ;

			spin_unlock_bh(&ip6_fl_lock);
			rcu_read_unlock_bh();
			return 0;
		}
	}
	rcu_read_unlock_bh();

	return -ENOENT;
}

#define socklist_dereference(__sflp) \
	rcu_dereference_protected(__sflp, lockdep_is_held(&ip6_sk_fl_lock))

static int ipv6_flowlabel_put(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct ipv6_fl_socklist __rcu **sflp;
	struct ipv6_fl_socklist *sfl;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		if (!np->repflow)
			return -ESRCH;
		np->flow_label = 0;
		np->repflow = 0;
		return 0;
	}

	spin_lock_bh(&ip6_sk_fl_lock);
	for (sflp = &np->ipv6_fl_list;
	     (sfl = socklist_dereference(*sflp)) != NULL;
	     sflp = &sfl->next) {
		if (sfl->fl->label == freq->flr_label)
			goto found;
	}
	spin_unlock_bh(&ip6_sk_fl_lock);
	return -ESRCH;
found:
	if (freq->flr_label == (np->flow_label & IPV6_FLOWLABEL_MASK))
		np->flow_label &= ~IPV6_FLOWLABEL_MASK;
	*sflp = sfl->next;
	spin_unlock_bh(&ip6_sk_fl_lock);
	fl_release(sfl->fl);
	kfree_rcu(sfl, rcu);
	return 0;
}

static int ipv6_flowlabel_renew(struct sock *sk, struct in6_flowlabel_req *freq)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6_fl_socklist *sfl;
	int err;

	rcu_read_lock_bh();
	for_each_sk_fl_rcu(np, sfl) {
		if (sfl->fl->label == freq->flr_label) {
			err = fl6_renew(sfl->fl, freq->flr_linger,
					freq->flr_expires);
			rcu_read_unlock_bh();
			return err;
		}
	}
	rcu_read_unlock_bh();

	if (freq->flr_share == IPV6_FL_S_NONE &&
	    ns_capable(net->user_ns, CAP_NET_ADMIN)) {
		struct ip6_flowlabel *fl = fl_lookup(net, freq->flr_label);

		if (fl) {
			err = fl6_renew(fl, freq->flr_linger,
					freq->flr_expires);
			fl_release(fl);
			return err;
		}
	}
	return -ESRCH;
}

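/*
 * Attach an existing label to the socket or create a new one.  Note the
 * "recheck" path: fl_intern() may find that another task interned the same
 * label between our lookup and the insert, in which case the shared-mode
 * permission checks are redone against the entry that won the race.
 */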
static int ipv6_flowlabel_get(struct sock *sk, struct in6_flowlabel_req *freq,
		sockptr_t optval, int optlen)
{
	struct ipv6_fl_socklist *sfl, *sfl1 = NULL;
	struct ip6_flowlabel *fl, *fl1 = NULL;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	int err;

	if (freq->flr_flags & IPV6_FL_F_REFLECT) {
		if (net->ipv6.sysctl.flowlabel_consistency) {
			net_info_ratelimited("Cannot set IPV6_FL_F_REFLECT if the flowlabel_consistency sysctl is enabled\n");
			return -EPERM;
		}

		if (sk->sk_protocol != IPPROTO_TCP)
			return -ENOPROTOOPT;
		np->repflow = 1;
		return 0;
	}

	if (freq->flr_label & ~IPV6_FLOWLABEL_MASK)
		return -EINVAL;
	if (net->ipv6.sysctl.flowlabel_state_ranges &&
	    (freq->flr_label & IPV6_FLOWLABEL_STATELESS_FLAG))
		return -ERANGE;

	fl = fl_create(net, sk, freq, optval, optlen, &err);
	if (!fl)
		return err;

	sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL);

	if (freq->flr_label) {
		err = -EEXIST;
		rcu_read_lock_bh();
		for_each_sk_fl_rcu(np, sfl) {
			if (sfl->fl->label == freq->flr_label) {
				if (freq->flr_flags & IPV6_FL_F_EXCL) {
					rcu_read_unlock_bh();
					goto done;
				}
				fl1 = sfl->fl;
				if (!atomic_inc_not_zero(&fl1->users))
					fl1 = NULL;
				break;
			}
		}
		rcu_read_unlock_bh();

		if (!fl1)
			fl1 = fl_lookup(net, freq->flr_label);
		if (fl1) {
recheck:
			err = -EEXIST;
			if (freq->flr_flags & IPV6_FL_F_EXCL)
				goto release;
			err = -EPERM;
			if (fl1->share == IPV6_FL_S_EXCL ||
			    fl1->share != fl->share ||
			    ((fl1->share == IPV6_FL_S_PROCESS) &&
			     (fl1->owner.pid != fl->owner.pid)) ||
			    ((fl1->share == IPV6_FL_S_USER) &&
			     !uid_eq(fl1->owner.uid, fl->owner.uid)))
				goto release;

			err = -ENOMEM;
			if (!sfl1)
				goto release;
			if (fl->linger > fl1->linger)
				fl1->linger = fl->linger;
			if ((long)(fl->expires - fl1->expires) > 0)
				fl1->expires = fl->expires;
			fl_link(np, sfl1, fl1);
			fl_free(fl);
			return 0;

release:
			fl_release(fl1);
			goto done;
		}
	}
	err = -ENOENT;
	if (!(freq->flr_flags & IPV6_FL_F_CREATE))
		goto done;

	err = -ENOMEM;
	if (!sfl1)
		goto done;

	err = mem_check(sk);
	if (err != 0)
		goto done;

	fl1 = fl_intern(net, fl, freq->flr_label);
	if (fl1)
		goto recheck;

	if (!freq->flr_label) {
		size_t offset = offsetof(struct in6_flowlabel_req, flr_label);

		if (copy_to_sockptr_offset(optval, offset, &fl->label,
				sizeof(fl->label))) {
			/* Intentionally ignore fault. */
		}
	}

	fl_link(np, sfl1, fl);
	return 0;
done:
	fl_free(fl);
	kfree(sfl1);
	return err;
}

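/*
 * A minimal userspace sketch (not part of this file; dst_addr is a
 * placeholder struct in6_addr) of requesting an exclusive label through
 * this entry point:
 *
 *	struct in6_flowlabel_req freq = {
 *		.flr_action = IPV6_FL_A_GET,
 *		.flr_flags  = IPV6_FL_F_CREATE | IPV6_FL_F_EXCL,
 *		.flr_share  = IPV6_FL_S_EXCL,
 *		.flr_dst    = dst_addr,
 *	};
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_FLOWLABEL_MGR, &freq, sizeof(freq));
 */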
int ipv6_flowlabel_opt(struct sock *sk, sockptr_t optval, int optlen)
{
	struct in6_flowlabel_req freq;

	if (optlen < sizeof(freq))
		return -EINVAL;
	if (copy_from_sockptr(&freq, optval, sizeof(freq)))
		return -EFAULT;

	switch (freq.flr_action) {
	case IPV6_FL_A_PUT:
		return ipv6_flowlabel_put(sk, &freq);
	case IPV6_FL_A_RENEW:
		return ipv6_flowlabel_renew(sk, &freq);
	case IPV6_FL_A_GET:
		return ipv6_flowlabel_get(sk, &freq, optval, optlen);
	default:
		return -EINVAL;
	}
}

#ifdef CONFIG_PROC_FS

struct ip6fl_iter_state {
	struct seq_net_private p;
	struct pid_namespace *pid_ns;
	int bucket;
};

#define ip6fl_seq_private(seq)	((struct ip6fl_iter_state *)(seq)->private)

static struct ip6_flowlabel *ip6fl_get_first(struct seq_file *seq)
{
	struct ip6_flowlabel *fl = NULL;
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for (state->bucket = 0; state->bucket <= FL_HASH_MASK; ++state->bucket) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
	}
	fl = NULL;
out:
	return fl;
}

static struct ip6_flowlabel *ip6fl_get_next(struct seq_file *seq, struct ip6_flowlabel *fl)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);
	struct net *net = seq_file_net(seq);

	for_each_fl_continue_rcu(fl) {
		if (net_eq(fl->fl_net, net))
			goto out;
	}

try_again:
	if (++state->bucket <= FL_HASH_MASK) {
		for_each_fl_rcu(state->bucket, fl) {
			if (net_eq(fl->fl_net, net))
				goto out;
		}
		goto try_again;
	}
	fl = NULL;

out:
	return fl;
}

static struct ip6_flowlabel *ip6fl_get_idx(struct seq_file *seq, loff_t pos)
{
	struct ip6_flowlabel *fl = ip6fl_get_first(seq);

	if (fl)
		while (pos && (fl = ip6fl_get_next(seq, fl)) != NULL)
			--pos;
	return pos ? NULL : fl;
}

static void *ip6fl_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	state->pid_ns = proc_pid_ns(file_inode(seq->file)->i_sb);

	rcu_read_lock_bh();
	return *pos ? ip6fl_get_idx(seq, *pos - 1) : SEQ_START_TOKEN;
}

static void *ip6fl_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct ip6_flowlabel *fl;

	if (v == SEQ_START_TOKEN)
		fl = ip6fl_get_first(seq);
	else
		fl = ip6fl_get_next(seq, v);
	++*pos;
	return fl;
}

static void ip6fl_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock_bh();
}

static int ip6fl_seq_show(struct seq_file *seq, void *v)
{
	struct ip6fl_iter_state *state = ip6fl_seq_private(seq);

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq, "Label S Owner  Users  Linger Expires  Dst                              Opt\n");
	} else {
		struct ip6_flowlabel *fl = v;

		seq_printf(seq,
			   "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n",
			   (unsigned int)ntohl(fl->label),
			   fl->share,
			   ((fl->share == IPV6_FL_S_PROCESS) ?
			    pid_nr_ns(fl->owner.pid, state->pid_ns) :
			    ((fl->share == IPV6_FL_S_USER) ?
			     from_kuid_munged(seq_user_ns(seq), fl->owner.uid) :
			     0)),
			   atomic_read(&fl->users),
			   fl->linger/HZ,
			   (long)(fl->expires - jiffies)/HZ,
			   &fl->dst,
			   fl->opt ? fl->opt->opt_nflen : 0);
	}
	return 0;
}

static const struct seq_operations ip6fl_seq_ops = {
	.start	=	ip6fl_seq_start,
	.next	=	ip6fl_seq_next,
	.stop	=	ip6fl_seq_stop,
	.show	=	ip6fl_seq_show,
};

static int __net_init ip6_flowlabel_proc_init(struct net *net)
{
	if (!proc_create_net("ip6_flowlabel", 0444, net->proc_net,
			&ip6fl_seq_ops, sizeof(struct ip6fl_iter_state)))
		return -ENOMEM;
	return 0;
}

static void __net_exit ip6_flowlabel_proc_fini(struct net *net)
{
	remove_proc_entry("ip6_flowlabel", net->proc_net);
}
#else
static inline int ip6_flowlabel_proc_init(struct net *net)
{
	return 0;
}
static inline void ip6_flowlabel_proc_fini(struct net *net)
{
}
#endif

static void __net_exit ip6_flowlabel_net_exit(struct net *net)
{
	ip6_fl_purge(net);
	ip6_flowlabel_proc_fini(net);
}

static struct pernet_operations ip6_flowlabel_net_ops = {
	.init = ip6_flowlabel_proc_init,
	.exit = ip6_flowlabel_net_exit,
};

int ip6_flowlabel_init(void)
{
	return register_pernet_subsys(&ip6_flowlabel_net_ops);
}

void ip6_flowlabel_cleanup(void)
{
	static_key_deferred_flush(&ipv6_flowlabel_exclusive);
	del_timer(&ip6_fl_gc_timer);
	unregister_pernet_subsys(&ip6_flowlabel_net_ops);
}