xref: /kernel/linux/linux-6.6/net/sched/cls_api.c (revision 62306a36)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * net/sched/cls_api.c	Packet classifier API.
4 *
5 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6 *
7 * Changes:
8 *
9 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
10 */
11
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/errno.h>
17#include <linux/err.h>
18#include <linux/skbuff.h>
19#include <linux/init.h>
20#include <linux/kmod.h>
21#include <linux/slab.h>
22#include <linux/idr.h>
23#include <linux/jhash.h>
24#include <linux/rculist.h>
25#include <linux/rhashtable.h>
26#include <net/net_namespace.h>
27#include <net/sock.h>
28#include <net/netlink.h>
29#include <net/pkt_sched.h>
30#include <net/pkt_cls.h>
31#include <net/tc_act/tc_pedit.h>
32#include <net/tc_act/tc_mirred.h>
33#include <net/tc_act/tc_vlan.h>
34#include <net/tc_act/tc_tunnel_key.h>
35#include <net/tc_act/tc_csum.h>
36#include <net/tc_act/tc_gact.h>
37#include <net/tc_act/tc_police.h>
38#include <net/tc_act/tc_sample.h>
39#include <net/tc_act/tc_skbedit.h>
40#include <net/tc_act/tc_ct.h>
41#include <net/tc_act/tc_mpls.h>
42#include <net/tc_act/tc_gate.h>
43#include <net/flow_offload.h>
44#include <net/tc_wrapper.h>
45
46/* The list of all installed classifier types */
47static LIST_HEAD(tcf_proto_base);
48
49/* Protects the list of registered TC modules. It is a pure SMP lock. */
50static DEFINE_RWLOCK(cls_mod_lock);
51
52static struct xarray tcf_exts_miss_cookies_xa;
53struct tcf_exts_miss_cookie_node {
54	const struct tcf_chain *chain;
55	const struct tcf_proto *tp;
56	const struct tcf_exts *exts;
57	u32 chain_index;
58	u32 tp_prio;
59	u32 handle;
60	u32 miss_cookie_base;
61	struct rcu_head rcu;
62};
63
64/* Each tc action entry cookie consists of a 32-bit miss_cookie_base plus
 65 * the action index in the exts tc actions array.
66 */
67union tcf_exts_miss_cookie {
68	struct {
69		u32 miss_cookie_base;
70		u32 act_index;
71	};
72	u64 miss_cookie;
73};
74
75#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
76static int
77tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
78				u32 handle)
79{
80	struct tcf_exts_miss_cookie_node *n;
81	static u32 next;
82	int err;
83
84	if (WARN_ON(!handle || !tp->ops->get_exts))
85		return -EINVAL;
86
87	n = kzalloc(sizeof(*n), GFP_KERNEL);
88	if (!n)
89		return -ENOMEM;
90
91	n->chain_index = tp->chain->index;
92	n->chain = tp->chain;
93	n->tp_prio = tp->prio;
94	n->tp = tp;
95	n->exts = exts;
96	n->handle = handle;
97
98	err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base,
99			      n, xa_limit_32b, &next, GFP_KERNEL);
100	if (err)
101		goto err_xa_alloc;
102
103	exts->miss_cookie_node = n;
104	return 0;
105
106err_xa_alloc:
107	kfree(n);
108	return err;
109}
110
111static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts)
112{
113	struct tcf_exts_miss_cookie_node *n;
114
115	if (!exts->miss_cookie_node)
116		return;
117
118	n = exts->miss_cookie_node;
119	xa_erase(&tcf_exts_miss_cookies_xa, n->miss_cookie_base);
120	kfree_rcu(n, rcu);
121}
122
123static struct tcf_exts_miss_cookie_node *
124tcf_exts_miss_cookie_lookup(u64 miss_cookie, int *act_index)
125{
126	union tcf_exts_miss_cookie mc = { .miss_cookie = miss_cookie, };
127
128	*act_index = mc.act_index;
129	return xa_load(&tcf_exts_miss_cookies_xa, mc.miss_cookie_base);
130}
131#else /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */
132static int
133tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
134				u32 handle)
135{
136	return 0;
137}
138
139static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts)
140{
141}
142#endif /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */
143
144static u64 tcf_exts_miss_cookie_get(u32 miss_cookie_base, int act_index)
145{
146	union tcf_exts_miss_cookie mc = { .act_index = act_index, };
147
148	if (!miss_cookie_base)
149		return 0;
150
151	mc.miss_cookie_base = miss_cookie_base;
152	return mc.miss_cookie;
153}
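/* Roughly, the pieces above fit together as follows: when a classifier's
 * exts are prepared for offload, tcf_exts_miss_cookie_base_alloc() stores a
 * lookup node in tcf_exts_miss_cookies_xa and records the allocated 32-bit
 * base; the per-action cookie handed to drivers is then built with
 * tcf_exts_miss_cookie_get(base, act_index), and on a hardware miss
 * tcf_exts_miss_cookie_lookup() splits the 64-bit value back into the base
 * (to find the node) and the action index.
 */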
154
155#ifdef CONFIG_NET_CLS_ACT
156DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc);
157EXPORT_SYMBOL(tc_skb_ext_tc);
158
159void tc_skb_ext_tc_enable(void)
160{
161	static_branch_inc(&tc_skb_ext_tc);
162}
163EXPORT_SYMBOL(tc_skb_ext_tc_enable);
164
165void tc_skb_ext_tc_disable(void)
166{
167	static_branch_dec(&tc_skb_ext_tc);
168}
169EXPORT_SYMBOL(tc_skb_ext_tc_disable);
170#endif
171
172static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
173{
174	return jhash_3words(tp->chain->index, tp->prio,
175			    (__force __u32)tp->protocol, 0);
176}
177
178static void tcf_proto_signal_destroying(struct tcf_chain *chain,
179					struct tcf_proto *tp)
180{
181	struct tcf_block *block = chain->block;
182
183	mutex_lock(&block->proto_destroy_lock);
184	hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
185		     destroy_obj_hashfn(tp));
186	mutex_unlock(&block->proto_destroy_lock);
187}
188
189static bool tcf_proto_cmp(const struct tcf_proto *tp1,
190			  const struct tcf_proto *tp2)
191{
192	return tp1->chain->index == tp2->chain->index &&
193	       tp1->prio == tp2->prio &&
194	       tp1->protocol == tp2->protocol;
195}
196
197static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
198					struct tcf_proto *tp)
199{
200	u32 hash = destroy_obj_hashfn(tp);
201	struct tcf_proto *iter;
202	bool found = false;
203
204	rcu_read_lock();
205	hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
206				   destroy_ht_node, hash) {
207		if (tcf_proto_cmp(tp, iter)) {
208			found = true;
209			break;
210		}
211	}
212	rcu_read_unlock();
213
214	return found;
215}
216
217static void
218tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
219{
220	struct tcf_block *block = chain->block;
221
222	mutex_lock(&block->proto_destroy_lock);
223	if (hash_hashed(&tp->destroy_ht_node))
224		hash_del_rcu(&tp->destroy_ht_node);
225	mutex_unlock(&block->proto_destroy_lock);
226}
227
228/* Find classifier type by string name */
229
230static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
231{
232	const struct tcf_proto_ops *t, *res = NULL;
233
234	if (kind) {
235		read_lock(&cls_mod_lock);
236		list_for_each_entry(t, &tcf_proto_base, head) {
237			if (strcmp(kind, t->kind) == 0) {
238				if (try_module_get(t->owner))
239					res = t;
240				break;
241			}
242		}
243		read_unlock(&cls_mod_lock);
244	}
245	return res;
246}
247
248static const struct tcf_proto_ops *
249tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
250		     struct netlink_ext_ack *extack)
251{
252	const struct tcf_proto_ops *ops;
253
254	ops = __tcf_proto_lookup_ops(kind);
255	if (ops)
256		return ops;
257#ifdef CONFIG_MODULES
258	if (rtnl_held)
259		rtnl_unlock();
260	request_module("cls_%s", kind);
261	if (rtnl_held)
262		rtnl_lock();
263	ops = __tcf_proto_lookup_ops(kind);
264	/* We dropped the RTNL semaphore in order to perform
265	 * the module load. So, even if we succeeded in loading
266	 * the module we have to replay the request. We indicate
267	 * this using -EAGAIN.
268	 */
269	if (ops) {
270		module_put(ops->owner);
271		return ERR_PTR(-EAGAIN);
272	}
273#endif
274	NL_SET_ERR_MSG(extack, "TC classifier not found");
275	return ERR_PTR(-ENOENT);
276}
277
278/* Register(unregister) new classifier type */
279
280int register_tcf_proto_ops(struct tcf_proto_ops *ops)
281{
282	struct tcf_proto_ops *t;
283	int rc = -EEXIST;
284
285	write_lock(&cls_mod_lock);
286	list_for_each_entry(t, &tcf_proto_base, head)
287		if (!strcmp(ops->kind, t->kind))
288			goto out;
289
290	list_add_tail(&ops->head, &tcf_proto_base);
291	rc = 0;
292out:
293	write_unlock(&cls_mod_lock);
294	return rc;
295}
296EXPORT_SYMBOL(register_tcf_proto_ops);
297
298static struct workqueue_struct *tc_filter_wq;
299
300void unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
301{
302	struct tcf_proto_ops *t;
303	int rc = -ENOENT;
304
305	/* Wait for outstanding call_rcu()s, if any, from a
306	 * tcf_proto_ops's destroy() handler.
307	 */
308	rcu_barrier();
309	flush_workqueue(tc_filter_wq);
310
311	write_lock(&cls_mod_lock);
312	list_for_each_entry(t, &tcf_proto_base, head) {
313		if (t == ops) {
314			list_del(&t->head);
315			rc = 0;
316			break;
317		}
318	}
319	write_unlock(&cls_mod_lock);
320
321	WARN(rc, "unregister tc filter kind(%s) failed %d\n", ops->kind, rc);
322}
323EXPORT_SYMBOL(unregister_tcf_proto_ops);
324
325bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
326{
327	INIT_RCU_WORK(rwork, func);
328	return queue_rcu_work(tc_filter_wq, rwork);
329}
330EXPORT_SYMBOL(tcf_queue_work);
331
332/* Select new prio value from the range managed by the kernel. */
333
334static inline u32 tcf_auto_prio(struct tcf_proto *tp)
335{
336	u32 first = TC_H_MAKE(0xC0000000U, 0U);
337
338	if (tp)
339		first = tp->prio - 1;
340
341	return TC_H_MAJ(first);
342}
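/* For example, with an empty chain the first auto-allocated priority is
 * TC_H_MAJ(0xC0000000), i.e. 0xC000 in the upper 16 bits; each subsequent
 * auto allocation then lands one major step below the current head, giving
 * 0xBFFF, 0xBFFE, and so on.
 */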
343
344static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
345{
346	if (kind)
347		return nla_strscpy(name, kind, IFNAMSIZ) < 0;
348	memset(name, 0, IFNAMSIZ);
349	return false;
350}
351
352static bool tcf_proto_is_unlocked(const char *kind)
353{
354	const struct tcf_proto_ops *ops;
355	bool ret;
356
357	if (strlen(kind) == 0)
358		return false;
359
360	ops = tcf_proto_lookup_ops(kind, false, NULL);
361	/* On error return false to take rtnl lock. Proto lookup/create
362	 * functions will perform lookup again and properly handle errors.
363	 */
364	if (IS_ERR(ops))
365		return false;
366
367	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
368	module_put(ops->owner);
369	return ret;
370}
371
372static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
373					  u32 prio, struct tcf_chain *chain,
374					  bool rtnl_held,
375					  struct netlink_ext_ack *extack)
376{
377	struct tcf_proto *tp;
378	int err;
379
380	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
381	if (!tp)
382		return ERR_PTR(-ENOBUFS);
383
384	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
385	if (IS_ERR(tp->ops)) {
386		err = PTR_ERR(tp->ops);
387		goto errout;
388	}
389	tp->classify = tp->ops->classify;
390	tp->protocol = protocol;
391	tp->prio = prio;
392	tp->chain = chain;
393	spin_lock_init(&tp->lock);
394	refcount_set(&tp->refcnt, 1);
395
396	err = tp->ops->init(tp);
397	if (err) {
398		module_put(tp->ops->owner);
399		goto errout;
400	}
401	return tp;
402
403errout:
404	kfree(tp);
405	return ERR_PTR(err);
406}
407
408static void tcf_proto_get(struct tcf_proto *tp)
409{
410	refcount_inc(&tp->refcnt);
411}
412
413static void tcf_chain_put(struct tcf_chain *chain);
414
415static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
416			      bool sig_destroy, struct netlink_ext_ack *extack)
417{
418	tp->ops->destroy(tp, rtnl_held, extack);
419	if (sig_destroy)
420		tcf_proto_signal_destroyed(tp->chain, tp);
421	tcf_chain_put(tp->chain);
422	module_put(tp->ops->owner);
423	kfree_rcu(tp, rcu);
424}
425
426static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
427			  struct netlink_ext_ack *extack)
428{
429	if (refcount_dec_and_test(&tp->refcnt))
430		tcf_proto_destroy(tp, rtnl_held, true, extack);
431}
432
433static bool tcf_proto_check_delete(struct tcf_proto *tp)
434{
435	if (tp->ops->delete_empty)
436		return tp->ops->delete_empty(tp);
437
438	tp->deleting = true;
439	return tp->deleting;
440}
441
442static void tcf_proto_mark_delete(struct tcf_proto *tp)
443{
444	spin_lock(&tp->lock);
445	tp->deleting = true;
446	spin_unlock(&tp->lock);
447}
448
449static bool tcf_proto_is_deleting(struct tcf_proto *tp)
450{
451	bool deleting;
452
453	spin_lock(&tp->lock);
454	deleting = tp->deleting;
455	spin_unlock(&tp->lock);
456
457	return deleting;
458}
459
460#define ASSERT_BLOCK_LOCKED(block)					\
461	lockdep_assert_held(&(block)->lock)
462
463struct tcf_filter_chain_list_item {
464	struct list_head list;
465	tcf_chain_head_change_t *chain_head_change;
466	void *chain_head_change_priv;
467};
468
469static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
470					  u32 chain_index)
471{
472	struct tcf_chain *chain;
473
474	ASSERT_BLOCK_LOCKED(block);
475
476	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
477	if (!chain)
478		return NULL;
479	list_add_tail_rcu(&chain->list, &block->chain_list);
480	mutex_init(&chain->filter_chain_lock);
481	chain->block = block;
482	chain->index = chain_index;
483	chain->refcnt = 1;
484	if (!chain->index)
485		block->chain0.chain = chain;
486	return chain;
487}
488
489static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
490				       struct tcf_proto *tp_head)
491{
492	if (item->chain_head_change)
493		item->chain_head_change(tp_head, item->chain_head_change_priv);
494}
495
496static void tcf_chain0_head_change(struct tcf_chain *chain,
497				   struct tcf_proto *tp_head)
498{
499	struct tcf_filter_chain_list_item *item;
500	struct tcf_block *block = chain->block;
501
502	if (chain->index)
503		return;
504
505	mutex_lock(&block->lock);
506	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
507		tcf_chain_head_change_item(item, tp_head);
508	mutex_unlock(&block->lock);
509}
510
511/* Returns true if block can be safely freed. */
512
513static bool tcf_chain_detach(struct tcf_chain *chain)
514{
515	struct tcf_block *block = chain->block;
516
517	ASSERT_BLOCK_LOCKED(block);
518
519	list_del_rcu(&chain->list);
520	if (!chain->index)
521		block->chain0.chain = NULL;
522
523	if (list_empty(&block->chain_list) &&
524	    refcount_read(&block->refcnt) == 0)
525		return true;
526
527	return false;
528}
529
530static void tcf_block_destroy(struct tcf_block *block)
531{
532	mutex_destroy(&block->lock);
533	mutex_destroy(&block->proto_destroy_lock);
534	kfree_rcu(block, rcu);
535}
536
537static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
538{
539	struct tcf_block *block = chain->block;
540
541	mutex_destroy(&chain->filter_chain_lock);
542	kfree_rcu(chain, rcu);
543	if (free_block)
544		tcf_block_destroy(block);
545}
546
547static void tcf_chain_hold(struct tcf_chain *chain)
548{
549	ASSERT_BLOCK_LOCKED(chain->block);
550
551	++chain->refcnt;
552}
553
554static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
555{
556	ASSERT_BLOCK_LOCKED(chain->block);
557
558	/* In case all the references are action references, this
559	 * chain should not be shown to the user.
560	 */
561	return chain->refcnt == chain->action_refcnt;
562}
563
564static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
565					  u32 chain_index)
566{
567	struct tcf_chain *chain;
568
569	ASSERT_BLOCK_LOCKED(block);
570
571	list_for_each_entry(chain, &block->chain_list, list) {
572		if (chain->index == chain_index)
573			return chain;
574	}
575	return NULL;
576}
577
578#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
579static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
580					      u32 chain_index)
581{
582	struct tcf_chain *chain;
583
584	list_for_each_entry_rcu(chain, &block->chain_list, list) {
585		if (chain->index == chain_index)
586			return chain;
587	}
588	return NULL;
589}
590#endif
591
592static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
593			   u32 seq, u16 flags, int event, bool unicast,
594			   struct netlink_ext_ack *extack);
595
596static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
597					 u32 chain_index, bool create,
598					 bool by_act)
599{
600	struct tcf_chain *chain = NULL;
601	bool is_first_reference;
602
603	mutex_lock(&block->lock);
604	chain = tcf_chain_lookup(block, chain_index);
605	if (chain) {
606		tcf_chain_hold(chain);
607	} else {
608		if (!create)
609			goto errout;
610		chain = tcf_chain_create(block, chain_index);
611		if (!chain)
612			goto errout;
613	}
614
615	if (by_act)
616		++chain->action_refcnt;
617	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
618	mutex_unlock(&block->lock);
619
620	/* Send a notification only when we get the first
621	 * non-action reference. Until then, the chain acts only as
622	 * a placeholder for actions pointing to it and the user ought
623	 * not to know about them.
624	 */
625	if (is_first_reference && !by_act)
626		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
627				RTM_NEWCHAIN, false, NULL);
628
629	return chain;
630
631errout:
632	mutex_unlock(&block->lock);
633	return chain;
634}
635
636static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
637				       bool create)
638{
639	return __tcf_chain_get(block, chain_index, create, false);
640}
641
642struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
643{
644	return __tcf_chain_get(block, chain_index, true, true);
645}
646EXPORT_SYMBOL(tcf_chain_get_by_act);
647
648static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
649			       void *tmplt_priv);
650static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
651				  void *tmplt_priv, u32 chain_index,
652				  struct tcf_block *block, struct sk_buff *oskb,
653				  u32 seq, u16 flags, bool unicast);
654
655static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
656			    bool explicitly_created)
657{
658	struct tcf_block *block = chain->block;
659	const struct tcf_proto_ops *tmplt_ops;
660	unsigned int refcnt, non_act_refcnt;
661	bool free_block = false;
662	void *tmplt_priv;
663
664	mutex_lock(&block->lock);
665	if (explicitly_created) {
666		if (!chain->explicitly_created) {
667			mutex_unlock(&block->lock);
668			return;
669		}
670		chain->explicitly_created = false;
671	}
672
673	if (by_act)
674		chain->action_refcnt--;
675
676	/* tc_chain_notify_delete can't be called while holding block lock.
677	 * However, when block is unlocked chain can be changed concurrently, so
678	 * save these to temporary variables.
679	 */
680	refcnt = --chain->refcnt;
681	non_act_refcnt = refcnt - chain->action_refcnt;
682	tmplt_ops = chain->tmplt_ops;
683	tmplt_priv = chain->tmplt_priv;
684
685	if (non_act_refcnt == chain->explicitly_created && !by_act) {
686		if (non_act_refcnt == 0)
687			tc_chain_notify_delete(tmplt_ops, tmplt_priv,
688					       chain->index, block, NULL, 0, 0,
689					       false);
690		/* Last reference to chain, no need to lock. */
691		chain->flushing = false;
692	}
693
694	if (refcnt == 0)
695		free_block = tcf_chain_detach(chain);
696	mutex_unlock(&block->lock);
697
698	if (refcnt == 0) {
699		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
700		tcf_chain_destroy(chain, free_block);
701	}
702}
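/* Roughly: __tcf_chain_put() drops one (possibly action) reference under
 * block->lock, sends an RTM_DELCHAIN notification once the last non-action
 * reference is gone, and when the total refcount hits zero destroys the
 * chain template and the chain itself (and the block too, if this was its
 * last chain and the block's own refcount already dropped to zero).
 */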
703
704static void tcf_chain_put(struct tcf_chain *chain)
705{
706	__tcf_chain_put(chain, false, false);
707}
708
709void tcf_chain_put_by_act(struct tcf_chain *chain)
710{
711	__tcf_chain_put(chain, true, false);
712}
713EXPORT_SYMBOL(tcf_chain_put_by_act);
714
715static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
716{
717	__tcf_chain_put(chain, false, true);
718}
719
720static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
721{
722	struct tcf_proto *tp, *tp_next;
723
724	mutex_lock(&chain->filter_chain_lock);
725	tp = tcf_chain_dereference(chain->filter_chain, chain);
726	while (tp) {
727		tp_next = rcu_dereference_protected(tp->next, 1);
728		tcf_proto_signal_destroying(chain, tp);
729		tp = tp_next;
730	}
731	tp = tcf_chain_dereference(chain->filter_chain, chain);
732	RCU_INIT_POINTER(chain->filter_chain, NULL);
733	tcf_chain0_head_change(chain, NULL);
734	chain->flushing = true;
735	mutex_unlock(&chain->filter_chain_lock);
736
737	while (tp) {
738		tp_next = rcu_dereference_protected(tp->next, 1);
739		tcf_proto_put(tp, rtnl_held, NULL);
740		tp = tp_next;
741	}
742}
743
744static int tcf_block_setup(struct tcf_block *block,
745			   struct flow_block_offload *bo);
746
747static void tcf_block_offload_init(struct flow_block_offload *bo,
748				   struct net_device *dev, struct Qdisc *sch,
749				   enum flow_block_command command,
750				   enum flow_block_binder_type binder_type,
751				   struct flow_block *flow_block,
752				   bool shared, struct netlink_ext_ack *extack)
753{
754	bo->net = dev_net(dev);
755	bo->command = command;
756	bo->binder_type = binder_type;
757	bo->block = flow_block;
758	bo->block_shared = shared;
759	bo->extack = extack;
760	bo->sch = sch;
761	bo->cb_list_head = &flow_block->cb_list;
762	INIT_LIST_HEAD(&bo->cb_list);
763}
764
765static void tcf_block_unbind(struct tcf_block *block,
766			     struct flow_block_offload *bo);
767
768static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
769{
770	struct tcf_block *block = block_cb->indr.data;
771	struct net_device *dev = block_cb->indr.dev;
772	struct Qdisc *sch = block_cb->indr.sch;
773	struct netlink_ext_ack extack = {};
774	struct flow_block_offload bo = {};
775
776	tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
777			       block_cb->indr.binder_type,
778			       &block->flow_block, tcf_block_shared(block),
779			       &extack);
780	rtnl_lock();
781	down_write(&block->cb_lock);
782	list_del(&block_cb->driver_list);
783	list_move(&block_cb->list, &bo.cb_list);
784	tcf_block_unbind(block, &bo);
785	up_write(&block->cb_lock);
786	rtnl_unlock();
787}
788
789static bool tcf_block_offload_in_use(struct tcf_block *block)
790{
791	return atomic_read(&block->offloadcnt);
792}
793
794static int tcf_block_offload_cmd(struct tcf_block *block,
795				 struct net_device *dev, struct Qdisc *sch,
796				 struct tcf_block_ext_info *ei,
797				 enum flow_block_command command,
798				 struct netlink_ext_ack *extack)
799{
800	struct flow_block_offload bo = {};
801
802	tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
803			       &block->flow_block, tcf_block_shared(block),
804			       extack);
805
806	if (dev->netdev_ops->ndo_setup_tc) {
807		int err;
808
809		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
810		if (err < 0) {
811			if (err != -EOPNOTSUPP)
812				NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
813			return err;
814		}
815
816		return tcf_block_setup(block, &bo);
817	}
818
819	flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
820				    tc_block_indr_cleanup);
821	tcf_block_setup(block, &bo);
822
823	return -EOPNOTSUPP;
824}
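/* Put differently: drivers with ndo_setup_tc() are offered the block
 * directly; otherwise the command falls back to indirect (flow_indr) offload
 * registration and -EOPNOTSUPP is returned, which tcf_block_offload_bind()
 * below counts in nooffloaddevcnt instead of failing (unless the block
 * already has offloaded filters).
 */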
825
826static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
827				  struct tcf_block_ext_info *ei,
828				  struct netlink_ext_ack *extack)
829{
830	struct net_device *dev = q->dev_queue->dev;
831	int err;
832
833	down_write(&block->cb_lock);
834
835	/* If the tc offload feature is disabled and the block we try to bind
836	 * to already has some offloaded filters, refuse to bind.
837	 */
838	if (dev->netdev_ops->ndo_setup_tc &&
839	    !tc_can_offload(dev) &&
840	    tcf_block_offload_in_use(block)) {
841		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
842		err = -EOPNOTSUPP;
843		goto err_unlock;
844	}
845
846	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
847	if (err == -EOPNOTSUPP)
848		goto no_offload_dev_inc;
849	if (err)
850		goto err_unlock;
851
852	up_write(&block->cb_lock);
853	return 0;
854
855no_offload_dev_inc:
856	if (tcf_block_offload_in_use(block))
857		goto err_unlock;
858
859	err = 0;
860	block->nooffloaddevcnt++;
861err_unlock:
862	up_write(&block->cb_lock);
863	return err;
864}
865
866static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
867				     struct tcf_block_ext_info *ei)
868{
869	struct net_device *dev = q->dev_queue->dev;
870	int err;
871
872	down_write(&block->cb_lock);
873	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
874	if (err == -EOPNOTSUPP)
875		goto no_offload_dev_dec;
876	up_write(&block->cb_lock);
877	return;
878
879no_offload_dev_dec:
880	WARN_ON(block->nooffloaddevcnt-- == 0);
881	up_write(&block->cb_lock);
882}
883
884static int
885tcf_chain0_head_change_cb_add(struct tcf_block *block,
886			      struct tcf_block_ext_info *ei,
887			      struct netlink_ext_ack *extack)
888{
889	struct tcf_filter_chain_list_item *item;
890	struct tcf_chain *chain0;
891
892	item = kmalloc(sizeof(*item), GFP_KERNEL);
893	if (!item) {
894		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
895		return -ENOMEM;
896	}
897	item->chain_head_change = ei->chain_head_change;
898	item->chain_head_change_priv = ei->chain_head_change_priv;
899
900	mutex_lock(&block->lock);
901	chain0 = block->chain0.chain;
902	if (chain0)
903		tcf_chain_hold(chain0);
904	else
905		list_add(&item->list, &block->chain0.filter_chain_list);
906	mutex_unlock(&block->lock);
907
908	if (chain0) {
909		struct tcf_proto *tp_head;
910
911		mutex_lock(&chain0->filter_chain_lock);
912
913		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
914		if (tp_head)
915			tcf_chain_head_change_item(item, tp_head);
916
917		mutex_lock(&block->lock);
918		list_add(&item->list, &block->chain0.filter_chain_list);
919		mutex_unlock(&block->lock);
920
921		mutex_unlock(&chain0->filter_chain_lock);
922		tcf_chain_put(chain0);
923	}
924
925	return 0;
926}
927
928static void
929tcf_chain0_head_change_cb_del(struct tcf_block *block,
930			      struct tcf_block_ext_info *ei)
931{
932	struct tcf_filter_chain_list_item *item;
933
934	mutex_lock(&block->lock);
935	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
936		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
937		    (item->chain_head_change == ei->chain_head_change &&
938		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
939			if (block->chain0.chain)
940				tcf_chain_head_change_item(item, NULL);
941			list_del(&item->list);
942			mutex_unlock(&block->lock);
943
944			kfree(item);
945			return;
946		}
947	}
948	mutex_unlock(&block->lock);
949	WARN_ON(1);
950}
951
952struct tcf_net {
953	spinlock_t idr_lock; /* Protects idr */
954	struct idr idr;
955};
956
957static unsigned int tcf_net_id;
958
959static int tcf_block_insert(struct tcf_block *block, struct net *net,
960			    struct netlink_ext_ack *extack)
961{
962	struct tcf_net *tn = net_generic(net, tcf_net_id);
963	int err;
964
965	idr_preload(GFP_KERNEL);
966	spin_lock(&tn->idr_lock);
967	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
968			    GFP_NOWAIT);
969	spin_unlock(&tn->idr_lock);
970	idr_preload_end();
971
972	return err;
973}
974
975static void tcf_block_remove(struct tcf_block *block, struct net *net)
976{
977	struct tcf_net *tn = net_generic(net, tcf_net_id);
978
979	spin_lock(&tn->idr_lock);
980	idr_remove(&tn->idr, block->index);
981	spin_unlock(&tn->idr_lock);
982}
983
984static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
985					  u32 block_index,
986					  struct netlink_ext_ack *extack)
987{
988	struct tcf_block *block;
989
990	block = kzalloc(sizeof(*block), GFP_KERNEL);
991	if (!block) {
992		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
993		return ERR_PTR(-ENOMEM);
994	}
995	mutex_init(&block->lock);
996	mutex_init(&block->proto_destroy_lock);
997	init_rwsem(&block->cb_lock);
998	flow_block_init(&block->flow_block);
999	INIT_LIST_HEAD(&block->chain_list);
1000	INIT_LIST_HEAD(&block->owner_list);
1001	INIT_LIST_HEAD(&block->chain0.filter_chain_list);
1002
1003	refcount_set(&block->refcnt, 1);
1004	block->net = net;
1005	block->index = block_index;
1006
1007	/* Don't store q pointer for blocks which are shared */
1008	if (!tcf_block_shared(block))
1009		block->q = q;
1010	return block;
1011}
1012
1013static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
1014{
1015	struct tcf_net *tn = net_generic(net, tcf_net_id);
1016
1017	return idr_find(&tn->idr, block_index);
1018}
1019
1020static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
1021{
1022	struct tcf_block *block;
1023
1024	rcu_read_lock();
1025	block = tcf_block_lookup(net, block_index);
1026	if (block && !refcount_inc_not_zero(&block->refcnt))
1027		block = NULL;
1028	rcu_read_unlock();
1029
1030	return block;
1031}
1032
1033static struct tcf_chain *
1034__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
1035{
1036	mutex_lock(&block->lock);
1037	if (chain)
1038		chain = list_is_last(&chain->list, &block->chain_list) ?
1039			NULL : list_next_entry(chain, list);
1040	else
1041		chain = list_first_entry_or_null(&block->chain_list,
1042						 struct tcf_chain, list);
1043
1044	/* skip all action-only chains */
1045	while (chain && tcf_chain_held_by_acts_only(chain))
1046		chain = list_is_last(&chain->list, &block->chain_list) ?
1047			NULL : list_next_entry(chain, list);
1048
1049	if (chain)
1050		tcf_chain_hold(chain);
1051	mutex_unlock(&block->lock);
1052
1053	return chain;
1054}
1055
1056/* Function to be used by all clients that want to iterate over all chains on
1057 * a block. It properly obtains block->lock and takes a reference to the chain
1058 * before returning it. Users of this function must be tolerant to concurrent
1059 * chain insertion/deletion or ensure that no concurrent chain modification is
1060 * possible. Note that netlink dump callbacks cannot guarantee a consistent
1061 * dump because the rtnl lock is released each time an skb is filled with data
1062 * and sent to user-space.
1063 */
1064
1065struct tcf_chain *
1066tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
1067{
1068	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
1069
1070	if (chain)
1071		tcf_chain_put(chain);
1072
1073	return chain_next;
1074}
1075EXPORT_SYMBOL(tcf_get_next_chain);
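/* A typical usage pattern for the iterator above (as assumed here, and as
 * tcf_block_flush_all_chains() below does); the reference taken by each call
 * is dropped by the next one, so breaking out of the loop early requires an
 * explicit put of the current chain:
 *
 *	for (chain = tcf_get_next_chain(block, NULL); chain;
 *	     chain = tcf_get_next_chain(block, chain))
 *		...;
 */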
1076
1077static struct tcf_proto *
1078__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
1079{
1080	u32 prio = 0;
1081
1082	ASSERT_RTNL();
1083	mutex_lock(&chain->filter_chain_lock);
1084
1085	if (!tp) {
1086		tp = tcf_chain_dereference(chain->filter_chain, chain);
1087	} else if (tcf_proto_is_deleting(tp)) {
1088		/* 'deleting' flag is set and chain->filter_chain_lock was
1089		 * unlocked, which means next pointer could be invalid. Restart
1090		 * search.
1091		 */
1092		prio = tp->prio + 1;
1093		tp = tcf_chain_dereference(chain->filter_chain, chain);
1094
1095		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
1096			if (!tp->deleting && tp->prio >= prio)
1097				break;
1098	} else {
1099		tp = tcf_chain_dereference(tp->next, chain);
1100	}
1101
1102	if (tp)
1103		tcf_proto_get(tp);
1104
1105	mutex_unlock(&chain->filter_chain_lock);
1106
1107	return tp;
1108}
1109
1110/* Function to be used by all clients that want to iterate over all tp's on a
1111 * chain. Users of this function must be tolerant to concurrent tp
1112 * insertion/deletion or ensure that no concurrent chain modification is
1113 * possible. Note that netlink dump callbacks cannot guarantee a consistent
1114 * dump because the rtnl lock is released each time an skb is filled with data
1115 * and sent to user-space.
1116 */
1117
1118struct tcf_proto *
1119tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
1120{
1121	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
1122
1123	if (tp)
1124		tcf_proto_put(tp, true, NULL);
1125
1126	return tp_next;
1127}
1128EXPORT_SYMBOL(tcf_get_next_proto);
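/* The same idiom applies per chain, e.g. as tfilter_notify_chain() below
 * walks all classifiers:
 *
 *	for (tp = tcf_get_next_proto(chain, NULL); tp;
 *	     tp = tcf_get_next_proto(chain, tp))
 *		...;
 */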
1129
1130static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
1131{
1132	struct tcf_chain *chain;
1133
1134	/* Last reference to block. At this point chains cannot be added or
1135	 * removed concurrently.
1136	 */
1137	for (chain = tcf_get_next_chain(block, NULL);
1138	     chain;
1139	     chain = tcf_get_next_chain(block, chain)) {
1140		tcf_chain_put_explicitly_created(chain);
1141		tcf_chain_flush(chain, rtnl_held);
1142	}
1143}
1144
1145/* Look up Qdisc and increment its reference counter.
1146 * Set parent, if necessary.
1147 */
1148
1149static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
1150			    u32 *parent, int ifindex, bool rtnl_held,
1151			    struct netlink_ext_ack *extack)
1152{
1153	const struct Qdisc_class_ops *cops;
1154	struct net_device *dev;
1155	int err = 0;
1156
1157	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
1158		return 0;
1159
1160	rcu_read_lock();
1161
1162	/* Find link */
1163	dev = dev_get_by_index_rcu(net, ifindex);
1164	if (!dev) {
1165		rcu_read_unlock();
1166		return -ENODEV;
1167	}
1168
1169	/* Find qdisc */
1170	if (!*parent) {
1171		*q = rcu_dereference(dev->qdisc);
1172		*parent = (*q)->handle;
1173	} else {
1174		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
1175		if (!*q) {
1176			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
1177			err = -EINVAL;
1178			goto errout_rcu;
1179		}
1180	}
1181
1182	*q = qdisc_refcount_inc_nz(*q);
1183	if (!*q) {
1184		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
1185		err = -EINVAL;
1186		goto errout_rcu;
1187	}
1188
1189	/* Is it classful? */
1190	cops = (*q)->ops->cl_ops;
1191	if (!cops) {
1192		NL_SET_ERR_MSG(extack, "Qdisc not classful");
1193		err = -EINVAL;
1194		goto errout_qdisc;
1195	}
1196
1197	if (!cops->tcf_block) {
1198		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
1199		err = -EOPNOTSUPP;
1200		goto errout_qdisc;
1201	}
1202
1203errout_rcu:
1204	/* At this point we know that qdisc is not noop_qdisc,
1205	 * which means that qdisc holds a reference to net_device
1206	 * and we hold a reference to qdisc, so it is safe to release
1207	 * rcu read lock.
1208	 */
1209	rcu_read_unlock();
1210	return err;
1211
1212errout_qdisc:
1213	rcu_read_unlock();
1214
1215	if (rtnl_held)
1216		qdisc_put(*q);
1217	else
1218		qdisc_put_unlocked(*q);
1219	*q = NULL;
1220
1221	return err;
1222}
1223
1224static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
1225			       int ifindex, struct netlink_ext_ack *extack)
1226{
1227	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
1228		return 0;
1229
1230	/* Are we searching for a filter attached to a class? */
1231	if (TC_H_MIN(parent)) {
1232		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1233
1234		*cl = cops->find(q, parent);
1235		if (*cl == 0) {
1236			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
1237			return -ENOENT;
1238		}
1239	}
1240
1241	return 0;
1242}
1243
1244static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
1245					  unsigned long cl, int ifindex,
1246					  u32 block_index,
1247					  struct netlink_ext_ack *extack)
1248{
1249	struct tcf_block *block;
1250
1251	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1252		block = tcf_block_refcnt_get(net, block_index);
1253		if (!block) {
1254			NL_SET_ERR_MSG(extack, "Block of given index was not found");
1255			return ERR_PTR(-EINVAL);
1256		}
1257	} else {
1258		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1259
1260		block = cops->tcf_block(q, cl, extack);
1261		if (!block)
1262			return ERR_PTR(-EINVAL);
1263
1264		if (tcf_block_shared(block)) {
1265			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
1266			return ERR_PTR(-EOPNOTSUPP);
1267		}
1268
1269		/* Always take a reference to the block in order to support
1270		 * execution of the cls API rules update path without the rtnl
1271		 * lock. The caller must release the block when finished using
1272		 * it. The 'if' branch of this conditional obtains its reference
1273		 * to the block by calling tcf_block_refcnt_get().
1274		 */
1275		refcount_inc(&block->refcnt);
1276	}
1277
1278	return block;
1279}
1280
1281static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
1282			    struct tcf_block_ext_info *ei, bool rtnl_held)
1283{
1284	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
1285		/* Flushing/putting all chains will cause the block to be
1286		 * deallocated when the last chain is freed. However, if chain_list
1287		 * is empty, the block has to be deallocated manually. After the
1288		 * block's reference counter has reached 0, it is no longer possible
1289		 * to increment it or add new chains to the block.
1290		 */
1291		bool free_block = list_empty(&block->chain_list);
1292
1293		mutex_unlock(&block->lock);
1294		if (tcf_block_shared(block))
1295			tcf_block_remove(block, block->net);
1296
1297		if (q)
1298			tcf_block_offload_unbind(block, q, ei);
1299
1300		if (free_block)
1301			tcf_block_destroy(block);
1302		else
1303			tcf_block_flush_all_chains(block, rtnl_held);
1304	} else if (q) {
1305		tcf_block_offload_unbind(block, q, ei);
1306	}
1307}
1308
1309static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
1310{
1311	__tcf_block_put(block, NULL, NULL, rtnl_held);
1312}
1313
1314/* Find tcf block.
1315 * Set q, parent, cl when appropriate.
1316 */
1317
1318static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
1319					u32 *parent, unsigned long *cl,
1320					int ifindex, u32 block_index,
1321					struct netlink_ext_ack *extack)
1322{
1323	struct tcf_block *block;
1324	int err = 0;
1325
1326	ASSERT_RTNL();
1327
1328	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
1329	if (err)
1330		goto errout;
1331
1332	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
1333	if (err)
1334		goto errout_qdisc;
1335
1336	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
1337	if (IS_ERR(block)) {
1338		err = PTR_ERR(block);
1339		goto errout_qdisc;
1340	}
1341
1342	return block;
1343
1344errout_qdisc:
1345	if (*q)
1346		qdisc_put(*q);
1347errout:
1348	*q = NULL;
1349	return ERR_PTR(err);
1350}
1351
1352static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
1353			      bool rtnl_held)
1354{
1355	if (!IS_ERR_OR_NULL(block))
1356		tcf_block_refcnt_put(block, rtnl_held);
1357
1358	if (q) {
1359		if (rtnl_held)
1360			qdisc_put(q);
1361		else
1362			qdisc_put_unlocked(q);
1363	}
1364}
1365
1366struct tcf_block_owner_item {
1367	struct list_head list;
1368	struct Qdisc *q;
1369	enum flow_block_binder_type binder_type;
1370};
1371
1372static void
1373tcf_block_owner_netif_keep_dst(struct tcf_block *block,
1374			       struct Qdisc *q,
1375			       enum flow_block_binder_type binder_type)
1376{
1377	if (block->keep_dst &&
1378	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
1379	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
1380		netif_keep_dst(qdisc_dev(q));
1381}
1382
1383void tcf_block_netif_keep_dst(struct tcf_block *block)
1384{
1385	struct tcf_block_owner_item *item;
1386
1387	block->keep_dst = true;
1388	list_for_each_entry(item, &block->owner_list, list)
1389		tcf_block_owner_netif_keep_dst(block, item->q,
1390					       item->binder_type);
1391}
1392EXPORT_SYMBOL(tcf_block_netif_keep_dst);
1393
1394static int tcf_block_owner_add(struct tcf_block *block,
1395			       struct Qdisc *q,
1396			       enum flow_block_binder_type binder_type)
1397{
1398	struct tcf_block_owner_item *item;
1399
1400	item = kmalloc(sizeof(*item), GFP_KERNEL);
1401	if (!item)
1402		return -ENOMEM;
1403	item->q = q;
1404	item->binder_type = binder_type;
1405	list_add(&item->list, &block->owner_list);
1406	return 0;
1407}
1408
1409static void tcf_block_owner_del(struct tcf_block *block,
1410				struct Qdisc *q,
1411				enum flow_block_binder_type binder_type)
1412{
1413	struct tcf_block_owner_item *item;
1414
1415	list_for_each_entry(item, &block->owner_list, list) {
1416		if (item->q == q && item->binder_type == binder_type) {
1417			list_del(&item->list);
1418			kfree(item);
1419			return;
1420		}
1421	}
1422	WARN_ON(1);
1423}
1424
1425int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
1426		      struct tcf_block_ext_info *ei,
1427		      struct netlink_ext_ack *extack)
1428{
1429	struct net *net = qdisc_net(q);
1430	struct tcf_block *block = NULL;
1431	int err;
1432
1433	if (ei->block_index)
1434		/* block_index not 0 means the shared block is requested */
1435		block = tcf_block_refcnt_get(net, ei->block_index);
1436
1437	if (!block) {
1438		block = tcf_block_create(net, q, ei->block_index, extack);
1439		if (IS_ERR(block))
1440			return PTR_ERR(block);
1441		if (tcf_block_shared(block)) {
1442			err = tcf_block_insert(block, net, extack);
1443			if (err)
1444				goto err_block_insert;
1445		}
1446	}
1447
1448	err = tcf_block_owner_add(block, q, ei->binder_type);
1449	if (err)
1450		goto err_block_owner_add;
1451
1452	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
1453
1454	err = tcf_chain0_head_change_cb_add(block, ei, extack);
1455	if (err)
1456		goto err_chain0_head_change_cb_add;
1457
1458	err = tcf_block_offload_bind(block, q, ei, extack);
1459	if (err)
1460		goto err_block_offload_bind;
1461
1462	*p_block = block;
1463	return 0;
1464
1465err_block_offload_bind:
1466	tcf_chain0_head_change_cb_del(block, ei);
1467err_chain0_head_change_cb_add:
1468	tcf_block_owner_del(block, q, ei->binder_type);
1469err_block_owner_add:
1470err_block_insert:
1471	tcf_block_refcnt_put(block, true);
1472	return err;
1473}
1474EXPORT_SYMBOL(tcf_block_get_ext);
1475
1476static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
1477{
1478	struct tcf_proto __rcu **p_filter_chain = priv;
1479
1480	rcu_assign_pointer(*p_filter_chain, tp_head);
1481}
1482
1483int tcf_block_get(struct tcf_block **p_block,
1484		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
1485		  struct netlink_ext_ack *extack)
1486{
1487	struct tcf_block_ext_info ei = {
1488		.chain_head_change = tcf_chain_head_change_dflt,
1489		.chain_head_change_priv = p_filter_chain,
1490	};
1491
1492	WARN_ON(!p_filter_chain);
1493	return tcf_block_get_ext(p_block, q, &ei, extack);
1494}
1495EXPORT_SYMBOL(tcf_block_get);
1496
1497/* XXX: Standalone actions are not allowed to jump to any chain, and bound
1498 * actions should all be removed after flushing.
1499 */
1500void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
1501		       struct tcf_block_ext_info *ei)
1502{
1503	if (!block)
1504		return;
1505	tcf_chain0_head_change_cb_del(block, ei);
1506	tcf_block_owner_del(block, q, ei->binder_type);
1507
1508	__tcf_block_put(block, q, ei, true);
1509}
1510EXPORT_SYMBOL(tcf_block_put_ext);
1511
1512void tcf_block_put(struct tcf_block *block)
1513{
1514	struct tcf_block_ext_info ei = {0, };
1515
1516	if (!block)
1517		return;
1518	tcf_block_put_ext(block, block->q, &ei);
1519}
1520
1521EXPORT_SYMBOL(tcf_block_put);
1522
1523static int
1524tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
1525			    void *cb_priv, bool add, bool offload_in_use,
1526			    struct netlink_ext_ack *extack)
1527{
1528	struct tcf_chain *chain, *chain_prev;
1529	struct tcf_proto *tp, *tp_prev;
1530	int err;
1531
1532	lockdep_assert_held(&block->cb_lock);
1533
1534	for (chain = __tcf_get_next_chain(block, NULL);
1535	     chain;
1536	     chain_prev = chain,
1537		     chain = __tcf_get_next_chain(block, chain),
1538		     tcf_chain_put(chain_prev)) {
1539		if (chain->tmplt_ops && add)
1540			chain->tmplt_ops->tmplt_reoffload(chain, true, cb,
1541							  cb_priv);
1542		for (tp = __tcf_get_next_proto(chain, NULL); tp;
1543		     tp_prev = tp,
1544			     tp = __tcf_get_next_proto(chain, tp),
1545			     tcf_proto_put(tp_prev, true, NULL)) {
1546			if (tp->ops->reoffload) {
1547				err = tp->ops->reoffload(tp, add, cb, cb_priv,
1548							 extack);
1549				if (err && add)
1550					goto err_playback_remove;
1551			} else if (add && offload_in_use) {
1552				err = -EOPNOTSUPP;
1553				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
1554				goto err_playback_remove;
1555			}
1556		}
1557		if (chain->tmplt_ops && !add)
1558			chain->tmplt_ops->tmplt_reoffload(chain, false, cb,
1559							  cb_priv);
1560	}
1561
1562	return 0;
1563
1564err_playback_remove:
1565	tcf_proto_put(tp, true, NULL);
1566	tcf_chain_put(chain);
1567	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
1568				    extack);
1569	return err;
1570}
1571
1572static int tcf_block_bind(struct tcf_block *block,
1573			  struct flow_block_offload *bo)
1574{
1575	struct flow_block_cb *block_cb, *next;
1576	int err, i = 0;
1577
1578	lockdep_assert_held(&block->cb_lock);
1579
1580	list_for_each_entry(block_cb, &bo->cb_list, list) {
1581		err = tcf_block_playback_offloads(block, block_cb->cb,
1582						  block_cb->cb_priv, true,
1583						  tcf_block_offload_in_use(block),
1584						  bo->extack);
1585		if (err)
1586			goto err_unroll;
1587		if (!bo->unlocked_driver_cb)
1588			block->lockeddevcnt++;
1589
1590		i++;
1591	}
1592	list_splice(&bo->cb_list, &block->flow_block.cb_list);
1593
1594	return 0;
1595
1596err_unroll:
1597	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
1598		list_del(&block_cb->driver_list);
1599		if (i-- > 0) {
1600			list_del(&block_cb->list);
1601			tcf_block_playback_offloads(block, block_cb->cb,
1602						    block_cb->cb_priv, false,
1603						    tcf_block_offload_in_use(block),
1604						    NULL);
1605			if (!bo->unlocked_driver_cb)
1606				block->lockeddevcnt--;
1607		}
1608		flow_block_cb_free(block_cb);
1609	}
1610
1611	return err;
1612}
1613
1614static void tcf_block_unbind(struct tcf_block *block,
1615			     struct flow_block_offload *bo)
1616{
1617	struct flow_block_cb *block_cb, *next;
1618
1619	lockdep_assert_held(&block->cb_lock);
1620
1621	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
1622		tcf_block_playback_offloads(block, block_cb->cb,
1623					    block_cb->cb_priv, false,
1624					    tcf_block_offload_in_use(block),
1625					    NULL);
1626		list_del(&block_cb->list);
1627		flow_block_cb_free(block_cb);
1628		if (!bo->unlocked_driver_cb)
1629			block->lockeddevcnt--;
1630	}
1631}
1632
1633static int tcf_block_setup(struct tcf_block *block,
1634			   struct flow_block_offload *bo)
1635{
1636	int err;
1637
1638	switch (bo->command) {
1639	case FLOW_BLOCK_BIND:
1640		err = tcf_block_bind(block, bo);
1641		break;
1642	case FLOW_BLOCK_UNBIND:
1643		err = 0;
1644		tcf_block_unbind(block, bo);
1645		break;
1646	default:
1647		WARN_ON_ONCE(1);
1648		err = -EOPNOTSUPP;
1649	}
1650
1651	return err;
1652}
1653
1654/* Main classifier routine: scans classifier chain attached
1655 * to this qdisc, (optionally) tests for protocol and asks
1656 * specific classifiers.
1657 */
1658static inline int __tcf_classify(struct sk_buff *skb,
1659				 const struct tcf_proto *tp,
1660				 const struct tcf_proto *orig_tp,
1661				 struct tcf_result *res,
1662				 bool compat_mode,
1663				 struct tcf_exts_miss_cookie_node *n,
1664				 int act_index,
1665				 u32 *last_executed_chain)
1666{
1667#ifdef CONFIG_NET_CLS_ACT
1668	const int max_reclassify_loop = 16;
1669	const struct tcf_proto *first_tp;
1670	int limit = 0;
1671
1672reclassify:
1673#endif
1674	for (; tp; tp = rcu_dereference_bh(tp->next)) {
1675		__be16 protocol = skb_protocol(skb, false);
1676		int err = 0;
1677
1678		if (n) {
1679			struct tcf_exts *exts;
1680
1681			if (n->tp_prio != tp->prio)
1682				continue;
1683
1684			/* We re-lookup the tp and chain based on index instead
1685			 * of having hard refs and locks to them, so do a sanity
1686			 * check if any of tp,chain,exts was replaced by the
1687			 * time we got here with a cookie from hardware.
1688			 */
1689			if (unlikely(n->tp != tp || n->tp->chain != n->chain ||
1690				     !tp->ops->get_exts))
1691				return TC_ACT_SHOT;
1692
1693			exts = tp->ops->get_exts(tp, n->handle);
1694			if (unlikely(!exts || n->exts != exts))
1695				return TC_ACT_SHOT;
1696
1697			n = NULL;
1698			err = tcf_exts_exec_ex(skb, exts, act_index, res);
1699		} else {
1700			if (tp->protocol != protocol &&
1701			    tp->protocol != htons(ETH_P_ALL))
1702				continue;
1703
1704			err = tc_classify(skb, tp, res);
1705		}
1706#ifdef CONFIG_NET_CLS_ACT
1707		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
1708			first_tp = orig_tp;
1709			*last_executed_chain = first_tp->chain->index;
1710			goto reset;
1711		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
1712			first_tp = res->goto_tp;
1713			*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
1714			goto reset;
1715		}
1716#endif
1717		if (err >= 0)
1718			return err;
1719	}
1720
1721	if (unlikely(n))
1722		return TC_ACT_SHOT;
1723
1724	return TC_ACT_UNSPEC; /* signal: continue lookup */
1725#ifdef CONFIG_NET_CLS_ACT
1726reset:
1727	if (unlikely(limit++ >= max_reclassify_loop)) {
1728		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
1729				       tp->chain->block->index,
1730				       tp->prio & 0xffff,
1731				       ntohs(tp->protocol));
1732		return TC_ACT_SHOT;
1733	}
1734
1735	tp = first_tp;
1736	goto reclassify;
1737#endif
1738}
1739
1740int tcf_classify(struct sk_buff *skb,
1741		 const struct tcf_block *block,
1742		 const struct tcf_proto *tp,
1743		 struct tcf_result *res, bool compat_mode)
1744{
1745#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1746	u32 last_executed_chain = 0;
1747
1748	return __tcf_classify(skb, tp, tp, res, compat_mode, NULL, 0,
1749			      &last_executed_chain);
1750#else
1751	u32 last_executed_chain = tp ? tp->chain->index : 0;
1752	struct tcf_exts_miss_cookie_node *n = NULL;
1753	const struct tcf_proto *orig_tp = tp;
1754	struct tc_skb_ext *ext;
1755	int act_index = 0;
1756	int ret;
1757
1758	if (block) {
1759		ext = skb_ext_find(skb, TC_SKB_EXT);
1760
1761		if (ext && (ext->chain || ext->act_miss)) {
1762			struct tcf_chain *fchain;
1763			u32 chain;
1764
1765			if (ext->act_miss) {
1766				n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie,
1767								&act_index);
1768				if (!n)
1769					return TC_ACT_SHOT;
1770
1771				chain = n->chain_index;
1772			} else {
1773				chain = ext->chain;
1774			}
1775
1776			fchain = tcf_chain_lookup_rcu(block, chain);
1777			if (!fchain)
1778				return TC_ACT_SHOT;
1779
1780			/* Consume, so cloned/redirect skbs won't inherit ext */
1781			skb_ext_del(skb, TC_SKB_EXT);
1782
1783			tp = rcu_dereference_bh(fchain->filter_chain);
1784			last_executed_chain = fchain->index;
1785		}
1786	}
1787
1788	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, n, act_index,
1789			     &last_executed_chain);
1790
1791	if (tc_skb_ext_tc_enabled()) {
1792		/* If we missed on some chain */
1793		if (ret == TC_ACT_UNSPEC && last_executed_chain) {
1794			struct tc_skb_cb *cb = tc_skb_cb(skb);
1795
1796			ext = tc_skb_ext_alloc(skb);
1797			if (WARN_ON_ONCE(!ext))
1798				return TC_ACT_SHOT;
1799			ext->chain = last_executed_chain;
1800			ext->mru = cb->mru;
1801			ext->post_ct = cb->post_ct;
1802			ext->post_ct_snat = cb->post_ct_snat;
1803			ext->post_ct_dnat = cb->post_ct_dnat;
1804			ext->zone = cb->zone;
1805		}
1806	}
1807
1808	return ret;
1809#endif
1810}
1811EXPORT_SYMBOL(tcf_classify);
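/* In short (a summary of the code above, not an exhaustive contract): a
 * classifier verdict >= 0 is returned as-is, TC_ACT_UNSPEC means no filter
 * matched and lookup should continue, and TC_ACT_SHOT is returned when a
 * hardware miss cookie no longer matches the current tp/chain/exts or when
 * the reclassify loop reaches max_reclassify_loop (16) iterations.
 */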
1812
1813struct tcf_chain_info {
1814	struct tcf_proto __rcu **pprev;
1815	struct tcf_proto __rcu *next;
1816};
1817
1818static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
1819					   struct tcf_chain_info *chain_info)
1820{
1821	return tcf_chain_dereference(*chain_info->pprev, chain);
1822}
1823
1824static int tcf_chain_tp_insert(struct tcf_chain *chain,
1825			       struct tcf_chain_info *chain_info,
1826			       struct tcf_proto *tp)
1827{
1828	if (chain->flushing)
1829		return -EAGAIN;
1830
1831	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
1832	if (*chain_info->pprev == chain->filter_chain)
1833		tcf_chain0_head_change(chain, tp);
1834	tcf_proto_get(tp);
1835	rcu_assign_pointer(*chain_info->pprev, tp);
1836
1837	return 0;
1838}
1839
1840static void tcf_chain_tp_remove(struct tcf_chain *chain,
1841				struct tcf_chain_info *chain_info,
1842				struct tcf_proto *tp)
1843{
1844	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
1845
1846	tcf_proto_mark_delete(tp);
1847	if (tp == chain->filter_chain)
1848		tcf_chain0_head_change(chain, next);
1849	RCU_INIT_POINTER(*chain_info->pprev, next);
1850}
1851
1852static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1853					   struct tcf_chain_info *chain_info,
1854					   u32 protocol, u32 prio,
1855					   bool prio_allocate);
1856
1857/* Try to insert new proto.
1858 * If proto with specified priority already exists, free new proto
1859 * and return existing one.
1860 */
1861
1862static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
1863						    struct tcf_proto *tp_new,
1864						    u32 protocol, u32 prio,
1865						    bool rtnl_held)
1866{
1867	struct tcf_chain_info chain_info;
1868	struct tcf_proto *tp;
1869	int err = 0;
1870
1871	mutex_lock(&chain->filter_chain_lock);
1872
1873	if (tcf_proto_exists_destroying(chain, tp_new)) {
1874		mutex_unlock(&chain->filter_chain_lock);
1875		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1876		return ERR_PTR(-EAGAIN);
1877	}
1878
1879	tp = tcf_chain_tp_find(chain, &chain_info,
1880			       protocol, prio, false);
1881	if (!tp)
1882		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
1883	mutex_unlock(&chain->filter_chain_lock);
1884
1885	if (tp) {
1886		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1887		tp_new = tp;
1888	} else if (err) {
1889		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1890		tp_new = ERR_PTR(err);
1891	}
1892
1893	return tp_new;
1894}
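/* In other words (roughly): if a proto with the same priority already
 * exists, tp_new is destroyed and the existing proto, referenced in
 * tcf_chain_tp_find(), is returned instead; -EAGAIN is returned when a proto
 * with the same identity is still being destroyed or the chain is flushing,
 * telling the caller to replay the request.
 */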
1895
1896static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
1897				      struct tcf_proto *tp, bool rtnl_held,
1898				      struct netlink_ext_ack *extack)
1899{
1900	struct tcf_chain_info chain_info;
1901	struct tcf_proto *tp_iter;
1902	struct tcf_proto **pprev;
1903	struct tcf_proto *next;
1904
1905	mutex_lock(&chain->filter_chain_lock);
1906
1907	/* Atomically find and remove tp from chain. */
1908	for (pprev = &chain->filter_chain;
1909	     (tp_iter = tcf_chain_dereference(*pprev, chain));
1910	     pprev = &tp_iter->next) {
1911		if (tp_iter == tp) {
1912			chain_info.pprev = pprev;
1913			chain_info.next = tp_iter->next;
1914			WARN_ON(tp_iter->deleting);
1915			break;
1916		}
1917	}
1918	/* Verify that tp still exists and no new filters were inserted
1919	 * concurrently.
1920	 * Mark tp for deletion if it is empty.
1921	 */
1922	if (!tp_iter || !tcf_proto_check_delete(tp)) {
1923		mutex_unlock(&chain->filter_chain_lock);
1924		return;
1925	}
1926
1927	tcf_proto_signal_destroying(chain, tp);
1928	next = tcf_chain_dereference(chain_info.next, chain);
1929	if (tp == chain->filter_chain)
1930		tcf_chain0_head_change(chain, next);
1931	RCU_INIT_POINTER(*chain_info.pprev, next);
1932	mutex_unlock(&chain->filter_chain_lock);
1933
1934	tcf_proto_put(tp, rtnl_held, extack);
1935}
1936
1937static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1938					   struct tcf_chain_info *chain_info,
1939					   u32 protocol, u32 prio,
1940					   bool prio_allocate)
1941{
1942	struct tcf_proto **pprev;
1943	struct tcf_proto *tp;
1944
1945	/* Check the chain for existence of proto-tcf with this priority */
1946	for (pprev = &chain->filter_chain;
1947	     (tp = tcf_chain_dereference(*pprev, chain));
1948	     pprev = &tp->next) {
1949		if (tp->prio >= prio) {
1950			if (tp->prio == prio) {
1951				if (prio_allocate ||
1952				    (tp->protocol != protocol && protocol))
1953					return ERR_PTR(-EINVAL);
1954			} else {
1955				tp = NULL;
1956			}
1957			break;
1958		}
1959	}
1960	chain_info->pprev = pprev;
1961	if (tp) {
1962		chain_info->next = tp->next;
1963		tcf_proto_get(tp);
1964	} else {
1965		chain_info->next = NULL;
1966	}
1967	return tp;
1968}
1969
1970static int tcf_fill_node(struct net *net, struct sk_buff *skb,
1971			 struct tcf_proto *tp, struct tcf_block *block,
1972			 struct Qdisc *q, u32 parent, void *fh,
1973			 u32 portid, u32 seq, u16 flags, int event,
1974			 bool terse_dump, bool rtnl_held,
1975			 struct netlink_ext_ack *extack)
1976{
1977	struct tcmsg *tcm;
1978	struct nlmsghdr  *nlh;
1979	unsigned char *b = skb_tail_pointer(skb);
1980
1981	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1982	if (!nlh)
1983		goto out_nlmsg_trim;
1984	tcm = nlmsg_data(nlh);
1985	tcm->tcm_family = AF_UNSPEC;
1986	tcm->tcm__pad1 = 0;
1987	tcm->tcm__pad2 = 0;
1988	if (q) {
1989		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1990		tcm->tcm_parent = parent;
1991	} else {
1992		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
1993		tcm->tcm_block_index = block->index;
1994	}
1995	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
1996	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
1997		goto nla_put_failure;
1998	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
1999		goto nla_put_failure;
2000	if (!fh) {
2001		tcm->tcm_handle = 0;
2002	} else if (terse_dump) {
2003		if (tp->ops->terse_dump) {
2004			if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
2005						rtnl_held) < 0)
2006				goto nla_put_failure;
2007		} else {
2008			goto cls_op_not_supp;
2009		}
2010	} else {
2011		if (tp->ops->dump &&
2012		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
2013			goto nla_put_failure;
2014	}
2015
2016	if (extack && extack->_msg &&
2017	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
2018		goto nla_put_failure;
2019
2020	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2021
2022	return skb->len;
2023
2024out_nlmsg_trim:
2025nla_put_failure:
2026cls_op_not_supp:
2027	nlmsg_trim(skb, b);
2028	return -1;
2029}
2030
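/* Send a filter event either as a unicast reply to the requester or to the
 * RTNLGRP_TC multicast group.
 */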
2031static int tfilter_notify(struct net *net, struct sk_buff *oskb,
2032			  struct nlmsghdr *n, struct tcf_proto *tp,
2033			  struct tcf_block *block, struct Qdisc *q,
2034			  u32 parent, void *fh, int event, bool unicast,
2035			  bool rtnl_held, struct netlink_ext_ack *extack)
2036{
2037	struct sk_buff *skb;
2038	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2039	int err = 0;
2040
2041	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2042	if (!skb)
2043		return -ENOBUFS;
2044
2045	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
2046			  n->nlmsg_seq, n->nlmsg_flags, event,
2047			  false, rtnl_held, extack) <= 0) {
2048		kfree_skb(skb);
2049		return -EINVAL;
2050	}
2051
2052	if (unicast)
2053		err = rtnl_unicast(skb, net, portid);
2054	else
2055		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2056				     n->nlmsg_flags & NLM_F_ECHO);
2057	return err;
2058}
2059
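/* Build the RTM_DELTFILTER notification before calling the classifier's
 * delete() op, while the filter data is still available, and send it only
 * after the delete succeeded. *last is set by delete() when the proto became
 * empty.
 */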
2060static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
2061			      struct nlmsghdr *n, struct tcf_proto *tp,
2062			      struct tcf_block *block, struct Qdisc *q,
2063			      u32 parent, void *fh, bool unicast, bool *last,
2064			      bool rtnl_held, struct netlink_ext_ack *extack)
2065{
2066	struct sk_buff *skb;
2067	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2068	int err;
2069
2070	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2071	if (!skb)
2072		return -ENOBUFS;
2073
2074	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
2075			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
2076			  false, rtnl_held, extack) <= 0) {
2077		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
2078		kfree_skb(skb);
2079		return -EINVAL;
2080	}
2081
2082	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
2083	if (err) {
2084		kfree_skb(skb);
2085		return err;
2086	}
2087
2088	if (unicast)
2089		err = rtnl_unicast(skb, net, portid);
2090	else
2091		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2092				     n->nlmsg_flags & NLM_F_ECHO);
2093	if (err < 0)
2094		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
2095
2096	return err;
2097}
2098
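/* Notify about every proto on a chain; used when the whole chain is flushed
 * or deleted.
 */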
2099static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
2100				 struct tcf_block *block, struct Qdisc *q,
2101				 u32 parent, struct nlmsghdr *n,
2102				 struct tcf_chain *chain, int event,
2103				 struct netlink_ext_ack *extack)
2104{
2105	struct tcf_proto *tp;
2106
2107	for (tp = tcf_get_next_proto(chain, NULL);
2108	     tp; tp = tcf_get_next_proto(chain, tp))
2109		tfilter_notify(net, oskb, n, tp, block, q, parent, NULL,
2110			       event, false, true, extack);
2111}
2112
2113static void tfilter_put(struct tcf_proto *tp, void *fh)
2114{
2115	if (tp->ops->put && fh)
2116		tp->ops->put(tp, fh);
2117}
2118
2119static bool is_qdisc_ingress(__u32 classid)
2120{
2121	return (TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS));
2122}
2123
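/* Add or replace a filter (RTM_NEWTFILTER). rtnl is taken only when required
 * (see the comment above the rtnl_lock() call below); on -EAGAIN caused by a
 * concurrent chain flush the request is replayed with rtnl held.
 */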
2124static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2125			  struct netlink_ext_ack *extack)
2126{
2127	struct net *net = sock_net(skb->sk);
2128	struct nlattr *tca[TCA_MAX + 1];
2129	char name[IFNAMSIZ];
2130	struct tcmsg *t;
2131	u32 protocol;
2132	u32 prio;
2133	bool prio_allocate;
2134	u32 parent;
2135	u32 chain_index;
2136	struct Qdisc *q;
2137	struct tcf_chain_info chain_info;
2138	struct tcf_chain *chain;
2139	struct tcf_block *block;
2140	struct tcf_proto *tp;
2141	unsigned long cl;
2142	void *fh;
2143	int err;
2144	int tp_created;
2145	bool rtnl_held = false;
2146	u32 flags;
2147
2148replay:
2149	tp_created = 0;
2150
2151	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2152				     rtm_tca_policy, extack);
2153	if (err < 0)
2154		return err;
2155
2156	t = nlmsg_data(n);
2157	protocol = TC_H_MIN(t->tcm_info);
2158	prio = TC_H_MAJ(t->tcm_info);
2159	prio_allocate = false;
2160	parent = t->tcm_parent;
2161	tp = NULL;
2162	cl = 0;
2163	block = NULL;
2164	q = NULL;
2165	chain = NULL;
2166	flags = 0;
2167
2168	if (prio == 0) {
2169		/* If no priority is provided by the user,
2170		 * we allocate one.
2171		 */
2172		if (n->nlmsg_flags & NLM_F_CREATE) {
2173			prio = TC_H_MAKE(0x80000000U, 0U);
2174			prio_allocate = true;
2175		} else {
2176			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2177			return -ENOENT;
2178		}
2179	}
2180
2181	/* Find head of filter chain. */
2182
2183	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2184	if (err)
2185		return err;
2186
2187	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2188		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2189		err = -EINVAL;
2190		goto errout;
2191	}
2192
2193	/* Take the rtnl mutex if rtnl_held was set to true on a previous iteration,
2194	 * if the block is shared (no qdisc found), if the qdisc is not unlocked, if
2195	 * the classifier type is not specified, or if the classifier is not unlocked.
2196	 */
2197	if (rtnl_held ||
2198	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2199	    !tcf_proto_is_unlocked(name)) {
2200		rtnl_held = true;
2201		rtnl_lock();
2202	}
2203
2204	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2205	if (err)
2206		goto errout;
2207
2208	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2209				 extack);
2210	if (IS_ERR(block)) {
2211		err = PTR_ERR(block);
2212		goto errout;
2213	}
2214	block->classid = parent;
2215
2216	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2217	if (chain_index > TC_ACT_EXT_VAL_MASK) {
2218		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2219		err = -EINVAL;
2220		goto errout;
2221	}
2222	chain = tcf_chain_get(block, chain_index, true);
2223	if (!chain) {
2224		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
2225		err = -ENOMEM;
2226		goto errout;
2227	}
2228
2229	mutex_lock(&chain->filter_chain_lock);
2230	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2231			       prio, prio_allocate);
2232	if (IS_ERR(tp)) {
2233		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2234		err = PTR_ERR(tp);
2235		goto errout_locked;
2236	}
2237
2238	if (tp == NULL) {
2239		struct tcf_proto *tp_new = NULL;
2240
2241		if (chain->flushing) {
2242			err = -EAGAIN;
2243			goto errout_locked;
2244		}
2245
2246		/* Proto-tcf does not exist, create a new one */
2247
2248		if (tca[TCA_KIND] == NULL || !protocol) {
2249			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
2250			err = -EINVAL;
2251			goto errout_locked;
2252		}
2253
2254		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2255			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2256			err = -ENOENT;
2257			goto errout_locked;
2258		}
2259
2260		if (prio_allocate)
2261			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
2262							       &chain_info));
2263
2264		mutex_unlock(&chain->filter_chain_lock);
2265		tp_new = tcf_proto_create(name, protocol, prio, chain,
2266					  rtnl_held, extack);
2267		if (IS_ERR(tp_new)) {
2268			err = PTR_ERR(tp_new);
2269			goto errout_tp;
2270		}
2271
2272		tp_created = 1;
2273		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
2274						rtnl_held);
2275		if (IS_ERR(tp)) {
2276			err = PTR_ERR(tp);
2277			goto errout_tp;
2278		}
2279	} else {
2280		mutex_unlock(&chain->filter_chain_lock);
2281	}
2282
2283	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2284		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2285		err = -EINVAL;
2286		goto errout;
2287	}
2288
2289	fh = tp->ops->get(tp, t->tcm_handle);
2290
2291	if (!fh) {
2292		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2293			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2294			err = -ENOENT;
2295			goto errout;
2296		}
2297	} else if (n->nlmsg_flags & NLM_F_EXCL) {
2298		tfilter_put(tp, fh);
2299		NL_SET_ERR_MSG(extack, "Filter already exists");
2300		err = -EEXIST;
2301		goto errout;
2302	}
2303
2304	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
2305		tfilter_put(tp, fh);
2306		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
2307		err = -EINVAL;
2308		goto errout;
2309	}
2310
2311	if (!(n->nlmsg_flags & NLM_F_CREATE))
2312		flags |= TCA_ACT_FLAGS_REPLACE;
2313	if (!rtnl_held)
2314		flags |= TCA_ACT_FLAGS_NO_RTNL;
2315	if (is_qdisc_ingress(parent))
2316		flags |= TCA_ACT_FLAGS_AT_INGRESS;
2317	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
2318			      flags, extack);
2319	if (err == 0) {
2320		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2321			       RTM_NEWTFILTER, false, rtnl_held, extack);
2322		tfilter_put(tp, fh);
2323		/* q pointer is NULL for shared blocks */
2324		if (q)
2325			q->flags &= ~TCQ_F_CAN_BYPASS;
2326	}
2327
2328errout:
2329	if (err && tp_created)
2330		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
2331errout_tp:
2332	if (chain) {
2333		if (tp && !IS_ERR(tp))
2334			tcf_proto_put(tp, rtnl_held, NULL);
2335		if (!tp_created)
2336			tcf_chain_put(chain);
2337	}
2338	tcf_block_release(q, block, rtnl_held);
2339
2340	if (rtnl_held)
2341		rtnl_unlock();
2342
2343	if (err == -EAGAIN) {
2344		/* Take rtnl lock in case EAGAIN is caused by concurrent flush
2345		 * of target chain.
2346		 */
2347		rtnl_held = true;
2348		/* Replay the request. */
2349		goto replay;
2350	}
2351	return err;
2352
2353errout_locked:
2354	mutex_unlock(&chain->filter_chain_lock);
2355	goto errout;
2356}
2357
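/* Delete a filter (RTM_DELTFILTER). Priority zero with no protocol, handle or
 * kind means "flush the whole chain"; a zero handle removes the whole proto.
 */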
2358static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2359			  struct netlink_ext_ack *extack)
2360{
2361	struct net *net = sock_net(skb->sk);
2362	struct nlattr *tca[TCA_MAX + 1];
2363	char name[IFNAMSIZ];
2364	struct tcmsg *t;
2365	u32 protocol;
2366	u32 prio;
2367	u32 parent;
2368	u32 chain_index;
2369	struct Qdisc *q = NULL;
2370	struct tcf_chain_info chain_info;
2371	struct tcf_chain *chain = NULL;
2372	struct tcf_block *block = NULL;
2373	struct tcf_proto *tp = NULL;
2374	unsigned long cl = 0;
2375	void *fh = NULL;
2376	int err;
2377	bool rtnl_held = false;
2378
2379	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2380				     rtm_tca_policy, extack);
2381	if (err < 0)
2382		return err;
2383
2384	t = nlmsg_data(n);
2385	protocol = TC_H_MIN(t->tcm_info);
2386	prio = TC_H_MAJ(t->tcm_info);
2387	parent = t->tcm_parent;
2388
2389	if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
2390		NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
2391		return -ENOENT;
2392	}
2393
2394	/* Find head of filter chain. */
2395
2396	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2397	if (err)
2398		return err;
2399
2400	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2401		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2402		err = -EINVAL;
2403		goto errout;
2404	}
2405	/* Take the rtnl mutex if flushing the whole chain, if the block is shared
2406	 * (no qdisc found), if the qdisc is not unlocked, if the classifier type is
2407	 * not specified, or if the classifier is not unlocked.
2408	 */
2409	if (!prio ||
2410	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2411	    !tcf_proto_is_unlocked(name)) {
2412		rtnl_held = true;
2413		rtnl_lock();
2414	}
2415
2416	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2417	if (err)
2418		goto errout;
2419
2420	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2421				 extack);
2422	if (IS_ERR(block)) {
2423		err = PTR_ERR(block);
2424		goto errout;
2425	}
2426
2427	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2428	if (chain_index > TC_ACT_EXT_VAL_MASK) {
2429		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2430		err = -EINVAL;
2431		goto errout;
2432	}
2433	chain = tcf_chain_get(block, chain_index, false);
2434	if (!chain) {
2435		/* User requested flush on non-existent chain. Nothing to do,
2436		 * so just return success.
2437		 */
2438		if (prio == 0) {
2439			err = 0;
2440			goto errout;
2441		}
2442		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2443		err = -ENOENT;
2444		goto errout;
2445	}
2446
2447	if (prio == 0) {
2448		tfilter_notify_chain(net, skb, block, q, parent, n,
2449				     chain, RTM_DELTFILTER, extack);
2450		tcf_chain_flush(chain, rtnl_held);
2451		err = 0;
2452		goto errout;
2453	}
2454
2455	mutex_lock(&chain->filter_chain_lock);
2456	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2457			       prio, false);
2458	if (!tp || IS_ERR(tp)) {
2459		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2460		err = tp ? PTR_ERR(tp) : -ENOENT;
2461		goto errout_locked;
2462	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2463		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2464		err = -EINVAL;
2465		goto errout_locked;
2466	} else if (t->tcm_handle == 0) {
2467		tcf_proto_signal_destroying(chain, tp);
2468		tcf_chain_tp_remove(chain, &chain_info, tp);
2469		mutex_unlock(&chain->filter_chain_lock);
2470
2471		tcf_proto_put(tp, rtnl_held, NULL);
2472		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2473			       RTM_DELTFILTER, false, rtnl_held, extack);
2474		err = 0;
2475		goto errout;
2476	}
2477	mutex_unlock(&chain->filter_chain_lock);
2478
2479	fh = tp->ops->get(tp, t->tcm_handle);
2480
2481	if (!fh) {
2482		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2483		err = -ENOENT;
2484	} else {
2485		bool last;
2486
2487		err = tfilter_del_notify(net, skb, n, tp, block,
2488					 q, parent, fh, false, &last,
2489					 rtnl_held, extack);
2490
2491		if (err)
2492			goto errout;
2493		if (last)
2494			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
2495	}
2496
2497errout:
2498	if (chain) {
2499		if (tp && !IS_ERR(tp))
2500			tcf_proto_put(tp, rtnl_held, NULL);
2501		tcf_chain_put(chain);
2502	}
2503	tcf_block_release(q, block, rtnl_held);
2504
2505	if (rtnl_held)
2506		rtnl_unlock();
2507
2508	return err;
2509
2510errout_locked:
2511	mutex_unlock(&chain->filter_chain_lock);
2512	goto errout;
2513}
2514
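/* Retrieve a single filter (RTM_GETTFILTER) and unicast the result to the
 * requesting socket.
 */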
2515static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2516			  struct netlink_ext_ack *extack)
2517{
2518	struct net *net = sock_net(skb->sk);
2519	struct nlattr *tca[TCA_MAX + 1];
2520	char name[IFNAMSIZ];
2521	struct tcmsg *t;
2522	u32 protocol;
2523	u32 prio;
2524	u32 parent;
2525	u32 chain_index;
2526	struct Qdisc *q = NULL;
2527	struct tcf_chain_info chain_info;
2528	struct tcf_chain *chain = NULL;
2529	struct tcf_block *block = NULL;
2530	struct tcf_proto *tp = NULL;
2531	unsigned long cl = 0;
2532	void *fh = NULL;
2533	int err;
2534	bool rtnl_held = false;
2535
2536	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2537				     rtm_tca_policy, extack);
2538	if (err < 0)
2539		return err;
2540
2541	t = nlmsg_data(n);
2542	protocol = TC_H_MIN(t->tcm_info);
2543	prio = TC_H_MAJ(t->tcm_info);
2544	parent = t->tcm_parent;
2545
2546	if (prio == 0) {
2547		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2548		return -ENOENT;
2549	}
2550
2551	/* Find head of filter chain. */
2552
2553	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2554	if (err)
2555		return err;
2556
2557	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2558		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2559		err = -EINVAL;
2560		goto errout;
2561	}
2562	/* Take the rtnl mutex if the block is shared (no qdisc found), if the qdisc
2563	 * is not unlocked, if the classifier type is not specified, or if the
2564	 * classifier is not unlocked.
2565	 */
2566	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2567	    !tcf_proto_is_unlocked(name)) {
2568		rtnl_held = true;
2569		rtnl_lock();
2570	}
2571
2572	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2573	if (err)
2574		goto errout;
2575
2576	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2577				 extack);
2578	if (IS_ERR(block)) {
2579		err = PTR_ERR(block);
2580		goto errout;
2581	}
2582
2583	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2584	if (chain_index > TC_ACT_EXT_VAL_MASK) {
2585		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2586		err = -EINVAL;
2587		goto errout;
2588	}
2589	chain = tcf_chain_get(block, chain_index, false);
2590	if (!chain) {
2591		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2592		err = -EINVAL;
2593		goto errout;
2594	}
2595
2596	mutex_lock(&chain->filter_chain_lock);
2597	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2598			       prio, false);
2599	mutex_unlock(&chain->filter_chain_lock);
2600	if (!tp || IS_ERR(tp)) {
2601		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2602		err = tp ? PTR_ERR(tp) : -ENOENT;
2603		goto errout;
2604	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2605		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2606		err = -EINVAL;
2607		goto errout;
2608	}
2609
2610	fh = tp->ops->get(tp, t->tcm_handle);
2611
2612	if (!fh) {
2613		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2614		err = -ENOENT;
2615	} else {
2616		err = tfilter_notify(net, skb, n, tp, block, q, parent,
2617				     fh, RTM_NEWTFILTER, true, rtnl_held, NULL);
2618		if (err < 0)
2619			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
2620	}
2621
2622	tfilter_put(tp, fh);
2623errout:
2624	if (chain) {
2625		if (tp && !IS_ERR(tp))
2626			tcf_proto_put(tp, rtnl_held, NULL);
2627		tcf_chain_put(chain);
2628	}
2629	tcf_block_release(q, block, rtnl_held);
2630
2631	if (rtnl_held)
2632		rtnl_unlock();
2633
2634	return err;
2635}
2636
2637struct tcf_dump_args {
2638	struct tcf_walker w;
2639	struct sk_buff *skb;
2640	struct netlink_callback *cb;
2641	struct tcf_block *block;
2642	struct Qdisc *q;
2643	u32 parent;
2644	bool terse_dump;
2645};
2646
2647static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2648{
2649	struct tcf_dump_args *a = (void *)arg;
2650	struct net *net = sock_net(a->skb->sk);
2651
2652	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
2653			     n, NETLINK_CB(a->cb->skb).portid,
2654			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2655			     RTM_NEWTFILTER, a->terse_dump, true, NULL);
2656}
2657
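/* Dump all protos on a chain that match the prio/protocol selector encoded in
 * tcm_info. Returns false if the skb ran out of space and the dump has to be
 * resumed in a subsequent call.
 */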
2658static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
2659			   struct sk_buff *skb, struct netlink_callback *cb,
2660			   long index_start, long *p_index, bool terse)
2661{
2662	struct net *net = sock_net(skb->sk);
2663	struct tcf_block *block = chain->block;
2664	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2665	struct tcf_proto *tp, *tp_prev;
2666	struct tcf_dump_args arg;
2667
2668	for (tp = __tcf_get_next_proto(chain, NULL);
2669	     tp;
2670	     tp_prev = tp,
2671		     tp = __tcf_get_next_proto(chain, tp),
2672		     tcf_proto_put(tp_prev, true, NULL),
2673		     (*p_index)++) {
2674		if (*p_index < index_start)
2675			continue;
2676		if (TC_H_MAJ(tcm->tcm_info) &&
2677		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
2678			continue;
2679		if (TC_H_MIN(tcm->tcm_info) &&
2680		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
2681			continue;
2682		if (*p_index > index_start)
2683			memset(&cb->args[1], 0,
2684			       sizeof(cb->args) - sizeof(cb->args[0]));
2685		if (cb->args[1] == 0) {
2686			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
2687					  NETLINK_CB(cb->skb).portid,
2688					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
2689					  RTM_NEWTFILTER, false, true, NULL) <= 0)
2690				goto errout;
2691			cb->args[1] = 1;
2692		}
2693		if (!tp->ops->walk)
2694			continue;
2695		arg.w.fn = tcf_node_dump;
2696		arg.skb = skb;
2697		arg.cb = cb;
2698		arg.block = block;
2699		arg.q = q;
2700		arg.parent = parent;
2701		arg.w.stop = 0;
2702		arg.w.skip = cb->args[1] - 1;
2703		arg.w.count = 0;
2704		arg.w.cookie = cb->args[2];
2705		arg.terse_dump = terse;
2706		tp->ops->walk(tp, &arg.w, true);
2707		cb->args[2] = arg.w.cookie;
2708		cb->args[1] = arg.w.count + 1;
2709		if (arg.w.stop)
2710			goto errout;
2711	}
2712	return true;
2713
2714errout:
2715	tcf_proto_put(tp, true, NULL);
2716	return false;
2717}
2718
2719static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
2720	[TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
2721};
2722
2723/* called with RTNL */
2724static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
2725{
2726	struct tcf_chain *chain, *chain_prev;
2727	struct net *net = sock_net(skb->sk);
2728	struct nlattr *tca[TCA_MAX + 1];
2729	struct Qdisc *q = NULL;
2730	struct tcf_block *block;
2731	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2732	bool terse_dump = false;
2733	long index_start;
2734	long index;
2735	u32 parent;
2736	int err;
2737
2738	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2739		return skb->len;
2740
2741	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2742				     tcf_tfilter_dump_policy, cb->extack);
2743	if (err)
2744		return err;
2745
2746	if (tca[TCA_DUMP_FLAGS]) {
2747		struct nla_bitfield32 flags =
2748			nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
2749
2750		terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
2751	}
2752
2753	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2754		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2755		if (!block)
2756			goto out;
2757		/* If we work with a block index, q is NULL and the parent value
2758		 * will never be used in the following code. The check
2759		 * in tcf_fill_node prevents it. However, the compiler does not
2760		 * see that far, so set parent to zero to silence the warning
2761		 * about parent being uninitialized.
2762		 */
2763		parent = 0;
2764	} else {
2765		const struct Qdisc_class_ops *cops;
2766		struct net_device *dev;
2767		unsigned long cl = 0;
2768
2769		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2770		if (!dev)
2771			return skb->len;
2772
2773		parent = tcm->tcm_parent;
2774		if (!parent)
2775			q = rtnl_dereference(dev->qdisc);
2776		else
2777			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2778		if (!q)
2779			goto out;
2780		cops = q->ops->cl_ops;
2781		if (!cops)
2782			goto out;
2783		if (!cops->tcf_block)
2784			goto out;
2785		if (TC_H_MIN(tcm->tcm_parent)) {
2786			cl = cops->find(q, tcm->tcm_parent);
2787			if (cl == 0)
2788				goto out;
2789		}
2790		block = cops->tcf_block(q, cl, NULL);
2791		if (!block)
2792			goto out;
2793		parent = block->classid;
2794		if (tcf_block_shared(block))
2795			q = NULL;
2796	}
2797
2798	index_start = cb->args[0];
2799	index = 0;
2800
2801	for (chain = __tcf_get_next_chain(block, NULL);
2802	     chain;
2803	     chain_prev = chain,
2804		     chain = __tcf_get_next_chain(block, chain),
2805		     tcf_chain_put(chain_prev)) {
2806		if (tca[TCA_CHAIN] &&
2807		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
2808			continue;
2809		if (!tcf_chain_dump(chain, q, parent, skb, cb,
2810				    index_start, &index, terse_dump)) {
2811			tcf_chain_put(chain);
2812			err = -EMSGSIZE;
2813			break;
2814		}
2815	}
2816
2817	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2818		tcf_block_refcnt_put(block, true);
2819	cb->args[0] = index;
2820
2821out:
2822	/* If we made no progress, the error (EMSGSIZE) is real */
2823	if (skb->len == 0 && err)
2824		return err;
2825	return skb->len;
2826}
2827
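/* Fill a netlink message describing a chain and, if a template is attached,
 * its attributes via the classifier's tmplt_dump() callback.
 */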
2828static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
2829			      void *tmplt_priv, u32 chain_index,
2830			      struct net *net, struct sk_buff *skb,
2831			      struct tcf_block *block,
2832			      u32 portid, u32 seq, u16 flags, int event,
2833			      struct netlink_ext_ack *extack)
2834{
2835	unsigned char *b = skb_tail_pointer(skb);
2836	const struct tcf_proto_ops *ops;
2837	struct nlmsghdr *nlh;
2838	struct tcmsg *tcm;
2839	void *priv;
2840
2841	ops = tmplt_ops;
2842	priv = tmplt_priv;
2843
2844	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
2845	if (!nlh)
2846		goto out_nlmsg_trim;
2847	tcm = nlmsg_data(nlh);
2848	tcm->tcm_family = AF_UNSPEC;
2849	tcm->tcm__pad1 = 0;
2850	tcm->tcm__pad2 = 0;
2851	tcm->tcm_handle = 0;
2852	if (block->q) {
2853		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
2854		tcm->tcm_parent = block->q->handle;
2855	} else {
2856		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
2857		tcm->tcm_block_index = block->index;
2858	}
2859
2860	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
2861		goto nla_put_failure;
2862
2863	if (ops) {
2864		if (nla_put_string(skb, TCA_KIND, ops->kind))
2865			goto nla_put_failure;
2866		if (ops->tmplt_dump(skb, net, priv) < 0)
2867			goto nla_put_failure;
2868	}
2869
2870	if (extack && extack->_msg &&
2871	    nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
2872		goto out_nlmsg_trim;
2873
2874	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2875
2876	return skb->len;
2877
2878out_nlmsg_trim:
2879nla_put_failure:
2880	nlmsg_trim(skb, b);
2881	return -EMSGSIZE;
2882}
2883
2884static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
2885			   u32 seq, u16 flags, int event, bool unicast,
2886			   struct netlink_ext_ack *extack)
2887{
2888	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2889	struct tcf_block *block = chain->block;
2890	struct net *net = block->net;
2891	struct sk_buff *skb;
2892	int err = 0;
2893
2894	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2895	if (!skb)
2896		return -ENOBUFS;
2897
2898	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2899			       chain->index, net, skb, block, portid,
2900			       seq, flags, event, extack) <= 0) {
2901		kfree_skb(skb);
2902		return -EINVAL;
2903	}
2904
2905	if (unicast)
2906		err = rtnl_unicast(skb, net, portid);
2907	else
2908		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2909				     flags & NLM_F_ECHO);
2910
2911	return err;
2912}
2913
2914static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
2915				  void *tmplt_priv, u32 chain_index,
2916				  struct tcf_block *block, struct sk_buff *oskb,
2917				  u32 seq, u16 flags, bool unicast)
2918{
2919	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2920	struct net *net = block->net;
2921	struct sk_buff *skb;
2922
2923	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2924	if (!skb)
2925		return -ENOBUFS;
2926
2927	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
2928			       block, portid, seq, flags, RTM_DELCHAIN, NULL) <= 0) {
2929		kfree_skb(skb);
2930		return -EINVAL;
2931	}
2932
2933	if (unicast)
2934		return rtnl_unicast(skb, net, portid);
2935
2936	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
2937}
2938
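/* Attach a chain template: look up the classifier ops by TCA_KIND and let it
 * create the template private data. The classifier must implement all tmplt_*
 * ops; a module reference on the ops is held for the lifetime of the template.
 */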
2939static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
2940			      struct nlattr **tca,
2941			      struct netlink_ext_ack *extack)
2942{
2943	const struct tcf_proto_ops *ops;
2944	char name[IFNAMSIZ];
2945	void *tmplt_priv;
2946
2947	/* If kind is not set, user did not specify template. */
2948	if (!tca[TCA_KIND])
2949		return 0;
2950
2951	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2952		NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
2953		return -EINVAL;
2954	}
2955
2956	ops = tcf_proto_lookup_ops(name, true, extack);
2957	if (IS_ERR(ops))
2958		return PTR_ERR(ops);
2959	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump ||
2960	    !ops->tmplt_reoffload) {
2961		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2962		module_put(ops->owner);
2963		return -EOPNOTSUPP;
2964	}
2965
2966	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
2967	if (IS_ERR(tmplt_priv)) {
2968		module_put(ops->owner);
2969		return PTR_ERR(tmplt_priv);
2970	}
2971	chain->tmplt_ops = ops;
2972	chain->tmplt_priv = tmplt_priv;
2973	return 0;
2974}
2975
2976static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
2977			       void *tmplt_priv)
2978{
2979	/* If template ops are not set, there is no work to do. */
2980	if (!tmplt_ops)
2981		return;
2982
2983	tmplt_ops->tmplt_destroy(tmplt_priv);
2984	module_put(tmplt_ops->owner);
2985}
2986
2987/* Add/delete/get a chain */
2988
2989static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
2990			struct netlink_ext_ack *extack)
2991{
2992	struct net *net = sock_net(skb->sk);
2993	struct nlattr *tca[TCA_MAX + 1];
2994	struct tcmsg *t;
2995	u32 parent;
2996	u32 chain_index;
2997	struct Qdisc *q;
2998	struct tcf_chain *chain;
2999	struct tcf_block *block;
3000	unsigned long cl;
3001	int err;
3002
3003replay:
3004	q = NULL;
3005	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
3006				     rtm_tca_policy, extack);
3007	if (err < 0)
3008		return err;
3009
3010	t = nlmsg_data(n);
3011	parent = t->tcm_parent;
3012	cl = 0;
3013
3014	block = tcf_block_find(net, &q, &parent, &cl,
3015			       t->tcm_ifindex, t->tcm_block_index, extack);
3016	if (IS_ERR(block))
3017		return PTR_ERR(block);
3018
3019	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
3020	if (chain_index > TC_ACT_EXT_VAL_MASK) {
3021		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
3022		err = -EINVAL;
3023		goto errout_block;
3024	}
3025
3026	mutex_lock(&block->lock);
3027	chain = tcf_chain_lookup(block, chain_index);
3028	if (n->nlmsg_type == RTM_NEWCHAIN) {
3029		if (chain) {
3030			if (tcf_chain_held_by_acts_only(chain)) {
3031				/* The chain exists only because there is
3032				 * some action referencing it.
3033				 */
3034				tcf_chain_hold(chain);
3035			} else {
3036				NL_SET_ERR_MSG(extack, "Filter chain already exists");
3037				err = -EEXIST;
3038				goto errout_block_locked;
3039			}
3040		} else {
3041			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
3042				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
3043				err = -ENOENT;
3044				goto errout_block_locked;
3045			}
3046			chain = tcf_chain_create(block, chain_index);
3047			if (!chain) {
3048				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
3049				err = -ENOMEM;
3050				goto errout_block_locked;
3051			}
3052		}
3053	} else {
3054		if (!chain || tcf_chain_held_by_acts_only(chain)) {
3055			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
3056			err = -EINVAL;
3057			goto errout_block_locked;
3058		}
3059		tcf_chain_hold(chain);
3060	}
3061
3062	if (n->nlmsg_type == RTM_NEWCHAIN) {
3063		/* Modifying chain requires holding parent block lock. In case
3064		 * the chain was successfully added, take a reference to the
3065		 * chain. This ensures that an empty chain does not disappear at
3066		 * the end of this function.
3067		 */
3068		tcf_chain_hold(chain);
3069		chain->explicitly_created = true;
3070	}
3071	mutex_unlock(&block->lock);
3072
3073	switch (n->nlmsg_type) {
3074	case RTM_NEWCHAIN:
3075		err = tc_chain_tmplt_add(chain, net, tca, extack);
3076		if (err) {
3077			tcf_chain_put_explicitly_created(chain);
3078			goto errout;
3079		}
3080
3081		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
3082				RTM_NEWCHAIN, false, extack);
3083		break;
3084	case RTM_DELCHAIN:
3085		tfilter_notify_chain(net, skb, block, q, parent, n,
3086				     chain, RTM_DELTFILTER, extack);
3087		/* Flush the chain first as the user requested chain removal. */
3088		tcf_chain_flush(chain, true);
3089		/* In case the chain was successfully deleted, put a reference
3090		 * to the chain previously taken during addition.
3091		 */
3092		tcf_chain_put_explicitly_created(chain);
3093		break;
3094	case RTM_GETCHAIN:
3095		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
3096				      n->nlmsg_flags, n->nlmsg_type, true, extack);
3097		if (err < 0)
3098			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
3099		break;
3100	default:
3101		err = -EOPNOTSUPP;
3102		NL_SET_ERR_MSG(extack, "Unsupported message type");
3103		goto errout;
3104	}
3105
3106errout:
3107	tcf_chain_put(chain);
3108errout_block:
3109	tcf_block_release(q, block, true);
3110	if (err == -EAGAIN)
3111		/* Replay the request. */
3112		goto replay;
3113	return err;
3114
3115errout_block_locked:
3116	mutex_unlock(&block->lock);
3117	goto errout_block;
3118}
3119
3120/* called with RTNL */
3121static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
3122{
3123	struct net *net = sock_net(skb->sk);
3124	struct nlattr *tca[TCA_MAX + 1];
3125	struct Qdisc *q = NULL;
3126	struct tcf_block *block;
3127	struct tcmsg *tcm = nlmsg_data(cb->nlh);
3128	struct tcf_chain *chain;
3129	long index_start;
3130	long index;
3131	int err;
3132
3133	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
3134		return skb->len;
3135
3136	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
3137				     rtm_tca_policy, cb->extack);
3138	if (err)
3139		return err;
3140
3141	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
3142		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
3143		if (!block)
3144			goto out;
3145	} else {
3146		const struct Qdisc_class_ops *cops;
3147		struct net_device *dev;
3148		unsigned long cl = 0;
3149
3150		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
3151		if (!dev)
3152			return skb->len;
3153
3154		if (!tcm->tcm_parent)
3155			q = rtnl_dereference(dev->qdisc);
3156		else
3157			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
3158
3159		if (!q)
3160			goto out;
3161		cops = q->ops->cl_ops;
3162		if (!cops)
3163			goto out;
3164		if (!cops->tcf_block)
3165			goto out;
3166		if (TC_H_MIN(tcm->tcm_parent)) {
3167			cl = cops->find(q, tcm->tcm_parent);
3168			if (cl == 0)
3169				goto out;
3170		}
3171		block = cops->tcf_block(q, cl, NULL);
3172		if (!block)
3173			goto out;
3174		if (tcf_block_shared(block))
3175			q = NULL;
3176	}
3177
3178	index_start = cb->args[0];
3179	index = 0;
3180
3181	mutex_lock(&block->lock);
3182	list_for_each_entry(chain, &block->chain_list, list) {
3183		if ((tca[TCA_CHAIN] &&
3184		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
3185			continue;
3186		if (index < index_start) {
3187			index++;
3188			continue;
3189		}
3190		if (tcf_chain_held_by_acts_only(chain))
3191			continue;
3192		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
3193					 chain->index, net, skb, block,
3194					 NETLINK_CB(cb->skb).portid,
3195					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
3196					 RTM_NEWCHAIN, NULL);
3197		if (err <= 0)
3198			break;
3199		index++;
3200	}
3201	mutex_unlock(&block->lock);
3202
3203	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
3204		tcf_block_refcnt_put(block, true);
3205	cb->args[0] = index;
3206
3207out:
3208	/* If we did no progress, the error (EMSGSIZE) is real */
3209	/* If we made no progress, the error (EMSGSIZE) is real */
3210		return err;
3211	return skb->len;
3212}
3213
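/* Initialize a tcf_exts. With CONFIG_NET_CLS_ACT an array of TCA_ACT_MAX_PRIO
 * action pointers is allocated; with use_action_miss a miss cookie base is
 * allocated as well (see tcf_exts_miss_cookie_base_alloc()).
 */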
3214int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
3215		     int police, struct tcf_proto *tp, u32 handle,
3216		     bool use_action_miss)
3217{
3218	int err = 0;
3219
3220#ifdef CONFIG_NET_CLS_ACT
3221	exts->type = 0;
3222	exts->nr_actions = 0;
3223	exts->miss_cookie_node = NULL;
3224	/* Note: we do not yet own a reference on net.
3225	 * This reference might be taken later from tcf_exts_get_net().
3226	 */
3227	exts->net = net;
3228	exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
3229				GFP_KERNEL);
3230	if (!exts->actions)
3231		return -ENOMEM;
3232#endif
3233
3234	exts->action = action;
3235	exts->police = police;
3236
3237	if (!use_action_miss)
3238		return 0;
3239
3240	err = tcf_exts_miss_cookie_base_alloc(exts, tp, handle);
3241	if (err)
3242		goto err_miss_alloc;
3243
3244	return 0;
3245
3246err_miss_alloc:
3247	tcf_exts_destroy(exts);
3248#ifdef CONFIG_NET_CLS_ACT
3249	exts->actions = NULL;
3250#endif
3251	return err;
3252}
3253EXPORT_SYMBOL(tcf_exts_init_ex);
3254
3255void tcf_exts_destroy(struct tcf_exts *exts)
3256{
3257	tcf_exts_miss_cookie_base_destroy(exts);
3258
3259#ifdef CONFIG_NET_CLS_ACT
3260	if (exts->actions) {
3261		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
3262		kfree(exts->actions);
3263	}
3264	exts->nr_actions = 0;
3265#endif
3266}
3267EXPORT_SYMBOL(tcf_exts_destroy);
3268
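
/* Parse and bind the actions attached to a filter. The legacy police
 * attribute results in a single TCA_OLD_COMPAT action; the generic action
 * attribute may bind up to TCA_ACT_MAX_PRIO actions via tcf_action_init().
 */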
3269int tcf_exts_validate_ex(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
3270			 struct nlattr *rate_tlv, struct tcf_exts *exts,
3271			 u32 flags, u32 fl_flags, struct netlink_ext_ack *extack)
3272{
3273#ifdef CONFIG_NET_CLS_ACT
3274	{
3275		int init_res[TCA_ACT_MAX_PRIO] = {};
3276		struct tc_action *act;
3277		size_t attr_size = 0;
3278
3279		if (exts->police && tb[exts->police]) {
3280			struct tc_action_ops *a_o;
3281
3282			a_o = tc_action_load_ops(tb[exts->police], true,
3283						 !(flags & TCA_ACT_FLAGS_NO_RTNL),
3284						 extack);
3285			if (IS_ERR(a_o))
3286				return PTR_ERR(a_o);
3287			flags |= TCA_ACT_FLAGS_POLICE | TCA_ACT_FLAGS_BIND;
3288			act = tcf_action_init_1(net, tp, tb[exts->police],
3289						rate_tlv, a_o, init_res, flags,
3290						extack);
3291			module_put(a_o->owner);
3292			if (IS_ERR(act))
3293				return PTR_ERR(act);
3294
3295			act->type = exts->type = TCA_OLD_COMPAT;
3296			exts->actions[0] = act;
3297			exts->nr_actions = 1;
3298			tcf_idr_insert_many(exts->actions);
3299		} else if (exts->action && tb[exts->action]) {
3300			int err;
3301
3302			flags |= TCA_ACT_FLAGS_BIND;
3303			err = tcf_action_init(net, tp, tb[exts->action],
3304					      rate_tlv, exts->actions, init_res,
3305					      &attr_size, flags, fl_flags,
3306					      extack);
3307			if (err < 0)
3308				return err;
3309			exts->nr_actions = err;
3310		}
3311	}
3312#else
3313	if ((exts->action && tb[exts->action]) ||
3314	    (exts->police && tb[exts->police])) {
3315		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
3316		return -EOPNOTSUPP;
3317	}
3318#endif
3319
3320	return 0;
3321}
3322EXPORT_SYMBOL(tcf_exts_validate_ex);
3323
3324int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
3325		      struct nlattr *rate_tlv, struct tcf_exts *exts,
3326		      u32 flags, struct netlink_ext_ack *extack)
3327{
3328	return tcf_exts_validate_ex(net, tp, tb, rate_tlv, exts,
3329				    flags, 0, extack);
3330}
3331EXPORT_SYMBOL(tcf_exts_validate);
3332
3333void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
3334{
3335#ifdef CONFIG_NET_CLS_ACT
3336	struct tcf_exts old = *dst;
3337
3338	*dst = *src;
3339	tcf_exts_destroy(&old);
3340#endif
3341}
3342EXPORT_SYMBOL(tcf_exts_change);
3343
3344#ifdef CONFIG_NET_CLS_ACT
3345static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
3346{
3347	if (exts->nr_actions == 0)
3348		return NULL;
3349	else
3350		return exts->actions[0];
3351}
3352#endif
3353
3354int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
3355{
3356#ifdef CONFIG_NET_CLS_ACT
3357	struct nlattr *nest;
3358
3359	if (exts->action && tcf_exts_has_actions(exts)) {
3360		/*
3361		 * again, for backward-compatible mode - we want
3362		 * to work with both old and new modes of entering
3363		 * tc data even if iproute2 was newer - jhs
3364		 */
3365		if (exts->type != TCA_OLD_COMPAT) {
3366			nest = nla_nest_start_noflag(skb, exts->action);
3367			if (nest == NULL)
3368				goto nla_put_failure;
3369
3370			if (tcf_action_dump(skb, exts->actions, 0, 0, false)
3371			    < 0)
3372				goto nla_put_failure;
3373			nla_nest_end(skb, nest);
3374		} else if (exts->police) {
3375			struct tc_action *act = tcf_exts_first_act(exts);
3376			nest = nla_nest_start_noflag(skb, exts->police);
3377			if (nest == NULL || !act)
3378				goto nla_put_failure;
3379			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
3380				goto nla_put_failure;
3381			nla_nest_end(skb, nest);
3382		}
3383	}
3384	return 0;
3385
3386nla_put_failure:
3387	nla_nest_cancel(skb, nest);
3388	return -1;
3389#else
3390	return 0;
3391#endif
3392}
3393EXPORT_SYMBOL(tcf_exts_dump);
3394
3395int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
3396{
3397#ifdef CONFIG_NET_CLS_ACT
3398	struct nlattr *nest;
3399
3400	if (!exts->action || !tcf_exts_has_actions(exts))
3401		return 0;
3402
3403	nest = nla_nest_start_noflag(skb, exts->action);
3404	if (!nest)
3405		goto nla_put_failure;
3406
3407	if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
3408		goto nla_put_failure;
3409	nla_nest_end(skb, nest);
3410	return 0;
3411
3412nla_put_failure:
3413	nla_nest_cancel(skb, nest);
3414	return -1;
3415#else
3416	return 0;
3417#endif
3418}
3419EXPORT_SYMBOL(tcf_exts_terse_dump);
3420
3421int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
3422{
3423#ifdef CONFIG_NET_CLS_ACT
3424	struct tc_action *a = tcf_exts_first_act(exts);
3425	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
3426		return -1;
3427#endif
3428	return 0;
3429}
3430EXPORT_SYMBOL(tcf_exts_dump_stats);
3431
3432static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
3433{
3434	if (*flags & TCA_CLS_FLAGS_IN_HW)
3435		return;
3436	*flags |= TCA_CLS_FLAGS_IN_HW;
3437	atomic_inc(&block->offloadcnt);
3438}
3439
3440static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
3441{
3442	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
3443		return;
3444	*flags &= ~TCA_CLS_FLAGS_IN_HW;
3445	atomic_dec(&block->offloadcnt);
3446}
3447
3448static void tc_cls_offload_cnt_update(struct tcf_block *block,
3449				      struct tcf_proto *tp, u32 *cnt,
3450				      u32 *flags, u32 diff, bool add)
3451{
3452	lockdep_assert_held(&block->cb_lock);
3453
3454	spin_lock(&tp->lock);
3455	if (add) {
3456		if (!*cnt)
3457			tcf_block_offload_inc(block, flags);
3458		*cnt += diff;
3459	} else {
3460		*cnt -= diff;
3461		if (!*cnt)
3462			tcf_block_offload_dec(block, flags);
3463	}
3464	spin_unlock(&tp->lock);
3465}
3466
3467static void
3468tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
3469			 u32 *cnt, u32 *flags)
3470{
3471	lockdep_assert_held(&block->cb_lock);
3472
3473	spin_lock(&tp->lock);
3474	tcf_block_offload_dec(block, flags);
3475	*cnt = 0;
3476	spin_unlock(&tp->lock);
3477}
3478
3479static int
3480__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3481		   void *type_data, bool err_stop)
3482{
3483	struct flow_block_cb *block_cb;
3484	int ok_count = 0;
3485	int err;
3486
3487	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
3488		err = block_cb->cb(type, type_data, block_cb->cb_priv);
3489		if (err) {
3490			if (err_stop)
3491				return err;
3492		} else {
3493			ok_count++;
3494		}
3495	}
3496	return ok_count;
3497}
3498
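
/* Invoke all block callbacks for this setup type. rtnl is taken first when
 * the block is bound to a locked device and the caller does not already hold
 * it, to preserve the rtnl -> cb_lock ordering used at block bind time.
 */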
3499int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3500		     void *type_data, bool err_stop, bool rtnl_held)
3501{
3502	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3503	int ok_count;
3504
3505retry:
3506	if (take_rtnl)
3507		rtnl_lock();
3508	down_read(&block->cb_lock);
3509	/* Need to obtain rtnl lock if block is bound to devs that require it.
3510	 * In block bind code cb_lock is obtained while holding rtnl, so we must
3511	 * obtain the locks in the same order here.
3512	 */
3513	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3514		up_read(&block->cb_lock);
3515		take_rtnl = true;
3516		goto retry;
3517	}
3518
3519	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3520
3521	up_read(&block->cb_lock);
3522	if (take_rtnl)
3523		rtnl_unlock();
3524	return ok_count;
3525}
3526EXPORT_SYMBOL(tc_setup_cb_call);
3527
3528	/* Non-destructive filter add. If a filter that wasn't already in hardware is
3529	 * successfully offloaded, increment the block offloads counter. On failure,
3530	 * the previously offloaded filter is considered intact and the offloads
3531	 * counter is not decremented.
3532 */
3533
3534int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
3535		    enum tc_setup_type type, void *type_data, bool err_stop,
3536		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3537{
3538	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3539	int ok_count;
3540
3541retry:
3542	if (take_rtnl)
3543		rtnl_lock();
3544	down_read(&block->cb_lock);
3545	/* Need to obtain rtnl lock if block is bound to devs that require it.
3546	 * In block bind code cb_lock is obtained while holding rtnl, so we must
3547	 * obtain the locks in the same order here.
3548	 */
3549	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3550		up_read(&block->cb_lock);
3551		take_rtnl = true;
3552		goto retry;
3553	}
3554
3555	/* Make sure all netdevs sharing this block are offload-capable. */
3556	if (block->nooffloaddevcnt && err_stop) {
3557		ok_count = -EOPNOTSUPP;
3558		goto err_unlock;
3559	}
3560
3561	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3562	if (ok_count < 0)
3563		goto err_unlock;
3564
3565	if (tp->ops->hw_add)
3566		tp->ops->hw_add(tp, type_data);
3567	if (ok_count > 0)
3568		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
3569					  ok_count, true);
3570err_unlock:
3571	up_read(&block->cb_lock);
3572	if (take_rtnl)
3573		rtnl_unlock();
3574	return min(ok_count, 0);
3575}
3576EXPORT_SYMBOL(tc_setup_cb_add);
3577
3578	/* Destructive filter replace. If a filter that wasn't already in hardware is
3579	 * successfully offloaded, increment the block offload counter. On failure,
3580	 * the previously offloaded filter is considered destroyed and the offload
3581	 * counter is decremented.
3582 */
3583
3584int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
3585			enum tc_setup_type type, void *type_data, bool err_stop,
3586			u32 *old_flags, unsigned int *old_in_hw_count,
3587			u32 *new_flags, unsigned int *new_in_hw_count,
3588			bool rtnl_held)
3589{
3590	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3591	int ok_count;
3592
3593retry:
3594	if (take_rtnl)
3595		rtnl_lock();
3596	down_read(&block->cb_lock);
3597	/* Need to obtain rtnl lock if block is bound to devs that require it.
3598	 * In block bind code cb_lock is obtained while holding rtnl, so we must
3599	 * obtain the locks in the same order here.
3600	 */
3601	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3602		up_read(&block->cb_lock);
3603		take_rtnl = true;
3604		goto retry;
3605	}
3606
3607	/* Make sure all netdevs sharing this block are offload-capable. */
3608	if (block->nooffloaddevcnt && err_stop) {
3609		ok_count = -EOPNOTSUPP;
3610		goto err_unlock;
3611	}
3612
3613	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
3614	if (tp->ops->hw_del)
3615		tp->ops->hw_del(tp, type_data);
3616
3617	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3618	if (ok_count < 0)
3619		goto err_unlock;
3620
3621	if (tp->ops->hw_add)
3622		tp->ops->hw_add(tp, type_data);
3623	if (ok_count > 0)
3624		tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
3625					  new_flags, ok_count, true);
3626err_unlock:
3627	up_read(&block->cb_lock);
3628	if (take_rtnl)
3629		rtnl_unlock();
3630	return min(ok_count, 0);
3631}
3632EXPORT_SYMBOL(tc_setup_cb_replace);
3633
3634	/* Destroy the filter and decrement the block offload counter if the filter
3635	 * was previously offloaded.
3636 */
3637
3638int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
3639			enum tc_setup_type type, void *type_data, bool err_stop,
3640			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3641{
3642	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3643	int ok_count;
3644
3645retry:
3646	if (take_rtnl)
3647		rtnl_lock();
3648	down_read(&block->cb_lock);
3649	/* Need to obtain rtnl lock if block is bound to devs that require it.
3650	 * In block bind code cb_lock is obtained while holding rtnl, so we must
3651	 * obtain the locks in the same order here.
3652	 */
3653	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3654		up_read(&block->cb_lock);
3655		take_rtnl = true;
3656		goto retry;
3657	}
3658
3659	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3660
3661	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
3662	if (tp->ops->hw_del)
3663		tp->ops->hw_del(tp, type_data);
3664
3665	up_read(&block->cb_lock);
3666	if (take_rtnl)
3667		rtnl_unlock();
3668	return min(ok_count, 0);
3669}
3670EXPORT_SYMBOL(tc_setup_cb_destroy);
3671
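/* Re-offload a single filter to one callback (used when a callback is added
 * to or removed from a block). The error is propagated only for an add with
 * skip_sw set; on success the in_hw counters are updated.
 */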
3672int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
3673			  bool add, flow_setup_cb_t *cb,
3674			  enum tc_setup_type type, void *type_data,
3675			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
3676{
3677	int err = cb(type, type_data, cb_priv);
3678
3679	if (err) {
3680		if (add && tc_skip_sw(*flags))
3681			return err;
3682	} else {
3683		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
3684					  add);
3685	}
3686
3687	return 0;
3688}
3689EXPORT_SYMBOL(tc_setup_cb_reoffload);
3690
3691static int tcf_act_get_user_cookie(struct flow_action_entry *entry,
3692				   const struct tc_action *act)
3693{
3694	struct tc_cookie *user_cookie;
3695	int err = 0;
3696
3697	rcu_read_lock();
3698	user_cookie = rcu_dereference(act->user_cookie);
3699	if (user_cookie) {
3700		entry->user_cookie = flow_action_cookie_create(user_cookie->data,
3701							       user_cookie->len,
3702							       GFP_ATOMIC);
3703		if (!entry->user_cookie)
3704			err = -ENOMEM;
3705	}
3706	rcu_read_unlock();
3707	return err;
3708}
3709
3710static void tcf_act_put_user_cookie(struct flow_action_entry *entry)
3711{
3712	flow_action_cookie_destroy(entry->user_cookie);
3713}
3714
3715void tc_cleanup_offload_action(struct flow_action *flow_action)
3716{
3717	struct flow_action_entry *entry;
3718	int i;
3719
3720	flow_action_for_each(i, entry, flow_action) {
3721		tcf_act_put_user_cookie(entry);
3722		if (entry->destructor)
3723			entry->destructor(entry->destructor_priv);
3724	}
3725}
3726EXPORT_SYMBOL(tc_cleanup_offload_action);
3727
3728static int tc_setup_offload_act(struct tc_action *act,
3729				struct flow_action_entry *entry,
3730				u32 *index_inc,
3731				struct netlink_ext_ack *extack)
3732{
3733#ifdef CONFIG_NET_CLS_ACT
3734	if (act->ops->offload_act_setup) {
3735		return act->ops->offload_act_setup(act, entry, index_inc, true,
3736						   extack);
3737	} else {
3738		NL_SET_ERR_MSG(extack, "Action does not support offload");
3739		return -EOPNOTSUPP;
3740	}
3741#else
3742	return 0;
3743#endif
3744}
3745
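/* Translate an array of tc actions into flow_action entries for offload. One
 * action may expand into several entries (count returned via index by the
 * action's offload_act_setup()); each entry gets the action's hw stats type,
 * hw index and a miss cookie derived from miss_cookie_base and the action's
 * position in the array.
 */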
3746int tc_setup_action(struct flow_action *flow_action,
3747		    struct tc_action *actions[],
3748		    u32 miss_cookie_base,
3749		    struct netlink_ext_ack *extack)
3750{
3751	int i, j, k, index, err = 0;
3752	struct tc_action *act;
3753
3754	BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
3755	BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
3756	BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);
3757
3758	if (!actions)
3759		return 0;
3760
3761	j = 0;
3762	tcf_act_for_each_action(i, act, actions) {
3763		struct flow_action_entry *entry;
3764
3765		entry = &flow_action->entries[j];
3766		spin_lock_bh(&act->tcfa_lock);
3767		err = tcf_act_get_user_cookie(entry, act);
3768		if (err)
3769			goto err_out_locked;
3770
3771		index = 0;
3772		err = tc_setup_offload_act(act, entry, &index, extack);
3773		if (err)
3774			goto err_out_locked;
3775
3776		for (k = 0; k < index ; k++) {
3777			entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
3778			entry[k].hw_index = act->tcfa_index;
3779			entry[k].cookie = (unsigned long)act;
3780			entry[k].miss_cookie =
3781				tcf_exts_miss_cookie_get(miss_cookie_base, i);
3782		}
3783
3784		j += index;
3785
3786		spin_unlock_bh(&act->tcfa_lock);
3787	}
3788
3789err_out:
3790	if (err)
3791		tc_cleanup_offload_action(flow_action);
3792
3793	return err;
3794err_out_locked:
3795	spin_unlock_bh(&act->tcfa_lock);
3796	goto err_out;
3797}
3798
3799int tc_setup_offload_action(struct flow_action *flow_action,
3800			    const struct tcf_exts *exts,
3801			    struct netlink_ext_ack *extack)
3802{
3803#ifdef CONFIG_NET_CLS_ACT
3804	u32 miss_cookie_base;
3805
3806	if (!exts)
3807		return 0;
3808
3809	miss_cookie_base = exts->miss_cookie_node ?
3810			   exts->miss_cookie_node->miss_cookie_base : 0;
3811	return tc_setup_action(flow_action, exts->actions, miss_cookie_base,
3812			       extack);
3813#else
3814	return 0;
3815#endif
3816}
3817EXPORT_SYMBOL(tc_setup_offload_action);
3818
3819unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
3820{
3821	unsigned int num_acts = 0;
3822	struct tc_action *act;
3823	int i;
3824
3825	tcf_exts_for_each_action(i, act, exts) {
3826		if (is_tcf_pedit(act))
3827			num_acts += tcf_pedit_nkeys(act);
3828		else
3829			num_acts++;
3830	}
3831	return num_acts;
3832}
3833EXPORT_SYMBOL(tcf_exts_num_actions);
3834
3835#ifdef CONFIG_NET_CLS_ACT
3836static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
3837					u32 *p_block_index,
3838					struct netlink_ext_ack *extack)
3839{
3840	*p_block_index = nla_get_u32(block_index_attr);
3841	if (!*p_block_index) {
3842		NL_SET_ERR_MSG(extack, "Block number may not be zero");
3843		return -EINVAL;
3844	}
3845
3846	return 0;
3847}
3848
3849int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
3850		    enum flow_block_binder_type binder_type,
3851		    struct nlattr *block_index_attr,
3852		    struct netlink_ext_ack *extack)
3853{
3854	u32 block_index;
3855	int err;
3856
3857	if (!block_index_attr)
3858		return 0;
3859
3860	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
3861	if (err)
3862		return err;
3863
3864	qe->info.binder_type = binder_type;
3865	qe->info.chain_head_change = tcf_chain_head_change_dflt;
3866	qe->info.chain_head_change_priv = &qe->filter_chain;
3867	qe->info.block_index = block_index;
3868
3869	return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
3870}
3871EXPORT_SYMBOL(tcf_qevent_init);
3872
3873void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
3874{
3875	if (qe->info.block_index)
3876		tcf_block_put_ext(qe->block, sch, &qe->info);
3877}
3878EXPORT_SYMBOL(tcf_qevent_destroy);
3879
3880int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
3881			       struct netlink_ext_ack *extack)
3882{
3883	u32 block_index;
3884	int err;
3885
3886	if (!block_index_attr)
3887		return 0;
3888
3889	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
3890	if (err)
3891		return err;
3892
3893	/* Reject a newly configured block or a change of block. */
3894	if (block_index != qe->info.block_index) {
3895		NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
3896		return -EINVAL;
3897	}
3898
3899	return 0;
3900}
3901EXPORT_SYMBOL(tcf_qevent_validate_change);
3902
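/* Run the qevent block's classifier chain on skb. Returns the skb if it may
 * continue through the qdisc, or NULL if it was dropped, stolen or redirected
 * (with *ret set accordingly).
 */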
3903struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
3904				  struct sk_buff **to_free, int *ret)
3905{
3906	struct tcf_result cl_res;
3907	struct tcf_proto *fl;
3908
3909	if (!qe->info.block_index)
3910		return skb;
3911
3912	fl = rcu_dereference_bh(qe->filter_chain);
3913
3914	switch (tcf_classify(skb, NULL, fl, &cl_res, false)) {
3915	case TC_ACT_SHOT:
3916		qdisc_qstats_drop(sch);
3917		__qdisc_drop(skb, to_free);
3918		*ret = __NET_XMIT_BYPASS;
3919		return NULL;
3920	case TC_ACT_STOLEN:
3921	case TC_ACT_QUEUED:
3922	case TC_ACT_TRAP:
3923		__qdisc_drop(skb, to_free);
3924		*ret = __NET_XMIT_STOLEN;
3925		return NULL;
3926	case TC_ACT_REDIRECT:
3927		skb_do_redirect(skb);
3928		*ret = __NET_XMIT_STOLEN;
3929		return NULL;
3930	}
3931
3932	return skb;
3933}
3934EXPORT_SYMBOL(tcf_qevent_handle);
3935
3936int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
3937{
3938	if (!qe->info.block_index)
3939		return 0;
3940	return nla_put_u32(skb, attr_name, qe->info.block_index);
3941}
3942EXPORT_SYMBOL(tcf_qevent_dump);
3943#endif
3944
3945static __net_init int tcf_net_init(struct net *net)
3946{
3947	struct tcf_net *tn = net_generic(net, tcf_net_id);
3948
3949	spin_lock_init(&tn->idr_lock);
3950	idr_init(&tn->idr);
3951	return 0;
3952}
3953
3954static void __net_exit tcf_net_exit(struct net *net)
3955{
3956	struct tcf_net *tn = net_generic(net, tcf_net_id);
3957
3958	idr_destroy(&tn->idr);
3959}
3960
3961static struct pernet_operations tcf_net_ops = {
3962	.init = tcf_net_init,
3963	.exit = tcf_net_exit,
3964	.id   = &tcf_net_id,
3965	.size = sizeof(struct tcf_net),
3966};
3967
3968static int __init tc_filter_init(void)
3969{
3970	int err;
3971
3972	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
3973	if (!tc_filter_wq)
3974		return -ENOMEM;
3975
3976	err = register_pernet_subsys(&tcf_net_ops);
3977	if (err)
3978		goto err_register_pernet_subsys;
3979
3980	xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1);
3981
3982	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
3983		      RTNL_FLAG_DOIT_UNLOCKED);
3984	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
3985		      RTNL_FLAG_DOIT_UNLOCKED);
3986	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
3987		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
3988	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
3989	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
3990	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
3991		      tc_dump_chain, 0);
3992
3993	return 0;
3994
3995err_register_pernet_subsys:
3996	destroy_workqueue(tc_filter_wq);
3997	return err;
3998}
3999
4000subsys_initcall(tc_filter_init);
4001