xref: /kernel/linux/linux-5.10/net/sched/cls_api.c (revision 8c2ecf20)
1// SPDX-License-Identifier: GPL-2.0-or-later
2/*
3 * net/sched/cls_api.c	Packet classifier API.
4 *
5 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
6 *
7 * Changes:
8 *
9 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
10 */
11
12#include <linux/module.h>
13#include <linux/types.h>
14#include <linux/kernel.h>
15#include <linux/string.h>
16#include <linux/errno.h>
17#include <linux/err.h>
18#include <linux/skbuff.h>
19#include <linux/init.h>
20#include <linux/kmod.h>
21#include <linux/slab.h>
22#include <linux/idr.h>
23#include <linux/jhash.h>
24#include <linux/rculist.h>
25#include <net/net_namespace.h>
26#include <net/sock.h>
27#include <net/netlink.h>
28#include <net/pkt_sched.h>
29#include <net/pkt_cls.h>
30#include <net/tc_act/tc_pedit.h>
31#include <net/tc_act/tc_mirred.h>
32#include <net/tc_act/tc_vlan.h>
33#include <net/tc_act/tc_tunnel_key.h>
34#include <net/tc_act/tc_csum.h>
35#include <net/tc_act/tc_gact.h>
36#include <net/tc_act/tc_police.h>
37#include <net/tc_act/tc_sample.h>
38#include <net/tc_act/tc_skbedit.h>
39#include <net/tc_act/tc_ct.h>
40#include <net/tc_act/tc_mpls.h>
41#include <net/tc_act/tc_gate.h>
42#include <net/flow_offload.h>
43
44/* The list of all installed classifier types */
45static LIST_HEAD(tcf_proto_base);
46
47/* Protects the list of registered TC modules. It is a pure SMP lock. */
48static DEFINE_RWLOCK(cls_mod_lock);
49
50static u32 destroy_obj_hashfn(const struct tcf_proto *tp)
51{
52	return jhash_3words(tp->chain->index, tp->prio,
53			    (__force __u32)tp->protocol, 0);
54}
55
56static void tcf_proto_signal_destroying(struct tcf_chain *chain,
57					struct tcf_proto *tp)
58{
59	struct tcf_block *block = chain->block;
60
61	mutex_lock(&block->proto_destroy_lock);
62	hash_add_rcu(block->proto_destroy_ht, &tp->destroy_ht_node,
63		     destroy_obj_hashfn(tp));
64	mutex_unlock(&block->proto_destroy_lock);
65}
66
67static bool tcf_proto_cmp(const struct tcf_proto *tp1,
68			  const struct tcf_proto *tp2)
69{
70	return tp1->chain->index == tp2->chain->index &&
71	       tp1->prio == tp2->prio &&
72	       tp1->protocol == tp2->protocol;
73}
74
75static bool tcf_proto_exists_destroying(struct tcf_chain *chain,
76					struct tcf_proto *tp)
77{
78	u32 hash = destroy_obj_hashfn(tp);
79	struct tcf_proto *iter;
80	bool found = false;
81
82	rcu_read_lock();
83	hash_for_each_possible_rcu(chain->block->proto_destroy_ht, iter,
84				   destroy_ht_node, hash) {
85		if (tcf_proto_cmp(tp, iter)) {
86			found = true;
87			break;
88		}
89	}
90	rcu_read_unlock();
91
92	return found;
93}
94
95static void
96tcf_proto_signal_destroyed(struct tcf_chain *chain, struct tcf_proto *tp)
97{
98	struct tcf_block *block = chain->block;
99
100	mutex_lock(&block->proto_destroy_lock);
101	if (hash_hashed(&tp->destroy_ht_node))
102		hash_del_rcu(&tp->destroy_ht_node);
103	mutex_unlock(&block->proto_destroy_lock);
104}
105
106/* Find classifier type by string name */
107
108static const struct tcf_proto_ops *__tcf_proto_lookup_ops(const char *kind)
109{
110	const struct tcf_proto_ops *t, *res = NULL;
111
112	if (kind) {
113		read_lock(&cls_mod_lock);
114		list_for_each_entry(t, &tcf_proto_base, head) {
115			if (strcmp(kind, t->kind) == 0) {
116				if (try_module_get(t->owner))
117					res = t;
118				break;
119			}
120		}
121		read_unlock(&cls_mod_lock);
122	}
123	return res;
124}
125
126static const struct tcf_proto_ops *
127tcf_proto_lookup_ops(const char *kind, bool rtnl_held,
128		     struct netlink_ext_ack *extack)
129{
130	const struct tcf_proto_ops *ops;
131
132	ops = __tcf_proto_lookup_ops(kind);
133	if (ops)
134		return ops;
135#ifdef CONFIG_MODULES
136	if (rtnl_held)
137		rtnl_unlock();
138	request_module("cls_%s", kind);
139	if (rtnl_held)
140		rtnl_lock();
141	ops = __tcf_proto_lookup_ops(kind);
142	/* We dropped the RTNL semaphore in order to perform
143	 * the module load. So, even if we succeeded in loading
144	 * the module, we have to replay the request. We indicate
145	 * this using -EAGAIN.
146	 */
147	if (ops) {
148		module_put(ops->owner);
149		return ERR_PTR(-EAGAIN);
150	}
151#endif
152	NL_SET_ERR_MSG(extack, "TC classifier not found");
153	return ERR_PTR(-ENOENT);
154}
155
156/* Register(unregister) new classifier type */
157
158int register_tcf_proto_ops(struct tcf_proto_ops *ops)
159{
160	struct tcf_proto_ops *t;
161	int rc = -EEXIST;
162
163	write_lock(&cls_mod_lock);
164	list_for_each_entry(t, &tcf_proto_base, head)
165		if (!strcmp(ops->kind, t->kind))
166			goto out;
167
168	list_add_tail(&ops->head, &tcf_proto_base);
169	rc = 0;
170out:
171	write_unlock(&cls_mod_lock);
172	return rc;
173}
174EXPORT_SYMBOL(register_tcf_proto_ops);
175
176static struct workqueue_struct *tc_filter_wq;
177
178int unregister_tcf_proto_ops(struct tcf_proto_ops *ops)
179{
180	struct tcf_proto_ops *t;
181	int rc = -ENOENT;
182
183	/* Wait for outstanding call_rcu()s, if any, from a
184	 * tcf_proto_ops's destroy() handler.
185	 */
186	rcu_barrier();
187	flush_workqueue(tc_filter_wq);
188
189	write_lock(&cls_mod_lock);
190	list_for_each_entry(t, &tcf_proto_base, head) {
191		if (t == ops) {
192			list_del(&t->head);
193			rc = 0;
194			break;
195		}
196	}
197	write_unlock(&cls_mod_lock);
198	return rc;
199}
200EXPORT_SYMBOL(unregister_tcf_proto_ops);
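
/* Illustrative sketch (not part of the original file): a classifier module
 * would typically register its ops on load and unregister them on unload.
 * The "foo" names below are hypothetical; see existing classifiers such as
 * cls_basic for real users of this API.
 *
 *	static struct tcf_proto_ops cls_foo_ops __read_mostly = {
 *		.kind		= "foo",
 *		.classify	= foo_classify,
 *		.init		= foo_init,
 *		.destroy	= foo_destroy,
 *		.get		= foo_get,
 *		.change		= foo_change,
 *		.delete		= foo_delete,
 *		.walk		= foo_walk,
 *		.dump		= foo_dump,
 *		.owner		= THIS_MODULE,
 *	};
 *
 *	static int __init cls_foo_init(void)
 *	{
 *		return register_tcf_proto_ops(&cls_foo_ops);
 *	}
 *
 *	static void __exit cls_foo_exit(void)
 *	{
 *		unregister_tcf_proto_ops(&cls_foo_ops);
 *	}
 *	module_init(cls_foo_init);
 *	module_exit(cls_foo_exit);
 */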
201
202bool tcf_queue_work(struct rcu_work *rwork, work_func_t func)
203{
204	INIT_RCU_WORK(rwork, func);
205	return queue_rcu_work(tc_filter_wq, rwork);
206}
207EXPORT_SYMBOL(tcf_queue_work);
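
/* Illustrative sketch (not part of the original file): classifiers use
 * tcf_queue_work() to defer freeing a filter until an RCU grace period has
 * elapsed and the work has run on tc_filter_wq. The "foo" names below are
 * hypothetical; cls_basic follows the same pattern.
 *
 *	struct foo_filter {
 *		...
 *		struct rcu_work rwork;
 *	};
 *
 *	static void foo_delete_filter_work(struct work_struct *work)
 *	{
 *		struct foo_filter *f = container_of(to_rcu_work(work),
 *						    struct foo_filter, rwork);
 *
 *		rtnl_lock();
 *		__foo_delete_filter(f);
 *		rtnl_unlock();
 *	}
 *
 *	When unlinking a filter:
 *		tcf_queue_work(&f->rwork, foo_delete_filter_work);
 */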
208
209/* Select a new prio value from the range managed by the kernel. */
210
211static inline u32 tcf_auto_prio(struct tcf_proto *tp)
212{
213	u32 first = TC_H_MAKE(0xC0000000U, 0U);
214
215	if (tp)
216		first = tp->prio - 1;
217
218	return TC_H_MAJ(first);
219}
220
221static bool tcf_proto_check_kind(struct nlattr *kind, char *name)
222{
223	if (kind)
224		return nla_strlcpy(name, kind, IFNAMSIZ) >= IFNAMSIZ;
225	memset(name, 0, IFNAMSIZ);
226	return false;
227}
228
229static bool tcf_proto_is_unlocked(const char *kind)
230{
231	const struct tcf_proto_ops *ops;
232	bool ret;
233
234	if (strlen(kind) == 0)
235		return false;
236
237	ops = tcf_proto_lookup_ops(kind, false, NULL);
238	/* On error return false to take rtnl lock. Proto lookup/create
239	 * functions will perform lookup again and properly handle errors.
240	 */
241	if (IS_ERR(ops))
242		return false;
243
244	ret = !!(ops->flags & TCF_PROTO_OPS_DOIT_UNLOCKED);
245	module_put(ops->owner);
246	return ret;
247}
248
249static struct tcf_proto *tcf_proto_create(const char *kind, u32 protocol,
250					  u32 prio, struct tcf_chain *chain,
251					  bool rtnl_held,
252					  struct netlink_ext_ack *extack)
253{
254	struct tcf_proto *tp;
255	int err;
256
257	tp = kzalloc(sizeof(*tp), GFP_KERNEL);
258	if (!tp)
259		return ERR_PTR(-ENOBUFS);
260
261	tp->ops = tcf_proto_lookup_ops(kind, rtnl_held, extack);
262	if (IS_ERR(tp->ops)) {
263		err = PTR_ERR(tp->ops);
264		goto errout;
265	}
266	tp->classify = tp->ops->classify;
267	tp->protocol = protocol;
268	tp->prio = prio;
269	tp->chain = chain;
270	spin_lock_init(&tp->lock);
271	refcount_set(&tp->refcnt, 1);
272
273	err = tp->ops->init(tp);
274	if (err) {
275		module_put(tp->ops->owner);
276		goto errout;
277	}
278	return tp;
279
280errout:
281	kfree(tp);
282	return ERR_PTR(err);
283}
284
285static void tcf_proto_get(struct tcf_proto *tp)
286{
287	refcount_inc(&tp->refcnt);
288}
289
290static void tcf_chain_put(struct tcf_chain *chain);
291
292static void tcf_proto_destroy(struct tcf_proto *tp, bool rtnl_held,
293			      bool sig_destroy, struct netlink_ext_ack *extack)
294{
295	tp->ops->destroy(tp, rtnl_held, extack);
296	if (sig_destroy)
297		tcf_proto_signal_destroyed(tp->chain, tp);
298	tcf_chain_put(tp->chain);
299	module_put(tp->ops->owner);
300	kfree_rcu(tp, rcu);
301}
302
303static void tcf_proto_put(struct tcf_proto *tp, bool rtnl_held,
304			  struct netlink_ext_ack *extack)
305{
306	if (refcount_dec_and_test(&tp->refcnt))
307		tcf_proto_destroy(tp, rtnl_held, true, extack);
308}
309
310static bool tcf_proto_check_delete(struct tcf_proto *tp)
311{
312	if (tp->ops->delete_empty)
313		return tp->ops->delete_empty(tp);
314
315	tp->deleting = true;
316	return tp->deleting;
317}
318
319static void tcf_proto_mark_delete(struct tcf_proto *tp)
320{
321	spin_lock(&tp->lock);
322	tp->deleting = true;
323	spin_unlock(&tp->lock);
324}
325
326static bool tcf_proto_is_deleting(struct tcf_proto *tp)
327{
328	bool deleting;
329
330	spin_lock(&tp->lock);
331	deleting = tp->deleting;
332	spin_unlock(&tp->lock);
333
334	return deleting;
335}
336
337#define ASSERT_BLOCK_LOCKED(block)					\
338	lockdep_assert_held(&(block)->lock)
339
340struct tcf_filter_chain_list_item {
341	struct list_head list;
342	tcf_chain_head_change_t *chain_head_change;
343	void *chain_head_change_priv;
344};
345
346static struct tcf_chain *tcf_chain_create(struct tcf_block *block,
347					  u32 chain_index)
348{
349	struct tcf_chain *chain;
350
351	ASSERT_BLOCK_LOCKED(block);
352
353	chain = kzalloc(sizeof(*chain), GFP_KERNEL);
354	if (!chain)
355		return NULL;
356	list_add_tail_rcu(&chain->list, &block->chain_list);
357	mutex_init(&chain->filter_chain_lock);
358	chain->block = block;
359	chain->index = chain_index;
360	chain->refcnt = 1;
361	if (!chain->index)
362		block->chain0.chain = chain;
363	return chain;
364}
365
366static void tcf_chain_head_change_item(struct tcf_filter_chain_list_item *item,
367				       struct tcf_proto *tp_head)
368{
369	if (item->chain_head_change)
370		item->chain_head_change(tp_head, item->chain_head_change_priv);
371}
372
373static void tcf_chain0_head_change(struct tcf_chain *chain,
374				   struct tcf_proto *tp_head)
375{
376	struct tcf_filter_chain_list_item *item;
377	struct tcf_block *block = chain->block;
378
379	if (chain->index)
380		return;
381
382	mutex_lock(&block->lock);
383	list_for_each_entry(item, &block->chain0.filter_chain_list, list)
384		tcf_chain_head_change_item(item, tp_head);
385	mutex_unlock(&block->lock);
386}
387
388/* Returns true if block can be safely freed. */
389
390static bool tcf_chain_detach(struct tcf_chain *chain)
391{
392	struct tcf_block *block = chain->block;
393
394	ASSERT_BLOCK_LOCKED(block);
395
396	list_del_rcu(&chain->list);
397	if (!chain->index)
398		block->chain0.chain = NULL;
399
400	if (list_empty(&block->chain_list) &&
401	    refcount_read(&block->refcnt) == 0)
402		return true;
403
404	return false;
405}
406
407static void tcf_block_destroy(struct tcf_block *block)
408{
409	mutex_destroy(&block->lock);
410	mutex_destroy(&block->proto_destroy_lock);
411	kfree_rcu(block, rcu);
412}
413
414static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
415{
416	struct tcf_block *block = chain->block;
417
418	mutex_destroy(&chain->filter_chain_lock);
419	kfree_rcu(chain, rcu);
420	if (free_block)
421		tcf_block_destroy(block);
422}
423
424static void tcf_chain_hold(struct tcf_chain *chain)
425{
426	ASSERT_BLOCK_LOCKED(chain->block);
427
428	++chain->refcnt;
429}
430
431static bool tcf_chain_held_by_acts_only(struct tcf_chain *chain)
432{
433	ASSERT_BLOCK_LOCKED(chain->block);
434
435	/* In case all the references are action references, this
436	 * chain should not be shown to the user.
437	 */
438	return chain->refcnt == chain->action_refcnt;
439}
440
441static struct tcf_chain *tcf_chain_lookup(struct tcf_block *block,
442					  u32 chain_index)
443{
444	struct tcf_chain *chain;
445
446	ASSERT_BLOCK_LOCKED(block);
447
448	list_for_each_entry(chain, &block->chain_list, list) {
449		if (chain->index == chain_index)
450			return chain;
451	}
452	return NULL;
453}
454
455#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
456static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
457					      u32 chain_index)
458{
459	struct tcf_chain *chain;
460
461	list_for_each_entry_rcu(chain, &block->chain_list, list) {
462		if (chain->index == chain_index)
463			return chain;
464	}
465	return NULL;
466}
467#endif
468
469static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
470			   u32 seq, u16 flags, int event, bool unicast);
471
472static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
473					 u32 chain_index, bool create,
474					 bool by_act)
475{
476	struct tcf_chain *chain = NULL;
477	bool is_first_reference;
478
479	mutex_lock(&block->lock);
480	chain = tcf_chain_lookup(block, chain_index);
481	if (chain) {
482		tcf_chain_hold(chain);
483	} else {
484		if (!create)
485			goto errout;
486		chain = tcf_chain_create(block, chain_index);
487		if (!chain)
488			goto errout;
489	}
490
491	if (by_act)
492		++chain->action_refcnt;
493	is_first_reference = chain->refcnt - chain->action_refcnt == 1;
494	mutex_unlock(&block->lock);
495
496	/* Send a notification only in case we got the first
497	 * non-action reference. Until then, the chain acts only as
498	 * a placeholder for actions pointing to it and the user ought
499	 * not to know about them.
500	 */
501	if (is_first_reference && !by_act)
502		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
503				RTM_NEWCHAIN, false);
504
505	return chain;
506
507errout:
508	mutex_unlock(&block->lock);
509	return chain;
510}
511
512static struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
513				       bool create)
514{
515	return __tcf_chain_get(block, chain_index, create, false);
516}
517
518struct tcf_chain *tcf_chain_get_by_act(struct tcf_block *block, u32 chain_index)
519{
520	return __tcf_chain_get(block, chain_index, true, true);
521}
522EXPORT_SYMBOL(tcf_chain_get_by_act);
523
524static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
525			       void *tmplt_priv);
526static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
527				  void *tmplt_priv, u32 chain_index,
528				  struct tcf_block *block, struct sk_buff *oskb,
529				  u32 seq, u16 flags, bool unicast);
530
531static void __tcf_chain_put(struct tcf_chain *chain, bool by_act,
532			    bool explicitly_created)
533{
534	struct tcf_block *block = chain->block;
535	const struct tcf_proto_ops *tmplt_ops;
536	unsigned int refcnt, non_act_refcnt;
537	bool free_block = false;
538	void *tmplt_priv;
539
540	mutex_lock(&block->lock);
541	if (explicitly_created) {
542		if (!chain->explicitly_created) {
543			mutex_unlock(&block->lock);
544			return;
545		}
546		chain->explicitly_created = false;
547	}
548
549	if (by_act)
550		chain->action_refcnt--;
551
552	/* tc_chain_notify_delete can't be called while holding the block lock.
553	 * However, once the block is unlocked the chain can be changed
554	 * concurrently, so save these to temporary variables.
555	 */
556	refcnt = --chain->refcnt;
557	non_act_refcnt = refcnt - chain->action_refcnt;
558	tmplt_ops = chain->tmplt_ops;
559	tmplt_priv = chain->tmplt_priv;
560
561	if (non_act_refcnt == chain->explicitly_created && !by_act) {
562		if (non_act_refcnt == 0)
563			tc_chain_notify_delete(tmplt_ops, tmplt_priv,
564					       chain->index, block, NULL, 0, 0,
565					       false);
566		/* Last reference to chain, no need to lock. */
567		chain->flushing = false;
568	}
569
570	if (refcnt == 0)
571		free_block = tcf_chain_detach(chain);
572	mutex_unlock(&block->lock);
573
574	if (refcnt == 0) {
575		tc_chain_tmplt_del(tmplt_ops, tmplt_priv);
576		tcf_chain_destroy(chain, free_block);
577	}
578}
579
580static void tcf_chain_put(struct tcf_chain *chain)
581{
582	__tcf_chain_put(chain, false, false);
583}
584
585void tcf_chain_put_by_act(struct tcf_chain *chain)
586{
587	__tcf_chain_put(chain, true, false);
588}
589EXPORT_SYMBOL(tcf_chain_put_by_act);
590
591static void tcf_chain_put_explicitly_created(struct tcf_chain *chain)
592{
593	__tcf_chain_put(chain, false, true);
594}
595
596static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
597{
598	struct tcf_proto *tp, *tp_next;
599
600	mutex_lock(&chain->filter_chain_lock);
601	tp = tcf_chain_dereference(chain->filter_chain, chain);
602	while (tp) {
603		tp_next = rcu_dereference_protected(tp->next, 1);
604		tcf_proto_signal_destroying(chain, tp);
605		tp = tp_next;
606	}
607	tp = tcf_chain_dereference(chain->filter_chain, chain);
608	RCU_INIT_POINTER(chain->filter_chain, NULL);
609	tcf_chain0_head_change(chain, NULL);
610	chain->flushing = true;
611	mutex_unlock(&chain->filter_chain_lock);
612
613	while (tp) {
614		tp_next = rcu_dereference_protected(tp->next, 1);
615		tcf_proto_put(tp, rtnl_held, NULL);
616		tp = tp_next;
617	}
618}
619
620static int tcf_block_setup(struct tcf_block *block,
621			   struct flow_block_offload *bo);
622
623static void tcf_block_offload_init(struct flow_block_offload *bo,
624				   struct net_device *dev, struct Qdisc *sch,
625				   enum flow_block_command command,
626				   enum flow_block_binder_type binder_type,
627				   struct flow_block *flow_block,
628				   bool shared, struct netlink_ext_ack *extack)
629{
630	bo->net = dev_net(dev);
631	bo->command = command;
632	bo->binder_type = binder_type;
633	bo->block = flow_block;
634	bo->block_shared = shared;
635	bo->extack = extack;
636	bo->sch = sch;
637	bo->cb_list_head = &flow_block->cb_list;
638	INIT_LIST_HEAD(&bo->cb_list);
639}
640
641static void tcf_block_unbind(struct tcf_block *block,
642			     struct flow_block_offload *bo);
643
644static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
645{
646	struct tcf_block *block = block_cb->indr.data;
647	struct net_device *dev = block_cb->indr.dev;
648	struct Qdisc *sch = block_cb->indr.sch;
649	struct netlink_ext_ack extack = {};
650	struct flow_block_offload bo = {};
651
652	tcf_block_offload_init(&bo, dev, sch, FLOW_BLOCK_UNBIND,
653			       block_cb->indr.binder_type,
654			       &block->flow_block, tcf_block_shared(block),
655			       &extack);
656	rtnl_lock();
657	down_write(&block->cb_lock);
658	list_del(&block_cb->driver_list);
659	list_move(&block_cb->list, &bo.cb_list);
660	tcf_block_unbind(block, &bo);
661	up_write(&block->cb_lock);
662	rtnl_unlock();
663}
664
665static bool tcf_block_offload_in_use(struct tcf_block *block)
666{
667	return atomic_read(&block->offloadcnt);
668}
669
670static int tcf_block_offload_cmd(struct tcf_block *block,
671				 struct net_device *dev, struct Qdisc *sch,
672				 struct tcf_block_ext_info *ei,
673				 enum flow_block_command command,
674				 struct netlink_ext_ack *extack)
675{
676	struct flow_block_offload bo = {};
677
678	tcf_block_offload_init(&bo, dev, sch, command, ei->binder_type,
679			       &block->flow_block, tcf_block_shared(block),
680			       extack);
681
682	if (dev->netdev_ops->ndo_setup_tc) {
683		int err;
684
685		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
686		if (err < 0) {
687			if (err != -EOPNOTSUPP)
688				NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
689			return err;
690		}
691
692		return tcf_block_setup(block, &bo);
693	}
694
695	flow_indr_dev_setup_offload(dev, sch, TC_SETUP_BLOCK, block, &bo,
696				    tc_block_indr_cleanup);
697	tcf_block_setup(block, &bo);
698
699	return -EOPNOTSUPP;
700}
701
702static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
703				  struct tcf_block_ext_info *ei,
704				  struct netlink_ext_ack *extack)
705{
706	struct net_device *dev = q->dev_queue->dev;
707	int err;
708
709	down_write(&block->cb_lock);
710
711	/* If the tc offload feature is disabled and the block we try to bind
712	 * to already has some offloaded filters, refuse the bind.
713	 */
714	if (dev->netdev_ops->ndo_setup_tc &&
715	    !tc_can_offload(dev) &&
716	    tcf_block_offload_in_use(block)) {
717		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
718		err = -EOPNOTSUPP;
719		goto err_unlock;
720	}
721
722	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_BIND, extack);
723	if (err == -EOPNOTSUPP)
724		goto no_offload_dev_inc;
725	if (err)
726		goto err_unlock;
727
728	up_write(&block->cb_lock);
729	return 0;
730
731no_offload_dev_inc:
732	if (tcf_block_offload_in_use(block))
733		goto err_unlock;
734
735	err = 0;
736	block->nooffloaddevcnt++;
737err_unlock:
738	up_write(&block->cb_lock);
739	return err;
740}
741
742static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
743				     struct tcf_block_ext_info *ei)
744{
745	struct net_device *dev = q->dev_queue->dev;
746	int err;
747
748	down_write(&block->cb_lock);
749	err = tcf_block_offload_cmd(block, dev, q, ei, FLOW_BLOCK_UNBIND, NULL);
750	if (err == -EOPNOTSUPP)
751		goto no_offload_dev_dec;
752	up_write(&block->cb_lock);
753	return;
754
755no_offload_dev_dec:
756	WARN_ON(block->nooffloaddevcnt-- == 0);
757	up_write(&block->cb_lock);
758}
759
760static int
761tcf_chain0_head_change_cb_add(struct tcf_block *block,
762			      struct tcf_block_ext_info *ei,
763			      struct netlink_ext_ack *extack)
764{
765	struct tcf_filter_chain_list_item *item;
766	struct tcf_chain *chain0;
767
768	item = kmalloc(sizeof(*item), GFP_KERNEL);
769	if (!item) {
770		NL_SET_ERR_MSG(extack, "Memory allocation for head change callback item failed");
771		return -ENOMEM;
772	}
773	item->chain_head_change = ei->chain_head_change;
774	item->chain_head_change_priv = ei->chain_head_change_priv;
775
776	mutex_lock(&block->lock);
777	chain0 = block->chain0.chain;
778	if (chain0)
779		tcf_chain_hold(chain0);
780	else
781		list_add(&item->list, &block->chain0.filter_chain_list);
782	mutex_unlock(&block->lock);
783
784	if (chain0) {
785		struct tcf_proto *tp_head;
786
787		mutex_lock(&chain0->filter_chain_lock);
788
789		tp_head = tcf_chain_dereference(chain0->filter_chain, chain0);
790		if (tp_head)
791			tcf_chain_head_change_item(item, tp_head);
792
793		mutex_lock(&block->lock);
794		list_add(&item->list, &block->chain0.filter_chain_list);
795		mutex_unlock(&block->lock);
796
797		mutex_unlock(&chain0->filter_chain_lock);
798		tcf_chain_put(chain0);
799	}
800
801	return 0;
802}
803
804static void
805tcf_chain0_head_change_cb_del(struct tcf_block *block,
806			      struct tcf_block_ext_info *ei)
807{
808	struct tcf_filter_chain_list_item *item;
809
810	mutex_lock(&block->lock);
811	list_for_each_entry(item, &block->chain0.filter_chain_list, list) {
812		if ((!ei->chain_head_change && !ei->chain_head_change_priv) ||
813		    (item->chain_head_change == ei->chain_head_change &&
814		     item->chain_head_change_priv == ei->chain_head_change_priv)) {
815			if (block->chain0.chain)
816				tcf_chain_head_change_item(item, NULL);
817			list_del(&item->list);
818			mutex_unlock(&block->lock);
819
820			kfree(item);
821			return;
822		}
823	}
824	mutex_unlock(&block->lock);
825	WARN_ON(1);
826}
827
828struct tcf_net {
829	spinlock_t idr_lock; /* Protects idr */
830	struct idr idr;
831};
832
833static unsigned int tcf_net_id;
834
835static int tcf_block_insert(struct tcf_block *block, struct net *net,
836			    struct netlink_ext_ack *extack)
837{
838	struct tcf_net *tn = net_generic(net, tcf_net_id);
839	int err;
840
841	idr_preload(GFP_KERNEL);
842	spin_lock(&tn->idr_lock);
843	err = idr_alloc_u32(&tn->idr, block, &block->index, block->index,
844			    GFP_NOWAIT);
845	spin_unlock(&tn->idr_lock);
846	idr_preload_end();
847
848	return err;
849}
850
851static void tcf_block_remove(struct tcf_block *block, struct net *net)
852{
853	struct tcf_net *tn = net_generic(net, tcf_net_id);
854
855	spin_lock(&tn->idr_lock);
856	idr_remove(&tn->idr, block->index);
857	spin_unlock(&tn->idr_lock);
858}
859
860static struct tcf_block *tcf_block_create(struct net *net, struct Qdisc *q,
861					  u32 block_index,
862					  struct netlink_ext_ack *extack)
863{
864	struct tcf_block *block;
865
866	block = kzalloc(sizeof(*block), GFP_KERNEL);
867	if (!block) {
868		NL_SET_ERR_MSG(extack, "Memory allocation for block failed");
869		return ERR_PTR(-ENOMEM);
870	}
871	mutex_init(&block->lock);
872	mutex_init(&block->proto_destroy_lock);
873	init_rwsem(&block->cb_lock);
874	flow_block_init(&block->flow_block);
875	INIT_LIST_HEAD(&block->chain_list);
876	INIT_LIST_HEAD(&block->owner_list);
877	INIT_LIST_HEAD(&block->chain0.filter_chain_list);
878
879	refcount_set(&block->refcnt, 1);
880	block->net = net;
881	block->index = block_index;
882
883	/* Don't store q pointer for blocks which are shared */
884	if (!tcf_block_shared(block))
885		block->q = q;
886	return block;
887}
888
889static struct tcf_block *tcf_block_lookup(struct net *net, u32 block_index)
890{
891	struct tcf_net *tn = net_generic(net, tcf_net_id);
892
893	return idr_find(&tn->idr, block_index);
894}
895
896static struct tcf_block *tcf_block_refcnt_get(struct net *net, u32 block_index)
897{
898	struct tcf_block *block;
899
900	rcu_read_lock();
901	block = tcf_block_lookup(net, block_index);
902	if (block && !refcount_inc_not_zero(&block->refcnt))
903		block = NULL;
904	rcu_read_unlock();
905
906	return block;
907}
908
909static struct tcf_chain *
910__tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
911{
912	mutex_lock(&block->lock);
913	if (chain)
914		chain = list_is_last(&chain->list, &block->chain_list) ?
915			NULL : list_next_entry(chain, list);
916	else
917		chain = list_first_entry_or_null(&block->chain_list,
918						 struct tcf_chain, list);
919
920	/* skip all action-only chains */
921	while (chain && tcf_chain_held_by_acts_only(chain))
922		chain = list_is_last(&chain->list, &block->chain_list) ?
923			NULL : list_next_entry(chain, list);
924
925	if (chain)
926		tcf_chain_hold(chain);
927	mutex_unlock(&block->lock);
928
929	return chain;
930}
931
932/* Function to be used by all clients that want to iterate over all chains on
933 * a block. It properly obtains block->lock and takes a reference to the chain
934 * before returning it. Users of this function must be tolerant to concurrent
935 * chain insertion/deletion or ensure that no concurrent chain modification is
936 * possible. Note that netlink dump callbacks cannot guarantee a consistent
937 * dump because the rtnl lock is released each time the skb is filled with
938 * data and sent to user space.
939 */
940
941struct tcf_chain *
942tcf_get_next_chain(struct tcf_block *block, struct tcf_chain *chain)
943{
944	struct tcf_chain *chain_next = __tcf_get_next_chain(block, chain);
945
946	if (chain)
947		tcf_chain_put(chain);
948
949	return chain_next;
950}
951EXPORT_SYMBOL(tcf_get_next_chain);
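
/* Illustrative sketch (not part of the original file): the intended iteration
 * pattern, as used by tcf_block_flush_all_chains() below, is:
 *
 *	struct tcf_chain *chain;
 *
 *	for (chain = tcf_get_next_chain(block, NULL);
 *	     chain;
 *	     chain = tcf_get_next_chain(block, chain)) {
 *		(operate on chain; the reference is dropped by the
 *		 next tcf_get_next_chain() call)
 *	}
 */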
952
953static struct tcf_proto *
954__tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp)
955{
956	u32 prio = 0;
957
958	ASSERT_RTNL();
959	mutex_lock(&chain->filter_chain_lock);
960
961	if (!tp) {
962		tp = tcf_chain_dereference(chain->filter_chain, chain);
963	} else if (tcf_proto_is_deleting(tp)) {
964		/* 'deleting' flag is set and chain->filter_chain_lock was
965		 * unlocked, which means next pointer could be invalid. Restart
966		 * search.
967		 */
968		prio = tp->prio + 1;
969		tp = tcf_chain_dereference(chain->filter_chain, chain);
970
971		for (; tp; tp = tcf_chain_dereference(tp->next, chain))
972			if (!tp->deleting && tp->prio >= prio)
973				break;
974	} else {
975		tp = tcf_chain_dereference(tp->next, chain);
976	}
977
978	if (tp)
979		tcf_proto_get(tp);
980
981	mutex_unlock(&chain->filter_chain_lock);
982
983	return tp;
984}
985
986/* Function to be used by all clients that want to iterate over all tp's on
987 * a chain. Users of this function must be tolerant to concurrent tp
988 * insertion/deletion or ensure that no concurrent chain modification is
989 * possible. Note that netlink dump callbacks cannot guarantee a consistent
990 * dump because the rtnl lock is released each time the skb is filled with
991 * data and sent to user space.
992 */
993
994struct tcf_proto *
995tcf_get_next_proto(struct tcf_chain *chain, struct tcf_proto *tp,
996		   bool rtnl_held)
997{
998	struct tcf_proto *tp_next = __tcf_get_next_proto(chain, tp);
999
1000	if (tp)
1001		tcf_proto_put(tp, rtnl_held, NULL);
1002
1003	return tp_next;
1004}
1005EXPORT_SYMBOL(tcf_get_next_proto);
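
/* Illustrative sketch (not part of the original file): the matching iteration
 * pattern for filters on a chain, as used by tfilter_notify_chain() below, is:
 *
 *	struct tcf_proto *tp;
 *
 *	for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
 *	     tp;
 *	     tp = tcf_get_next_proto(chain, tp, rtnl_held)) {
 *		(operate on tp; the reference is dropped by the
 *		 next tcf_get_next_proto() call)
 *	}
 */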
1006
1007static void tcf_block_flush_all_chains(struct tcf_block *block, bool rtnl_held)
1008{
1009	struct tcf_chain *chain;
1010
1011	/* Last reference to block. At this point chains cannot be added or
1012	 * removed concurrently.
1013	 */
1014	for (chain = tcf_get_next_chain(block, NULL);
1015	     chain;
1016	     chain = tcf_get_next_chain(block, chain)) {
1017		tcf_chain_put_explicitly_created(chain);
1018		tcf_chain_flush(chain, rtnl_held);
1019	}
1020}
1021
1022/* Look up the Qdisc and increment its reference counter.
1023 * Set parent, if necessary.
1024 */
1025
1026static int __tcf_qdisc_find(struct net *net, struct Qdisc **q,
1027			    u32 *parent, int ifindex, bool rtnl_held,
1028			    struct netlink_ext_ack *extack)
1029{
1030	const struct Qdisc_class_ops *cops;
1031	struct net_device *dev;
1032	int err = 0;
1033
1034	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
1035		return 0;
1036
1037	rcu_read_lock();
1038
1039	/* Find link */
1040	dev = dev_get_by_index_rcu(net, ifindex);
1041	if (!dev) {
1042		rcu_read_unlock();
1043		return -ENODEV;
1044	}
1045
1046	/* Find qdisc */
1047	if (!*parent) {
1048		*q = rcu_dereference(dev->qdisc);
1049		*parent = (*q)->handle;
1050	} else {
1051		*q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent));
1052		if (!*q) {
1053			NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
1054			err = -EINVAL;
1055			goto errout_rcu;
1056		}
1057	}
1058
1059	*q = qdisc_refcount_inc_nz(*q);
1060	if (!*q) {
1061		NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exist");
1062		err = -EINVAL;
1063		goto errout_rcu;
1064	}
1065
1066	/* Is it classful? */
1067	cops = (*q)->ops->cl_ops;
1068	if (!cops) {
1069		NL_SET_ERR_MSG(extack, "Qdisc not classful");
1070		err = -EINVAL;
1071		goto errout_qdisc;
1072	}
1073
1074	if (!cops->tcf_block) {
1075		NL_SET_ERR_MSG(extack, "Class doesn't support blocks");
1076		err = -EOPNOTSUPP;
1077		goto errout_qdisc;
1078	}
1079
1080errout_rcu:
1081	/* At this point we know that qdisc is not noop_qdisc,
1082	 * which means that qdisc holds a reference to net_device
1083	 * and we hold a reference to qdisc, so it is safe to release
1084	 * rcu read lock.
1085	 */
1086	rcu_read_unlock();
1087	return err;
1088
1089errout_qdisc:
1090	rcu_read_unlock();
1091
1092	if (rtnl_held)
1093		qdisc_put(*q);
1094	else
1095		qdisc_put_unlocked(*q);
1096	*q = NULL;
1097
1098	return err;
1099}
1100
1101static int __tcf_qdisc_cl_find(struct Qdisc *q, u32 parent, unsigned long *cl,
1102			       int ifindex, struct netlink_ext_ack *extack)
1103{
1104	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK)
1105		return 0;
1106
1107	/* Do we search for a filter attached to a class? */
1108	if (TC_H_MIN(parent)) {
1109		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1110
1111		*cl = cops->find(q, parent);
1112		if (*cl == 0) {
1113			NL_SET_ERR_MSG(extack, "Specified class doesn't exist");
1114			return -ENOENT;
1115		}
1116	}
1117
1118	return 0;
1119}
1120
1121static struct tcf_block *__tcf_block_find(struct net *net, struct Qdisc *q,
1122					  unsigned long cl, int ifindex,
1123					  u32 block_index,
1124					  struct netlink_ext_ack *extack)
1125{
1126	struct tcf_block *block;
1127
1128	if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
1129		block = tcf_block_refcnt_get(net, block_index);
1130		if (!block) {
1131			NL_SET_ERR_MSG(extack, "Block of given index was not found");
1132			return ERR_PTR(-EINVAL);
1133		}
1134	} else {
1135		const struct Qdisc_class_ops *cops = q->ops->cl_ops;
1136
1137		block = cops->tcf_block(q, cl, extack);
1138		if (!block)
1139			return ERR_PTR(-EINVAL);
1140
1141		if (tcf_block_shared(block)) {
1142			NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters");
1143			return ERR_PTR(-EOPNOTSUPP);
1144		}
1145
1146		/* Always take a reference to the block in order to support
1147		 * execution of the cls API rule update path without the rtnl
1148		 * lock. The caller must release the block when finished using
1149		 * it. The 'if' branch of this conditional obtains its reference
1150		 * to the block by calling tcf_block_refcnt_get().
1151		 */
1152		refcount_inc(&block->refcnt);
1153	}
1154
1155	return block;
1156}
1157
1158static void __tcf_block_put(struct tcf_block *block, struct Qdisc *q,
1159			    struct tcf_block_ext_info *ei, bool rtnl_held)
1160{
1161	if (refcount_dec_and_mutex_lock(&block->refcnt, &block->lock)) {
1162		/* Flushing/putting all chains will cause the block to be
1163		 * deallocated when the last chain is freed. However, if chain_list
1164		 * is empty, the block has to be deallocated manually. Once the
1165		 * block's reference counter has reached 0, it is no longer
1166		 * possible to increment it or add new chains to the block.
1167		 */
1168		bool free_block = list_empty(&block->chain_list);
1169
1170		mutex_unlock(&block->lock);
1171		if (tcf_block_shared(block))
1172			tcf_block_remove(block, block->net);
1173
1174		if (q)
1175			tcf_block_offload_unbind(block, q, ei);
1176
1177		if (free_block)
1178			tcf_block_destroy(block);
1179		else
1180			tcf_block_flush_all_chains(block, rtnl_held);
1181	} else if (q) {
1182		tcf_block_offload_unbind(block, q, ei);
1183	}
1184}
1185
1186static void tcf_block_refcnt_put(struct tcf_block *block, bool rtnl_held)
1187{
1188	__tcf_block_put(block, NULL, NULL, rtnl_held);
1189}
1190
1191/* Find tcf block.
1192 * Set q, parent, cl when appropriate.
1193 */
1194
1195static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q,
1196					u32 *parent, unsigned long *cl,
1197					int ifindex, u32 block_index,
1198					struct netlink_ext_ack *extack)
1199{
1200	struct tcf_block *block;
1201	int err = 0;
1202
1203	ASSERT_RTNL();
1204
1205	err = __tcf_qdisc_find(net, q, parent, ifindex, true, extack);
1206	if (err)
1207		goto errout;
1208
1209	err = __tcf_qdisc_cl_find(*q, *parent, cl, ifindex, extack);
1210	if (err)
1211		goto errout_qdisc;
1212
1213	block = __tcf_block_find(net, *q, *cl, ifindex, block_index, extack);
1214	if (IS_ERR(block)) {
1215		err = PTR_ERR(block);
1216		goto errout_qdisc;
1217	}
1218
1219	return block;
1220
1221errout_qdisc:
1222	if (*q)
1223		qdisc_put(*q);
1224errout:
1225	*q = NULL;
1226	return ERR_PTR(err);
1227}
1228
1229static void tcf_block_release(struct Qdisc *q, struct tcf_block *block,
1230			      bool rtnl_held)
1231{
1232	if (!IS_ERR_OR_NULL(block))
1233		tcf_block_refcnt_put(block, rtnl_held);
1234
1235	if (q) {
1236		if (rtnl_held)
1237			qdisc_put(q);
1238		else
1239			qdisc_put_unlocked(q);
1240	}
1241}
1242
1243struct tcf_block_owner_item {
1244	struct list_head list;
1245	struct Qdisc *q;
1246	enum flow_block_binder_type binder_type;
1247};
1248
1249static void
1250tcf_block_owner_netif_keep_dst(struct tcf_block *block,
1251			       struct Qdisc *q,
1252			       enum flow_block_binder_type binder_type)
1253{
1254	if (block->keep_dst &&
1255	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS &&
1256	    binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
1257		netif_keep_dst(qdisc_dev(q));
1258}
1259
1260void tcf_block_netif_keep_dst(struct tcf_block *block)
1261{
1262	struct tcf_block_owner_item *item;
1263
1264	block->keep_dst = true;
1265	list_for_each_entry(item, &block->owner_list, list)
1266		tcf_block_owner_netif_keep_dst(block, item->q,
1267					       item->binder_type);
1268}
1269EXPORT_SYMBOL(tcf_block_netif_keep_dst);
1270
1271static int tcf_block_owner_add(struct tcf_block *block,
1272			       struct Qdisc *q,
1273			       enum flow_block_binder_type binder_type)
1274{
1275	struct tcf_block_owner_item *item;
1276
1277	item = kmalloc(sizeof(*item), GFP_KERNEL);
1278	if (!item)
1279		return -ENOMEM;
1280	item->q = q;
1281	item->binder_type = binder_type;
1282	list_add(&item->list, &block->owner_list);
1283	return 0;
1284}
1285
1286static void tcf_block_owner_del(struct tcf_block *block,
1287				struct Qdisc *q,
1288				enum flow_block_binder_type binder_type)
1289{
1290	struct tcf_block_owner_item *item;
1291
1292	list_for_each_entry(item, &block->owner_list, list) {
1293		if (item->q == q && item->binder_type == binder_type) {
1294			list_del(&item->list);
1295			kfree(item);
1296			return;
1297		}
1298	}
1299	WARN_ON(1);
1300}
1301
1302int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q,
1303		      struct tcf_block_ext_info *ei,
1304		      struct netlink_ext_ack *extack)
1305{
1306	struct net *net = qdisc_net(q);
1307	struct tcf_block *block = NULL;
1308	int err;
1309
1310	if (ei->block_index)
1311		/* block_index not 0 means the shared block is requested */
1312		block = tcf_block_refcnt_get(net, ei->block_index);
1313
1314	if (!block) {
1315		block = tcf_block_create(net, q, ei->block_index, extack);
1316		if (IS_ERR(block))
1317			return PTR_ERR(block);
1318		if (tcf_block_shared(block)) {
1319			err = tcf_block_insert(block, net, extack);
1320			if (err)
1321				goto err_block_insert;
1322		}
1323	}
1324
1325	err = tcf_block_owner_add(block, q, ei->binder_type);
1326	if (err)
1327		goto err_block_owner_add;
1328
1329	tcf_block_owner_netif_keep_dst(block, q, ei->binder_type);
1330
1331	err = tcf_chain0_head_change_cb_add(block, ei, extack);
1332	if (err)
1333		goto err_chain0_head_change_cb_add;
1334
1335	err = tcf_block_offload_bind(block, q, ei, extack);
1336	if (err)
1337		goto err_block_offload_bind;
1338
1339	*p_block = block;
1340	return 0;
1341
1342err_block_offload_bind:
1343	tcf_chain0_head_change_cb_del(block, ei);
1344err_chain0_head_change_cb_add:
1345	tcf_block_owner_del(block, q, ei->binder_type);
1346err_block_owner_add:
1347err_block_insert:
1348	tcf_block_refcnt_put(block, true);
1349	return err;
1350}
1351EXPORT_SYMBOL(tcf_block_get_ext);
1352
1353static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv)
1354{
1355	struct tcf_proto __rcu **p_filter_chain = priv;
1356
1357	rcu_assign_pointer(*p_filter_chain, tp_head);
1358}
1359
1360int tcf_block_get(struct tcf_block **p_block,
1361		  struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q,
1362		  struct netlink_ext_ack *extack)
1363{
1364	struct tcf_block_ext_info ei = {
1365		.chain_head_change = tcf_chain_head_change_dflt,
1366		.chain_head_change_priv = p_filter_chain,
1367	};
1368
1369	WARN_ON(!p_filter_chain);
1370	return tcf_block_get_ext(p_block, q, &ei, extack);
1371}
1372EXPORT_SYMBOL(tcf_block_get);
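
/* Illustrative sketch (not part of the original file): a classful qdisc
 * typically acquires its block in ->init() and releases it in ->destroy().
 * The "foo" names below are hypothetical; fq_codel and drr follow this
 * pattern.
 *
 *	struct foo_sched_data {
 *		struct tcf_proto __rcu *filter_list;
 *		struct tcf_block *block;
 *		...
 *	};
 *
 *	static int foo_init(struct Qdisc *sch, struct nlattr *opt,
 *			    struct netlink_ext_ack *extack)
 *	{
 *		struct foo_sched_data *q = qdisc_priv(sch);
 *		int err;
 *
 *		err = tcf_block_get(&q->block, &q->filter_list, sch, extack);
 *		if (err)
 *			return err;
 *		...
 *	}
 *
 *	static void foo_destroy(struct Qdisc *sch)
 *	{
 *		struct foo_sched_data *q = qdisc_priv(sch);
 *
 *		tcf_block_put(q->block);
 *		...
 *	}
 */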
1373
1374/* XXX: Standalone actions are not allowed to jump to any chain, and bound
1375 * actions should all be removed after flushing.
1376 */
1377void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q,
1378		       struct tcf_block_ext_info *ei)
1379{
1380	if (!block)
1381		return;
1382	tcf_chain0_head_change_cb_del(block, ei);
1383	tcf_block_owner_del(block, q, ei->binder_type);
1384
1385	__tcf_block_put(block, q, ei, true);
1386}
1387EXPORT_SYMBOL(tcf_block_put_ext);
1388
1389void tcf_block_put(struct tcf_block *block)
1390{
1391	struct tcf_block_ext_info ei = {0, };
1392
1393	if (!block)
1394		return;
1395	tcf_block_put_ext(block, block->q, &ei);
1396}
1397
1398EXPORT_SYMBOL(tcf_block_put);
1399
1400static int
1401tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
1402			    void *cb_priv, bool add, bool offload_in_use,
1403			    struct netlink_ext_ack *extack)
1404{
1405	struct tcf_chain *chain, *chain_prev;
1406	struct tcf_proto *tp, *tp_prev;
1407	int err;
1408
1409	lockdep_assert_held(&block->cb_lock);
1410
1411	for (chain = __tcf_get_next_chain(block, NULL);
1412	     chain;
1413	     chain_prev = chain,
1414		     chain = __tcf_get_next_chain(block, chain),
1415		     tcf_chain_put(chain_prev)) {
1416		for (tp = __tcf_get_next_proto(chain, NULL); tp;
1417		     tp_prev = tp,
1418			     tp = __tcf_get_next_proto(chain, tp),
1419			     tcf_proto_put(tp_prev, true, NULL)) {
1420			if (tp->ops->reoffload) {
1421				err = tp->ops->reoffload(tp, add, cb, cb_priv,
1422							 extack);
1423				if (err && add)
1424					goto err_playback_remove;
1425			} else if (add && offload_in_use) {
1426				err = -EOPNOTSUPP;
1427				NL_SET_ERR_MSG(extack, "Filter HW offload failed - classifier without re-offloading support");
1428				goto err_playback_remove;
1429			}
1430		}
1431	}
1432
1433	return 0;
1434
1435err_playback_remove:
1436	tcf_proto_put(tp, true, NULL);
1437	tcf_chain_put(chain);
1438	tcf_block_playback_offloads(block, cb, cb_priv, false, offload_in_use,
1439				    extack);
1440	return err;
1441}
1442
1443static int tcf_block_bind(struct tcf_block *block,
1444			  struct flow_block_offload *bo)
1445{
1446	struct flow_block_cb *block_cb, *next;
1447	int err, i = 0;
1448
1449	lockdep_assert_held(&block->cb_lock);
1450
1451	list_for_each_entry(block_cb, &bo->cb_list, list) {
1452		err = tcf_block_playback_offloads(block, block_cb->cb,
1453						  block_cb->cb_priv, true,
1454						  tcf_block_offload_in_use(block),
1455						  bo->extack);
1456		if (err)
1457			goto err_unroll;
1458		if (!bo->unlocked_driver_cb)
1459			block->lockeddevcnt++;
1460
1461		i++;
1462	}
1463	list_splice(&bo->cb_list, &block->flow_block.cb_list);
1464
1465	return 0;
1466
1467err_unroll:
1468	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
1469		list_del(&block_cb->driver_list);
1470		if (i-- > 0) {
1471			list_del(&block_cb->list);
1472			tcf_block_playback_offloads(block, block_cb->cb,
1473						    block_cb->cb_priv, false,
1474						    tcf_block_offload_in_use(block),
1475						    NULL);
1476			if (!bo->unlocked_driver_cb)
1477				block->lockeddevcnt--;
1478		}
1479		flow_block_cb_free(block_cb);
1480	}
1481
1482	return err;
1483}
1484
1485static void tcf_block_unbind(struct tcf_block *block,
1486			     struct flow_block_offload *bo)
1487{
1488	struct flow_block_cb *block_cb, *next;
1489
1490	lockdep_assert_held(&block->cb_lock);
1491
1492	list_for_each_entry_safe(block_cb, next, &bo->cb_list, list) {
1493		tcf_block_playback_offloads(block, block_cb->cb,
1494					    block_cb->cb_priv, false,
1495					    tcf_block_offload_in_use(block),
1496					    NULL);
1497		list_del(&block_cb->list);
1498		flow_block_cb_free(block_cb);
1499		if (!bo->unlocked_driver_cb)
1500			block->lockeddevcnt--;
1501	}
1502}
1503
1504static int tcf_block_setup(struct tcf_block *block,
1505			   struct flow_block_offload *bo)
1506{
1507	int err;
1508
1509	switch (bo->command) {
1510	case FLOW_BLOCK_BIND:
1511		err = tcf_block_bind(block, bo);
1512		break;
1513	case FLOW_BLOCK_UNBIND:
1514		err = 0;
1515		tcf_block_unbind(block, bo);
1516		break;
1517	default:
1518		WARN_ON_ONCE(1);
1519		err = -EOPNOTSUPP;
1520	}
1521
1522	return err;
1523}
1524
1525/* Main classifier routine: scans classifier chain attached
1526 * to this qdisc, (optionally) tests for protocol and asks
1527 * specific classifiers.
1528 */
1529static inline int __tcf_classify(struct sk_buff *skb,
1530				 const struct tcf_proto *tp,
1531				 const struct tcf_proto *orig_tp,
1532				 struct tcf_result *res,
1533				 bool compat_mode,
1534				 u32 *last_executed_chain)
1535{
1536#ifdef CONFIG_NET_CLS_ACT
1537	const int max_reclassify_loop = 16;
1538	const struct tcf_proto *first_tp;
1539	int limit = 0;
1540
1541reclassify:
1542#endif
1543	for (; tp; tp = rcu_dereference_bh(tp->next)) {
1544		__be16 protocol = skb_protocol(skb, false);
1545		int err;
1546
1547		if (tp->protocol != protocol &&
1548		    tp->protocol != htons(ETH_P_ALL))
1549			continue;
1550
1551		err = tp->classify(skb, tp, res);
1552#ifdef CONFIG_NET_CLS_ACT
1553		if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
1554			first_tp = orig_tp;
1555			*last_executed_chain = first_tp->chain->index;
1556			goto reset;
1557		} else if (unlikely(TC_ACT_EXT_CMP(err, TC_ACT_GOTO_CHAIN))) {
1558			first_tp = res->goto_tp;
1559			*last_executed_chain = err & TC_ACT_EXT_VAL_MASK;
1560			goto reset;
1561		}
1562#endif
1563		if (err >= 0)
1564			return err;
1565	}
1566
1567	return TC_ACT_UNSPEC; /* signal: continue lookup */
1568#ifdef CONFIG_NET_CLS_ACT
1569reset:
1570	if (unlikely(limit++ >= max_reclassify_loop)) {
1571		net_notice_ratelimited("%u: reclassify loop, rule prio %u, protocol %02x\n",
1572				       tp->chain->block->index,
1573				       tp->prio & 0xffff,
1574				       ntohs(tp->protocol));
1575		return TC_ACT_SHOT;
1576	}
1577
1578	tp = first_tp;
1579	goto reclassify;
1580#endif
1581}
1582
1583int tcf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
1584		 struct tcf_result *res, bool compat_mode)
1585{
1586	u32 last_executed_chain = 0;
1587
1588	return __tcf_classify(skb, tp, tp, res, compat_mode,
1589			      &last_executed_chain);
1590}
1591EXPORT_SYMBOL(tcf_classify);
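
/* Illustrative sketch (not part of the original file): a qdisc's classify
 * helper typically dereferences its filter list under the RCU BH read side
 * (implicit in the qdisc enqueue path) and handles the action verdicts. The
 * "q" variable is the hypothetical qdisc private data; fq_codel and prio
 * classify in a similar way.
 *
 *	struct tcf_result res;
 *	struct tcf_proto *fl;
 *	int result;
 *
 *	fl = rcu_dereference_bh(q->filter_list);
 *	result = tcf_classify(skb, fl, &res, false);
 *	switch (result) {
 *	case TC_ACT_SHOT:
 *	case TC_ACT_STOLEN:
 *	case TC_ACT_QUEUED:
 *	case TC_ACT_TRAP:
 *		return 0;	(treat as dropped/consumed; no class selected)
 *	}
 *	(otherwise res.classid selects the class or band)
 */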
1592
1593int tcf_classify_ingress(struct sk_buff *skb,
1594			 const struct tcf_block *ingress_block,
1595			 const struct tcf_proto *tp,
1596			 struct tcf_result *res, bool compat_mode)
1597{
1598#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1599	u32 last_executed_chain = 0;
1600
1601	return __tcf_classify(skb, tp, tp, res, compat_mode,
1602			      &last_executed_chain);
1603#else
1604	u32 last_executed_chain = tp ? tp->chain->index : 0;
1605	const struct tcf_proto *orig_tp = tp;
1606	struct tc_skb_ext *ext;
1607	int ret;
1608
1609	ext = skb_ext_find(skb, TC_SKB_EXT);
1610
1611	if (ext && ext->chain) {
1612		struct tcf_chain *fchain;
1613
1614		fchain = tcf_chain_lookup_rcu(ingress_block, ext->chain);
1615		if (!fchain)
1616			return TC_ACT_SHOT;
1617
1618		/* Consume, so cloned/redirect skbs won't inherit ext */
1619		skb_ext_del(skb, TC_SKB_EXT);
1620
1621		tp = rcu_dereference_bh(fchain->filter_chain);
1622		last_executed_chain = fchain->index;
1623	}
1624
1625	ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
1626			     &last_executed_chain);
1627
1628	/* If we missed on some chain */
1629	if (ret == TC_ACT_UNSPEC && last_executed_chain) {
1630		ext = tc_skb_ext_alloc(skb);
1631		if (WARN_ON_ONCE(!ext))
1632			return TC_ACT_SHOT;
1633		ext->chain = last_executed_chain;
1634		ext->mru = qdisc_skb_cb(skb)->mru;
1635	}
1636
1637	return ret;
1638#endif
1639}
1640EXPORT_SYMBOL(tcf_classify_ingress);
1641
1642struct tcf_chain_info {
1643	struct tcf_proto __rcu **pprev;
1644	struct tcf_proto __rcu *next;
1645};
1646
1647static struct tcf_proto *tcf_chain_tp_prev(struct tcf_chain *chain,
1648					   struct tcf_chain_info *chain_info)
1649{
1650	return tcf_chain_dereference(*chain_info->pprev, chain);
1651}
1652
1653static int tcf_chain_tp_insert(struct tcf_chain *chain,
1654			       struct tcf_chain_info *chain_info,
1655			       struct tcf_proto *tp)
1656{
1657	if (chain->flushing)
1658		return -EAGAIN;
1659
1660	RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain, chain_info));
1661	if (*chain_info->pprev == chain->filter_chain)
1662		tcf_chain0_head_change(chain, tp);
1663	tcf_proto_get(tp);
1664	rcu_assign_pointer(*chain_info->pprev, tp);
1665
1666	return 0;
1667}
1668
1669static void tcf_chain_tp_remove(struct tcf_chain *chain,
1670				struct tcf_chain_info *chain_info,
1671				struct tcf_proto *tp)
1672{
1673	struct tcf_proto *next = tcf_chain_dereference(chain_info->next, chain);
1674
1675	tcf_proto_mark_delete(tp);
1676	if (tp == chain->filter_chain)
1677		tcf_chain0_head_change(chain, next);
1678	RCU_INIT_POINTER(*chain_info->pprev, next);
1679}
1680
1681static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1682					   struct tcf_chain_info *chain_info,
1683					   u32 protocol, u32 prio,
1684					   bool prio_allocate);
1685
1686/* Try to insert new proto.
1687 * If proto with specified priority already exists, free new proto
1688 * and return existing one.
1689 */
1690
1691static struct tcf_proto *tcf_chain_tp_insert_unique(struct tcf_chain *chain,
1692						    struct tcf_proto *tp_new,
1693						    u32 protocol, u32 prio,
1694						    bool rtnl_held)
1695{
1696	struct tcf_chain_info chain_info;
1697	struct tcf_proto *tp;
1698	int err = 0;
1699
1700	mutex_lock(&chain->filter_chain_lock);
1701
1702	if (tcf_proto_exists_destroying(chain, tp_new)) {
1703		mutex_unlock(&chain->filter_chain_lock);
1704		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1705		return ERR_PTR(-EAGAIN);
1706	}
1707
1708	tp = tcf_chain_tp_find(chain, &chain_info,
1709			       protocol, prio, false);
1710	if (!tp)
1711		err = tcf_chain_tp_insert(chain, &chain_info, tp_new);
1712	mutex_unlock(&chain->filter_chain_lock);
1713
1714	if (tp) {
1715		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1716		tp_new = tp;
1717	} else if (err) {
1718		tcf_proto_destroy(tp_new, rtnl_held, false, NULL);
1719		tp_new = ERR_PTR(err);
1720	}
1721
1722	return tp_new;
1723}
1724
1725static void tcf_chain_tp_delete_empty(struct tcf_chain *chain,
1726				      struct tcf_proto *tp, bool rtnl_held,
1727				      struct netlink_ext_ack *extack)
1728{
1729	struct tcf_chain_info chain_info;
1730	struct tcf_proto *tp_iter;
1731	struct tcf_proto **pprev;
1732	struct tcf_proto *next;
1733
1734	mutex_lock(&chain->filter_chain_lock);
1735
1736	/* Atomically find and remove tp from chain. */
1737	for (pprev = &chain->filter_chain;
1738	     (tp_iter = tcf_chain_dereference(*pprev, chain));
1739	     pprev = &tp_iter->next) {
1740		if (tp_iter == tp) {
1741			chain_info.pprev = pprev;
1742			chain_info.next = tp_iter->next;
1743			WARN_ON(tp_iter->deleting);
1744			break;
1745		}
1746	}
1747	/* Verify that tp still exists and no new filters were inserted
1748	 * concurrently.
1749	 * Mark tp for deletion if it is empty.
1750	 */
1751	if (!tp_iter || !tcf_proto_check_delete(tp)) {
1752		mutex_unlock(&chain->filter_chain_lock);
1753		return;
1754	}
1755
1756	tcf_proto_signal_destroying(chain, tp);
1757	next = tcf_chain_dereference(chain_info.next, chain);
1758	if (tp == chain->filter_chain)
1759		tcf_chain0_head_change(chain, next);
1760	RCU_INIT_POINTER(*chain_info.pprev, next);
1761	mutex_unlock(&chain->filter_chain_lock);
1762
1763	tcf_proto_put(tp, rtnl_held, extack);
1764}
1765
1766static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain,
1767					   struct tcf_chain_info *chain_info,
1768					   u32 protocol, u32 prio,
1769					   bool prio_allocate)
1770{
1771	struct tcf_proto **pprev;
1772	struct tcf_proto *tp;
1773
1774	/* Check the chain for existence of proto-tcf with this priority */
1775	for (pprev = &chain->filter_chain;
1776	     (tp = tcf_chain_dereference(*pprev, chain));
1777	     pprev = &tp->next) {
1778		if (tp->prio >= prio) {
1779			if (tp->prio == prio) {
1780				if (prio_allocate ||
1781				    (tp->protocol != protocol && protocol))
1782					return ERR_PTR(-EINVAL);
1783			} else {
1784				tp = NULL;
1785			}
1786			break;
1787		}
1788	}
1789	chain_info->pprev = pprev;
1790	if (tp) {
1791		chain_info->next = tp->next;
1792		tcf_proto_get(tp);
1793	} else {
1794		chain_info->next = NULL;
1795	}
1796	return tp;
1797}
1798
1799static int tcf_fill_node(struct net *net, struct sk_buff *skb,
1800			 struct tcf_proto *tp, struct tcf_block *block,
1801			 struct Qdisc *q, u32 parent, void *fh,
1802			 u32 portid, u32 seq, u16 flags, int event,
1803			 bool terse_dump, bool rtnl_held)
1804{
1805	struct tcmsg *tcm;
1806	struct nlmsghdr  *nlh;
1807	unsigned char *b = skb_tail_pointer(skb);
1808
1809	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
1810	if (!nlh)
1811		goto out_nlmsg_trim;
1812	tcm = nlmsg_data(nlh);
1813	tcm->tcm_family = AF_UNSPEC;
1814	tcm->tcm__pad1 = 0;
1815	tcm->tcm__pad2 = 0;
1816	if (q) {
1817		tcm->tcm_ifindex = qdisc_dev(q)->ifindex;
1818		tcm->tcm_parent = parent;
1819	} else {
1820		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
1821		tcm->tcm_block_index = block->index;
1822	}
1823	tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol);
1824	if (nla_put_string(skb, TCA_KIND, tp->ops->kind))
1825		goto nla_put_failure;
1826	if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index))
1827		goto nla_put_failure;
1828	if (!fh) {
1829		tcm->tcm_handle = 0;
1830	} else if (terse_dump) {
1831		if (tp->ops->terse_dump) {
1832			if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
1833						rtnl_held) < 0)
1834				goto nla_put_failure;
1835		} else {
1836			goto cls_op_not_supp;
1837		}
1838	} else {
1839		if (tp->ops->dump &&
1840		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
1841			goto nla_put_failure;
1842	}
1843	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
1844	return skb->len;
1845
1846out_nlmsg_trim:
1847nla_put_failure:
1848cls_op_not_supp:
1849	nlmsg_trim(skb, b);
1850	return -1;
1851}
1852
1853static int tfilter_notify(struct net *net, struct sk_buff *oskb,
1854			  struct nlmsghdr *n, struct tcf_proto *tp,
1855			  struct tcf_block *block, struct Qdisc *q,
1856			  u32 parent, void *fh, int event, bool unicast,
1857			  bool rtnl_held)
1858{
1859	struct sk_buff *skb;
1860	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1861	int err = 0;
1862
1863	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1864	if (!skb)
1865		return -ENOBUFS;
1866
1867	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1868			  n->nlmsg_seq, n->nlmsg_flags, event,
1869			  false, rtnl_held) <= 0) {
1870		kfree_skb(skb);
1871		return -EINVAL;
1872	}
1873
1874	if (unicast)
1875		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1876	else
1877		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1878				     n->nlmsg_flags & NLM_F_ECHO);
1879
1880	if (err > 0)
1881		err = 0;
1882	return err;
1883}
1884
1885static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
1886			      struct nlmsghdr *n, struct tcf_proto *tp,
1887			      struct tcf_block *block, struct Qdisc *q,
1888			      u32 parent, void *fh, bool unicast, bool *last,
1889			      bool rtnl_held, struct netlink_ext_ack *extack)
1890{
1891	struct sk_buff *skb;
1892	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
1893	int err;
1894
1895	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1896	if (!skb)
1897		return -ENOBUFS;
1898
1899	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
1900			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
1901			  false, rtnl_held) <= 0) {
1902		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
1903		kfree_skb(skb);
1904		return -EINVAL;
1905	}
1906
1907	err = tp->ops->delete(tp, fh, last, rtnl_held, extack);
1908	if (err) {
1909		kfree_skb(skb);
1910		return err;
1911	}
1912
1913	if (unicast)
1914		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
1915	else
1916		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
1917				     n->nlmsg_flags & NLM_F_ECHO);
1918	if (err < 0)
1919		NL_SET_ERR_MSG(extack, "Failed to send filter delete notification");
1920
1921	if (err > 0)
1922		err = 0;
1923	return err;
1924}
1925
1926static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
1927				 struct tcf_block *block, struct Qdisc *q,
1928				 u32 parent, struct nlmsghdr *n,
1929				 struct tcf_chain *chain, int event,
1930				 bool rtnl_held)
1931{
1932	struct tcf_proto *tp;
1933
1934	for (tp = tcf_get_next_proto(chain, NULL, rtnl_held);
1935	     tp; tp = tcf_get_next_proto(chain, tp, rtnl_held))
1936		tfilter_notify(net, oskb, n, tp, block,
1937			       q, parent, NULL, event, false, rtnl_held);
1938}
1939
1940static void tfilter_put(struct tcf_proto *tp, void *fh)
1941{
1942	if (tp->ops->put && fh)
1943		tp->ops->put(tp, fh);
1944}
1945
1946static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
1947			  struct netlink_ext_ack *extack)
1948{
1949	struct net *net = sock_net(skb->sk);
1950	struct nlattr *tca[TCA_MAX + 1];
1951	char name[IFNAMSIZ];
1952	struct tcmsg *t;
1953	u32 protocol;
1954	u32 prio;
1955	bool prio_allocate;
1956	u32 parent;
1957	u32 chain_index;
1958	struct Qdisc *q;
1959	struct tcf_chain_info chain_info;
1960	struct tcf_chain *chain;
1961	struct tcf_block *block;
1962	struct tcf_proto *tp;
1963	unsigned long cl;
1964	void *fh;
1965	int err;
1966	int tp_created;
1967	bool rtnl_held = false;
1968
1969	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
1970		return -EPERM;
1971
1972replay:
1973	tp_created = 0;
1974
1975	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
1976				     rtm_tca_policy, extack);
1977	if (err < 0)
1978		return err;
1979
1980	t = nlmsg_data(n);
1981	protocol = TC_H_MIN(t->tcm_info);
1982	prio = TC_H_MAJ(t->tcm_info);
1983	prio_allocate = false;
1984	parent = t->tcm_parent;
1985	tp = NULL;
1986	cl = 0;
1987	block = NULL;
1988	q = NULL;
1989	chain = NULL;
1990
1991	if (prio == 0) {
1992		/* If no priority is provided by the user,
1993		 * we allocate one.
1994		 */
1995		if (n->nlmsg_flags & NLM_F_CREATE) {
1996			prio = TC_H_MAKE(0x80000000U, 0U);
1997			prio_allocate = true;
1998		} else {
1999			NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2000			return -ENOENT;
2001		}
2002	}
2003
2004	/* Find head of filter chain. */
2005
2006	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2007	if (err)
2008		return err;
2009
2010	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2011		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2012		err = -EINVAL;
2013		goto errout;
2014	}
2015
2016	/* Take the rtnl mutex if rtnl_held was set to true on a previous iteration,
2017	 * the block is shared (no qdisc found), the qdisc is not unlocked, the
2018	 * classifier type is not specified, or the classifier is not unlocked.
2019	 */
2020	if (rtnl_held ||
2021	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2022	    !tcf_proto_is_unlocked(name)) {
2023		rtnl_held = true;
2024		rtnl_lock();
2025	}
2026
2027	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2028	if (err)
2029		goto errout;
2030
2031	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2032				 extack);
2033	if (IS_ERR(block)) {
2034		err = PTR_ERR(block);
2035		goto errout;
2036	}
2037	block->classid = parent;
2038
2039	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2040	if (chain_index > TC_ACT_EXT_VAL_MASK) {
2041		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2042		err = -EINVAL;
2043		goto errout;
2044	}
2045	chain = tcf_chain_get(block, chain_index, true);
2046	if (!chain) {
2047		NL_SET_ERR_MSG(extack, "Cannot create specified filter chain");
2048		err = -ENOMEM;
2049		goto errout;
2050	}
2051
2052	mutex_lock(&chain->filter_chain_lock);
2053	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2054			       prio, prio_allocate);
2055	if (IS_ERR(tp)) {
2056		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2057		err = PTR_ERR(tp);
2058		goto errout_locked;
2059	}
2060
2061	if (tp == NULL) {
2062		struct tcf_proto *tp_new = NULL;
2063
2064		if (chain->flushing) {
2065			err = -EAGAIN;
2066			goto errout_locked;
2067		}
2068
2069		/* Proto-tcf does not exist, create new one */
2070
2071		if (tca[TCA_KIND] == NULL || !protocol) {
2072			NL_SET_ERR_MSG(extack, "Filter kind and protocol must be specified");
2073			err = -EINVAL;
2074			goto errout_locked;
2075		}
2076
2077		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2078			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2079			err = -ENOENT;
2080			goto errout_locked;
2081		}
2082
2083		if (prio_allocate)
2084			prio = tcf_auto_prio(tcf_chain_tp_prev(chain,
2085							       &chain_info));
2086
2087		mutex_unlock(&chain->filter_chain_lock);
2088		tp_new = tcf_proto_create(name, protocol, prio, chain,
2089					  rtnl_held, extack);
2090		if (IS_ERR(tp_new)) {
2091			err = PTR_ERR(tp_new);
2092			goto errout_tp;
2093		}
2094
2095		tp_created = 1;
2096		tp = tcf_chain_tp_insert_unique(chain, tp_new, protocol, prio,
2097						rtnl_held);
2098		if (IS_ERR(tp)) {
2099			err = PTR_ERR(tp);
2100			goto errout_tp;
2101		}
2102	} else {
2103		mutex_unlock(&chain->filter_chain_lock);
2104	}
2105
2106	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2107		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2108		err = -EINVAL;
2109		goto errout;
2110	}
2111
2112	fh = tp->ops->get(tp, t->tcm_handle);
2113
2114	if (!fh) {
2115		if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2116			NL_SET_ERR_MSG(extack, "Need both RTM_NEWTFILTER and NLM_F_CREATE to create a new filter");
2117			err = -ENOENT;
2118			goto errout;
2119		}
2120	} else if (n->nlmsg_flags & NLM_F_EXCL) {
2121		tfilter_put(tp, fh);
2122		NL_SET_ERR_MSG(extack, "Filter already exists");
2123		err = -EEXIST;
2124		goto errout;
2125	}
2126
2127	if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) {
2128		tfilter_put(tp, fh);
2129		NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind");
2130		err = -EINVAL;
2131		goto errout;
2132	}
2133
2134	err = tp->ops->change(net, skb, tp, cl, t->tcm_handle, tca, &fh,
2135			      n->nlmsg_flags & NLM_F_CREATE ? TCA_ACT_NOREPLACE : TCA_ACT_REPLACE,
2136			      rtnl_held, extack);
2137	if (err == 0) {
2138		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2139			       RTM_NEWTFILTER, false, rtnl_held);
2140		tfilter_put(tp, fh);
2141		/* q pointer is NULL for shared blocks */
2142		if (q)
2143			q->flags &= ~TCQ_F_CAN_BYPASS;
2144	}
2145
2146errout:
2147	if (err && tp_created)
2148		tcf_chain_tp_delete_empty(chain, tp, rtnl_held, NULL);
2149errout_tp:
2150	if (chain) {
2151		if (tp && !IS_ERR(tp))
2152			tcf_proto_put(tp, rtnl_held, NULL);
2153		if (!tp_created)
2154			tcf_chain_put(chain);
2155	}
2156	tcf_block_release(q, block, rtnl_held);
2157
2158	if (rtnl_held)
2159		rtnl_unlock();
2160
2161	if (err == -EAGAIN) {
2162		/* Take rtnl lock in case EAGAIN is caused by concurrent flush
2163		 * of target chain.
2164		 */
2165		rtnl_held = true;
2166		/* Replay the request. */
2167		goto replay;
2168	}
2169	return err;
2170
2171errout_locked:
2172	mutex_unlock(&chain->filter_chain_lock);
2173	goto errout;
2174}
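/* For illustration only (not part of the original source): an RTM_NEWTFILTER
 * request handled above is what iproute2 emits for a command along the lines
 * of
 *
 *	tc filter add dev eth0 ingress prio 10 protocol ip \
 *		flower ip_proto tcp action drop
 *
 * where the device, priority and match are arbitrary examples.
 */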
2175
2176static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2177			  struct netlink_ext_ack *extack)
2178{
2179	struct net *net = sock_net(skb->sk);
2180	struct nlattr *tca[TCA_MAX + 1];
2181	char name[IFNAMSIZ];
2182	struct tcmsg *t;
2183	u32 protocol;
2184	u32 prio;
2185	u32 parent;
2186	u32 chain_index;
2187	struct Qdisc *q = NULL;
2188	struct tcf_chain_info chain_info;
2189	struct tcf_chain *chain = NULL;
2190	struct tcf_block *block = NULL;
2191	struct tcf_proto *tp = NULL;
2192	unsigned long cl = 0;
2193	void *fh = NULL;
2194	int err;
2195	bool rtnl_held = false;
2196
2197	if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2198		return -EPERM;
2199
2200	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2201				     rtm_tca_policy, extack);
2202	if (err < 0)
2203		return err;
2204
2205	t = nlmsg_data(n);
2206	protocol = TC_H_MIN(t->tcm_info);
2207	prio = TC_H_MAJ(t->tcm_info);
2208	parent = t->tcm_parent;
2209
2210	if (prio == 0 && (protocol || t->tcm_handle || tca[TCA_KIND])) {
2211		NL_SET_ERR_MSG(extack, "Cannot flush filters with protocol, handle or kind set");
2212		return -ENOENT;
2213	}
2214
2215	/* Find head of filter chain. */
2216
2217	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2218	if (err)
2219		return err;
2220
2221	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2222		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2223		err = -EINVAL;
2224		goto errout;
2225	}
2226	/* Take the rtnl mutex if flushing the whole chain, the block is shared
2227	 * (no qdisc found), the qdisc is not unlocked, the classifier type is
2228	 * not specified, or the classifier is not unlocked.
2229	 */
2230	if (!prio ||
2231	    (q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2232	    !tcf_proto_is_unlocked(name)) {
2233		rtnl_held = true;
2234		rtnl_lock();
2235	}
2236
2237	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2238	if (err)
2239		goto errout;
2240
2241	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2242				 extack);
2243	if (IS_ERR(block)) {
2244		err = PTR_ERR(block);
2245		goto errout;
2246	}
2247
2248	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2249	if (chain_index > TC_ACT_EXT_VAL_MASK) {
2250		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2251		err = -EINVAL;
2252		goto errout;
2253	}
2254	chain = tcf_chain_get(block, chain_index, false);
2255	if (!chain) {
2256		/* User requested flush on non-existent chain. Nothing to do,
2257		 * so just return success.
2258		 */
2259		if (prio == 0) {
2260			err = 0;
2261			goto errout;
2262		}
2263		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2264		err = -ENOENT;
2265		goto errout;
2266	}
2267
2268	if (prio == 0) {
2269		tfilter_notify_chain(net, skb, block, q, parent, n,
2270				     chain, RTM_DELTFILTER, rtnl_held);
2271		tcf_chain_flush(chain, rtnl_held);
2272		err = 0;
2273		goto errout;
2274	}
2275
2276	mutex_lock(&chain->filter_chain_lock);
2277	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2278			       prio, false);
2279	if (!tp || IS_ERR(tp)) {
2280		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2281		err = tp ? PTR_ERR(tp) : -ENOENT;
2282		goto errout_locked;
2283	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2284		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2285		err = -EINVAL;
2286		goto errout_locked;
2287	} else if (t->tcm_handle == 0) {
2288		tcf_proto_signal_destroying(chain, tp);
2289		tcf_chain_tp_remove(chain, &chain_info, tp);
2290		mutex_unlock(&chain->filter_chain_lock);
2291
2292		tcf_proto_put(tp, rtnl_held, NULL);
2293		tfilter_notify(net, skb, n, tp, block, q, parent, fh,
2294			       RTM_DELTFILTER, false, rtnl_held);
2295		err = 0;
2296		goto errout;
2297	}
2298	mutex_unlock(&chain->filter_chain_lock);
2299
2300	fh = tp->ops->get(tp, t->tcm_handle);
2301
2302	if (!fh) {
2303		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2304		err = -ENOENT;
2305	} else {
2306		bool last;
2307
2308		err = tfilter_del_notify(net, skb, n, tp, block,
2309					 q, parent, fh, false, &last,
2310					 rtnl_held, extack);
2311
2312		if (err)
2313			goto errout;
2314		if (last)
2315			tcf_chain_tp_delete_empty(chain, tp, rtnl_held, extack);
2316	}
2317
2318errout:
2319	if (chain) {
2320		if (tp && !IS_ERR(tp))
2321			tcf_proto_put(tp, rtnl_held, NULL);
2322		tcf_chain_put(chain);
2323	}
2324	tcf_block_release(q, block, rtnl_held);
2325
2326	if (rtnl_held)
2327		rtnl_unlock();
2328
2329	return err;
2330
2331errout_locked:
2332	mutex_unlock(&chain->filter_chain_lock);
2333	goto errout;
2334}
2335
2336static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
2337			  struct netlink_ext_ack *extack)
2338{
2339	struct net *net = sock_net(skb->sk);
2340	struct nlattr *tca[TCA_MAX + 1];
2341	char name[IFNAMSIZ];
2342	struct tcmsg *t;
2343	u32 protocol;
2344	u32 prio;
2345	u32 parent;
2346	u32 chain_index;
2347	struct Qdisc *q = NULL;
2348	struct tcf_chain_info chain_info;
2349	struct tcf_chain *chain = NULL;
2350	struct tcf_block *block = NULL;
2351	struct tcf_proto *tp = NULL;
2352	unsigned long cl = 0;
2353	void *fh = NULL;
2354	int err;
2355	bool rtnl_held = false;
2356
2357	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2358				     rtm_tca_policy, extack);
2359	if (err < 0)
2360		return err;
2361
2362	t = nlmsg_data(n);
2363	protocol = TC_H_MIN(t->tcm_info);
2364	prio = TC_H_MAJ(t->tcm_info);
2365	parent = t->tcm_parent;
2366
2367	if (prio == 0) {
2368		NL_SET_ERR_MSG(extack, "Invalid filter command with priority of zero");
2369		return -ENOENT;
2370	}
2371
2372	/* Find head of filter chain. */
2373
2374	err = __tcf_qdisc_find(net, &q, &parent, t->tcm_ifindex, false, extack);
2375	if (err)
2376		return err;
2377
2378	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2379		NL_SET_ERR_MSG(extack, "Specified TC filter name too long");
2380		err = -EINVAL;
2381		goto errout;
2382	}
2383	/* Take the rtnl mutex if the block is shared (no qdisc found), the qdisc
2384	 * is not unlocked, the classifier type is not specified, or the
2385	 * classifier is not unlocked.
2386	 */
2387	if ((q && !(q->ops->cl_ops->flags & QDISC_CLASS_OPS_DOIT_UNLOCKED)) ||
2388	    !tcf_proto_is_unlocked(name)) {
2389		rtnl_held = true;
2390		rtnl_lock();
2391	}
2392
2393	err = __tcf_qdisc_cl_find(q, parent, &cl, t->tcm_ifindex, extack);
2394	if (err)
2395		goto errout;
2396
2397	block = __tcf_block_find(net, q, cl, t->tcm_ifindex, t->tcm_block_index,
2398				 extack);
2399	if (IS_ERR(block)) {
2400		err = PTR_ERR(block);
2401		goto errout;
2402	}
2403
2404	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2405	if (chain_index > TC_ACT_EXT_VAL_MASK) {
2406		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2407		err = -EINVAL;
2408		goto errout;
2409	}
2410	chain = tcf_chain_get(block, chain_index, false);
2411	if (!chain) {
2412		NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2413		err = -EINVAL;
2414		goto errout;
2415	}
2416
2417	mutex_lock(&chain->filter_chain_lock);
2418	tp = tcf_chain_tp_find(chain, &chain_info, protocol,
2419			       prio, false);
2420	mutex_unlock(&chain->filter_chain_lock);
2421	if (!tp || IS_ERR(tp)) {
2422		NL_SET_ERR_MSG(extack, "Filter with specified priority/protocol not found");
2423		err = tp ? PTR_ERR(tp) : -ENOENT;
2424		goto errout;
2425	} else if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], tp->ops->kind)) {
2426		NL_SET_ERR_MSG(extack, "Specified filter kind does not match existing one");
2427		err = -EINVAL;
2428		goto errout;
2429	}
2430
2431	fh = tp->ops->get(tp, t->tcm_handle);
2432
2433	if (!fh) {
2434		NL_SET_ERR_MSG(extack, "Specified filter handle not found");
2435		err = -ENOENT;
2436	} else {
2437		err = tfilter_notify(net, skb, n, tp, block, q, parent,
2438				     fh, RTM_NEWTFILTER, true, rtnl_held);
2439		if (err < 0)
2440			NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
2441	}
2442
2443	tfilter_put(tp, fh);
2444errout:
2445	if (chain) {
2446		if (tp && !IS_ERR(tp))
2447			tcf_proto_put(tp, rtnl_held, NULL);
2448		tcf_chain_put(chain);
2449	}
2450	tcf_block_release(q, block, rtnl_held);
2451
2452	if (rtnl_held)
2453		rtnl_unlock();
2454
2455	return err;
2456}
2457
2458struct tcf_dump_args {
2459	struct tcf_walker w;
2460	struct sk_buff *skb;
2461	struct netlink_callback *cb;
2462	struct tcf_block *block;
2463	struct Qdisc *q;
2464	u32 parent;
2465	bool terse_dump;
2466};
2467
2468static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
2469{
2470	struct tcf_dump_args *a = (void *)arg;
2471	struct net *net = sock_net(a->skb->sk);
2472
2473	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
2474			     n, NETLINK_CB(a->cb->skb).portid,
2475			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
2476			     RTM_NEWTFILTER, a->terse_dump, true);
2477}
2478
2479static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
2480			   struct sk_buff *skb, struct netlink_callback *cb,
2481			   long index_start, long *p_index, bool terse)
2482{
2483	struct net *net = sock_net(skb->sk);
2484	struct tcf_block *block = chain->block;
2485	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2486	struct tcf_proto *tp, *tp_prev;
2487	struct tcf_dump_args arg;
2488
2489	for (tp = __tcf_get_next_proto(chain, NULL);
2490	     tp;
2491	     tp_prev = tp,
2492		     tp = __tcf_get_next_proto(chain, tp),
2493		     tcf_proto_put(tp_prev, true, NULL),
2494		     (*p_index)++) {
2495		if (*p_index < index_start)
2496			continue;
2497		if (TC_H_MAJ(tcm->tcm_info) &&
2498		    TC_H_MAJ(tcm->tcm_info) != tp->prio)
2499			continue;
2500		if (TC_H_MIN(tcm->tcm_info) &&
2501		    TC_H_MIN(tcm->tcm_info) != tp->protocol)
2502			continue;
2503		if (*p_index > index_start)
2504			memset(&cb->args[1], 0,
2505			       sizeof(cb->args) - sizeof(cb->args[0]));
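		/* cb->args[] bookkeeping: args[0] is the overall index across
		 * chains and protos, args[1] tracks progress within the
		 * current proto (0 means the proto itself has not been dumped
		 * yet), and args[2] carries the walker cookie. The memset
		 * above clears that per-proto state once the dump has
		 * advanced past the proto it previously stopped at.
		 */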
2506		if (cb->args[1] == 0) {
2507			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
2508					  NETLINK_CB(cb->skb).portid,
2509					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
2510					  RTM_NEWTFILTER, false, true) <= 0)
2511				goto errout;
2512			cb->args[1] = 1;
2513		}
2514		if (!tp->ops->walk)
2515			continue;
2516		arg.w.fn = tcf_node_dump;
2517		arg.skb = skb;
2518		arg.cb = cb;
2519		arg.block = block;
2520		arg.q = q;
2521		arg.parent = parent;
2522		arg.w.stop = 0;
2523		arg.w.skip = cb->args[1] - 1;
2524		arg.w.count = 0;
2525		arg.w.cookie = cb->args[2];
2526		arg.terse_dump = terse;
2527		tp->ops->walk(tp, &arg.w, true);
2528		cb->args[2] = arg.w.cookie;
2529		cb->args[1] = arg.w.count + 1;
2530		if (arg.w.stop)
2531			goto errout;
2532	}
2533	return true;
2534
2535errout:
2536	tcf_proto_put(tp, true, NULL);
2537	return false;
2538}
2539
2540static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
2541	[TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
2542};
2543
2544/* called with RTNL */
2545static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
2546{
2547	struct tcf_chain *chain, *chain_prev;
2548	struct net *net = sock_net(skb->sk);
2549	struct nlattr *tca[TCA_MAX + 1];
2550	struct Qdisc *q = NULL;
2551	struct tcf_block *block;
2552	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2553	bool terse_dump = false;
2554	long index_start;
2555	long index;
2556	u32 parent;
2557	int err;
2558
2559	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2560		return skb->len;
2561
2562	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2563				     tcf_tfilter_dump_policy, cb->extack);
2564	if (err)
2565		return err;
2566
2567	if (tca[TCA_DUMP_FLAGS]) {
2568		struct nla_bitfield32 flags =
2569			nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
2570
2571		terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
2572	}
2573
2574	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2575		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2576		if (!block)
2577			goto out;
2578		/* If we work with block index, q is NULL and parent value
2579		 * will never be used in the following code. The check
2580		 * in tcf_fill_node prevents it. However, compiler does not
2581		 * in tcf_fill_node prevents it. However, the compiler does not
2582		 * about parent being uninitialized.
2583		 */
2584		parent = 0;
2585	} else {
2586		const struct Qdisc_class_ops *cops;
2587		struct net_device *dev;
2588		unsigned long cl = 0;
2589
2590		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2591		if (!dev)
2592			return skb->len;
2593
2594		parent = tcm->tcm_parent;
2595		if (!parent)
2596			q = rtnl_dereference(dev->qdisc);
2597		else
2598			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2599		if (!q)
2600			goto out;
2601		cops = q->ops->cl_ops;
2602		if (!cops)
2603			goto out;
2604		if (!cops->tcf_block)
2605			goto out;
2606		if (TC_H_MIN(tcm->tcm_parent)) {
2607			cl = cops->find(q, tcm->tcm_parent);
2608			if (cl == 0)
2609				goto out;
2610		}
2611		block = cops->tcf_block(q, cl, NULL);
2612		if (!block)
2613			goto out;
2614		parent = block->classid;
2615		if (tcf_block_shared(block))
2616			q = NULL;
2617	}
2618
2619	index_start = cb->args[0];
2620	index = 0;
2621
2622	for (chain = __tcf_get_next_chain(block, NULL);
2623	     chain;
2624	     chain_prev = chain,
2625		     chain = __tcf_get_next_chain(block, chain),
2626		     tcf_chain_put(chain_prev)) {
2627		if (tca[TCA_CHAIN] &&
2628		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
2629			continue;
2630		if (!tcf_chain_dump(chain, q, parent, skb, cb,
2631				    index_start, &index, terse_dump)) {
2632			tcf_chain_put(chain);
2633			err = -EMSGSIZE;
2634			break;
2635		}
2636	}
2637
2638	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
2639		tcf_block_refcnt_put(block, true);
2640	cb->args[0] = index;
2641
2642out:
2643	/* If we made no progress, the error (EMSGSIZE) is real */
2644	if (skb->len == 0 && err)
2645		return err;
2646	return skb->len;
2647}
2648
2649static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
2650			      void *tmplt_priv, u32 chain_index,
2651			      struct net *net, struct sk_buff *skb,
2652			      struct tcf_block *block,
2653			      u32 portid, u32 seq, u16 flags, int event)
2654{
2655	unsigned char *b = skb_tail_pointer(skb);
2656	const struct tcf_proto_ops *ops;
2657	struct nlmsghdr *nlh;
2658	struct tcmsg *tcm;
2659	void *priv;
2660
2661	ops = tmplt_ops;
2662	priv = tmplt_priv;
2663
2664	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags);
2665	if (!nlh)
2666		goto out_nlmsg_trim;
2667	tcm = nlmsg_data(nlh);
2668	tcm->tcm_family = AF_UNSPEC;
2669	tcm->tcm__pad1 = 0;
2670	tcm->tcm__pad2 = 0;
2671	tcm->tcm_handle = 0;
2672	if (block->q) {
2673		tcm->tcm_ifindex = qdisc_dev(block->q)->ifindex;
2674		tcm->tcm_parent = block->q->handle;
2675	} else {
2676		tcm->tcm_ifindex = TCM_IFINDEX_MAGIC_BLOCK;
2677		tcm->tcm_block_index = block->index;
2678	}
2679
2680	if (nla_put_u32(skb, TCA_CHAIN, chain_index))
2681		goto nla_put_failure;
2682
2683	if (ops) {
2684		if (nla_put_string(skb, TCA_KIND, ops->kind))
2685			goto nla_put_failure;
2686		if (ops->tmplt_dump(skb, net, priv) < 0)
2687			goto nla_put_failure;
2688	}
2689
2690	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
2691	return skb->len;
2692
2693out_nlmsg_trim:
2694nla_put_failure:
2695	nlmsg_trim(skb, b);
2696	return -EMSGSIZE;
2697}
2698
2699static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
2700			   u32 seq, u16 flags, int event, bool unicast)
2701{
2702	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2703	struct tcf_block *block = chain->block;
2704	struct net *net = block->net;
2705	struct sk_buff *skb;
2706	int err = 0;
2707
2708	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2709	if (!skb)
2710		return -ENOBUFS;
2711
2712	if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
2713			       chain->index, net, skb, block, portid,
2714			       seq, flags, event) <= 0) {
2715		kfree_skb(skb);
2716		return -EINVAL;
2717	}
2718
2719	if (unicast)
2720		err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2721	else
2722		err = rtnetlink_send(skb, net, portid, RTNLGRP_TC,
2723				     flags & NLM_F_ECHO);
2724
2725	if (err > 0)
2726		err = 0;
2727	return err;
2728}
2729
2730static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
2731				  void *tmplt_priv, u32 chain_index,
2732				  struct tcf_block *block, struct sk_buff *oskb,
2733				  u32 seq, u16 flags, bool unicast)
2734{
2735	u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
2736	struct net *net = block->net;
2737	struct sk_buff *skb;
2738
2739	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2740	if (!skb)
2741		return -ENOBUFS;
2742
2743	if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
2744			       block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
2745		kfree_skb(skb);
2746		return -EINVAL;
2747	}
2748
2749	if (unicast)
2750		return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT);
2751
2752	return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO);
2753}
2754
2755static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
2756			      struct nlattr **tca,
2757			      struct netlink_ext_ack *extack)
2758{
2759	const struct tcf_proto_ops *ops;
2760	char name[IFNAMSIZ];
2761	void *tmplt_priv;
2762
2763	/* If kind is not set, user did not specify template. */
2764	/* If kind is not set, the user did not specify a template. */
2765		return 0;
2766
2767	if (tcf_proto_check_kind(tca[TCA_KIND], name)) {
2768		NL_SET_ERR_MSG(extack, "Specified TC chain template name too long");
2769		return -EINVAL;
2770	}
2771
2772	ops = tcf_proto_lookup_ops(name, true, extack);
2773	if (IS_ERR(ops))
2774		return PTR_ERR(ops);
2775	if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
2776		NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
2777		module_put(ops->owner);
2778		return -EOPNOTSUPP;
2779	}
2780
2781	tmplt_priv = ops->tmplt_create(net, chain, tca, extack);
2782	if (IS_ERR(tmplt_priv)) {
2783		module_put(ops->owner);
2784		return PTR_ERR(tmplt_priv);
2785	}
2786	chain->tmplt_ops = ops;
2787	chain->tmplt_priv = tmplt_priv;
2788	return 0;
2789}
2790
2791static void tc_chain_tmplt_del(const struct tcf_proto_ops *tmplt_ops,
2792			       void *tmplt_priv)
2793{
2794	/* If template ops are not set, there is no template to destroy. */
2795	if (!tmplt_ops)
2796		return;
2797
2798	tmplt_ops->tmplt_destroy(tmplt_priv);
2799	module_put(tmplt_ops->owner);
2800}
2801
2802/* Add/delete/get a chain */
2803
2804static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n,
2805			struct netlink_ext_ack *extack)
2806{
2807	struct net *net = sock_net(skb->sk);
2808	struct nlattr *tca[TCA_MAX + 1];
2809	struct tcmsg *t;
2810	u32 parent;
2811	u32 chain_index;
2812	struct Qdisc *q;
2813	struct tcf_chain *chain;
2814	struct tcf_block *block;
2815	unsigned long cl;
2816	int err;
2817
2818	if (n->nlmsg_type != RTM_GETCHAIN &&
2819	    !netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN))
2820		return -EPERM;
2821
2822replay:
2823	q = NULL;
2824	err = nlmsg_parse_deprecated(n, sizeof(*t), tca, TCA_MAX,
2825				     rtm_tca_policy, extack);
2826	if (err < 0)
2827		return err;
2828
2829	t = nlmsg_data(n);
2830	parent = t->tcm_parent;
2831	cl = 0;
2832
2833	block = tcf_block_find(net, &q, &parent, &cl,
2834			       t->tcm_ifindex, t->tcm_block_index, extack);
2835	if (IS_ERR(block))
2836		return PTR_ERR(block);
2837
2838	chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0;
2839	if (chain_index > TC_ACT_EXT_VAL_MASK) {
2840		NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit");
2841		err = -EINVAL;
2842		goto errout_block;
2843	}
2844
2845	mutex_lock(&block->lock);
2846	chain = tcf_chain_lookup(block, chain_index);
2847	if (n->nlmsg_type == RTM_NEWCHAIN) {
2848		if (chain) {
2849			if (tcf_chain_held_by_acts_only(chain)) {
2850				/* The chain exists only because there is
2851				 * some action referencing it.
2852				 */
2853				tcf_chain_hold(chain);
2854			} else {
2855				NL_SET_ERR_MSG(extack, "Filter chain already exists");
2856				err = -EEXIST;
2857				goto errout_block_locked;
2858			}
2859		} else {
2860			if (!(n->nlmsg_flags & NLM_F_CREATE)) {
2861				NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain");
2862				err = -ENOENT;
2863				goto errout_block_locked;
2864			}
2865			chain = tcf_chain_create(block, chain_index);
2866			if (!chain) {
2867				NL_SET_ERR_MSG(extack, "Failed to create filter chain");
2868				err = -ENOMEM;
2869				goto errout_block_locked;
2870			}
2871		}
2872	} else {
2873		if (!chain || tcf_chain_held_by_acts_only(chain)) {
2874			NL_SET_ERR_MSG(extack, "Cannot find specified filter chain");
2875			err = -EINVAL;
2876			goto errout_block_locked;
2877		}
2878		tcf_chain_hold(chain);
2879	}
2880
2881	if (n->nlmsg_type == RTM_NEWCHAIN) {
2882		/* Modifying chain requires holding parent block lock. In case
2883		 * the chain was successfully added, take a reference to the
2884		 * chain. This ensures that an empty chain does not disappear at
2885		 * the end of this function.
2886		 */
2887		tcf_chain_hold(chain);
2888		chain->explicitly_created = true;
2889	}
2890	mutex_unlock(&block->lock);
2891
2892	switch (n->nlmsg_type) {
2893	case RTM_NEWCHAIN:
2894		err = tc_chain_tmplt_add(chain, net, tca, extack);
2895		if (err) {
2896			tcf_chain_put_explicitly_created(chain);
2897			goto errout;
2898		}
2899
2900		tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
2901				RTM_NEWCHAIN, false);
2902		break;
2903	case RTM_DELCHAIN:
2904		tfilter_notify_chain(net, skb, block, q, parent, n,
2905				     chain, RTM_DELTFILTER, true);
2906		/* Flush the chain first as the user requested chain removal. */
2907		tcf_chain_flush(chain, true);
2908		/* In case the chain was successfully deleted, put a reference
2909		 * to the chain previously taken during addition.
2910		 */
2911		tcf_chain_put_explicitly_created(chain);
2912		break;
2913	case RTM_GETCHAIN:
2914		err = tc_chain_notify(chain, skb, n->nlmsg_seq,
2915				      n->nlmsg_flags, n->nlmsg_type, true);
2916		if (err < 0)
2917			NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
2918		break;
2919	default:
2920		err = -EOPNOTSUPP;
2921		NL_SET_ERR_MSG(extack, "Unsupported message type");
2922		goto errout;
2923	}
2924
2925errout:
2926	tcf_chain_put(chain);
2927errout_block:
2928	tcf_block_release(q, block, true);
2929	if (err == -EAGAIN)
2930		/* Replay the request. */
2931		goto replay;
2932	return err;
2933
2934errout_block_locked:
2935	mutex_unlock(&block->lock);
2936	goto errout_block;
2937}
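/* For illustration only (not part of the original source): RTM_NEWCHAIN,
 * RTM_DELCHAIN and RTM_GETCHAIN requests all land in the handler above;
 * iproute2 reaches it with commands roughly like
 * "tc chain add dev eth0 ingress chain 1 flower", where the trailing
 * classifier spec, if any, becomes the chain template parsed by
 * tc_chain_tmplt_add().
 */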
2938
2939/* called with RTNL */
2940static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
2941{
2942	struct net *net = sock_net(skb->sk);
2943	struct nlattr *tca[TCA_MAX + 1];
2944	struct Qdisc *q = NULL;
2945	struct tcf_block *block;
2946	struct tcmsg *tcm = nlmsg_data(cb->nlh);
2947	struct tcf_chain *chain;
2948	long index_start;
2949	long index;
2950	u32 parent;
2951	int err;
2952
2953	if (nlmsg_len(cb->nlh) < sizeof(*tcm))
2954		return skb->len;
2955
2956	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
2957				     rtm_tca_policy, cb->extack);
2958	if (err)
2959		return err;
2960
2961	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
2962		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
2963		if (!block)
2964			goto out;
2965		/* If we work with block index, q is NULL and parent value
2966		 * will never be used in the following code. The check
2967		 * in tcf_fill_node prevents it. However, the compiler does not
2968		 * see that far, so set parent to zero to silence the warning
2969		 * about parent being uninitialized.
2970		 */
2971		parent = 0;
2972	} else {
2973		const struct Qdisc_class_ops *cops;
2974		struct net_device *dev;
2975		unsigned long cl = 0;
2976
2977		dev = __dev_get_by_index(net, tcm->tcm_ifindex);
2978		if (!dev)
2979			return skb->len;
2980
2981		parent = tcm->tcm_parent;
2982		if (!parent) {
2983			q = rtnl_dereference(dev->qdisc);
2984			parent = q->handle;
2985		} else {
2986			q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent));
2987		}
2988		if (!q)
2989			goto out;
2990		cops = q->ops->cl_ops;
2991		if (!cops)
2992			goto out;
2993		if (!cops->tcf_block)
2994			goto out;
2995		if (TC_H_MIN(tcm->tcm_parent)) {
2996			cl = cops->find(q, tcm->tcm_parent);
2997			if (cl == 0)
2998				goto out;
2999		}
3000		block = cops->tcf_block(q, cl, NULL);
3001		if (!block)
3002			goto out;
3003		if (tcf_block_shared(block))
3004			q = NULL;
3005	}
3006
3007	index_start = cb->args[0];
3008	index = 0;
3009
3010	mutex_lock(&block->lock);
3011	list_for_each_entry(chain, &block->chain_list, list) {
3012		if ((tca[TCA_CHAIN] &&
3013		     nla_get_u32(tca[TCA_CHAIN]) != chain->index))
3014			continue;
3015		if (index < index_start) {
3016			index++;
3017			continue;
3018		}
3019		if (tcf_chain_held_by_acts_only(chain))
3020			continue;
3021		err = tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
3022					 chain->index, net, skb, block,
3023					 NETLINK_CB(cb->skb).portid,
3024					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
3025					 RTM_NEWCHAIN);
3026		if (err <= 0)
3027			break;
3028		index++;
3029	}
3030	mutex_unlock(&block->lock);
3031
3032	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK)
3033		tcf_block_refcnt_put(block, true);
3034	cb->args[0] = index;
3035
3036out:
3037	/* If we did no progress, the error (EMSGSIZE) is real */
3038	/* If we made no progress, the error (EMSGSIZE) is real */
3039		return err;
3040	return skb->len;
3041}
3042
3043void tcf_exts_destroy(struct tcf_exts *exts)
3044{
3045#ifdef CONFIG_NET_CLS_ACT
3046	if (exts->actions) {
3047		tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
3048		kfree(exts->actions);
3049	}
3050	exts->nr_actions = 0;
3051#endif
3052}
3053EXPORT_SYMBOL(tcf_exts_destroy);
3054
3055int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
3056		      struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr,
3057		      bool rtnl_held, struct netlink_ext_ack *extack)
3058{
3059#ifdef CONFIG_NET_CLS_ACT
3060	{
3061		int init_res[TCA_ACT_MAX_PRIO] = {};
3062		struct tc_action *act;
3063		size_t attr_size = 0;
3064
3065		if (exts->police && tb[exts->police]) {
3066			struct tc_action_ops *a_o;
3067
3068			a_o = tc_action_load_ops("police", tb[exts->police], rtnl_held, extack);
3069			if (IS_ERR(a_o))
3070				return PTR_ERR(a_o);
3071			act = tcf_action_init_1(net, tp, tb[exts->police],
3072						rate_tlv, "police", ovr,
3073						TCA_ACT_BIND, a_o, init_res,
3074						rtnl_held, extack);
3075			module_put(a_o->owner);
3076			if (IS_ERR(act))
3077				return PTR_ERR(act);
3078
3079			act->type = exts->type = TCA_OLD_COMPAT;
3080			exts->actions[0] = act;
3081			exts->nr_actions = 1;
3082			tcf_idr_insert_many(exts->actions);
3083		} else if (exts->action && tb[exts->action]) {
3084			int err;
3085
3086			err = tcf_action_init(net, tp, tb[exts->action],
3087					      rate_tlv, NULL, ovr, TCA_ACT_BIND,
3088					      exts->actions, init_res,
3089					      &attr_size, rtnl_held, extack);
3090			if (err < 0)
3091				return err;
3092			exts->nr_actions = err;
3093		}
3094	}
3095#else
3096	if ((exts->action && tb[exts->action]) ||
3097	    (exts->police && tb[exts->police])) {
3098		NL_SET_ERR_MSG(extack, "Classifier actions are not supported per compile options (CONFIG_NET_CLS_ACT)");
3099		return -EOPNOTSUPP;
3100	}
3101#endif
3102
3103	return 0;
3104}
3105EXPORT_SYMBOL(tcf_exts_validate);
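/* A minimal usage sketch (not taken from this file): a classifier's ->change()
 * implementation typically initializes and validates its extensions with the
 * helper above; "my_filter", TCA_MYCLS_ACT and TCA_MYCLS_POLICE below are
 * placeholder names for the caller's own filter struct and attribute ids:
 *
 *	err = tcf_exts_init(&my_filter->exts, net, TCA_MYCLS_ACT,
 *			    TCA_MYCLS_POLICE);
 *	if (err < 0)
 *		return err;
 *	err = tcf_exts_validate(net, tp, tb, est, &my_filter->exts, ovr,
 *				rtnl_held, extack);
 *	if (err < 0)
 *		goto errout;
 */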
3106
3107void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src)
3108{
3109#ifdef CONFIG_NET_CLS_ACT
3110	struct tcf_exts old = *dst;
3111
3112	*dst = *src;
3113	tcf_exts_destroy(&old);
3114#endif
3115}
3116EXPORT_SYMBOL(tcf_exts_change);
3117
3118#ifdef CONFIG_NET_CLS_ACT
3119static struct tc_action *tcf_exts_first_act(struct tcf_exts *exts)
3120{
3121	if (exts->nr_actions == 0)
3122		return NULL;
3123	else
3124		return exts->actions[0];
3125}
3126#endif
3127
3128int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
3129{
3130#ifdef CONFIG_NET_CLS_ACT
3131	struct nlattr *nest;
3132
3133	if (exts->action && tcf_exts_has_actions(exts)) {
3134		/*
3135		 * Again, for backward-compatible mode - we want to work with
3136		 * both old and new modes of entering tc data even if iproute2
3137		 * was newer - jhs
3138		 */
3139		if (exts->type != TCA_OLD_COMPAT) {
3140			nest = nla_nest_start_noflag(skb, exts->action);
3141			if (nest == NULL)
3142				goto nla_put_failure;
3143
3144			if (tcf_action_dump(skb, exts->actions, 0, 0, false)
3145			    < 0)
3146				goto nla_put_failure;
3147			nla_nest_end(skb, nest);
3148		} else if (exts->police) {
3149			struct tc_action *act = tcf_exts_first_act(exts);
3150			nest = nla_nest_start_noflag(skb, exts->police);
3151			if (nest == NULL || !act)
3152				goto nla_put_failure;
3153			if (tcf_action_dump_old(skb, act, 0, 0) < 0)
3154				goto nla_put_failure;
3155			nla_nest_end(skb, nest);
3156		}
3157	}
3158	return 0;
3159
3160nla_put_failure:
3161	nla_nest_cancel(skb, nest);
3162	return -1;
3163#else
3164	return 0;
3165#endif
3166}
3167EXPORT_SYMBOL(tcf_exts_dump);
3168
3169int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
3170{
3171#ifdef CONFIG_NET_CLS_ACT
3172	struct nlattr *nest;
3173
3174	if (!exts->action || !tcf_exts_has_actions(exts))
3175		return 0;
3176
3177	nest = nla_nest_start_noflag(skb, exts->action);
3178	if (!nest)
3179		goto nla_put_failure;
3180
3181	if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
3182		goto nla_put_failure;
3183	nla_nest_end(skb, nest);
3184	return 0;
3185
3186nla_put_failure:
3187	nla_nest_cancel(skb, nest);
3188	return -1;
3189#else
3190	return 0;
3191#endif
3192}
3193EXPORT_SYMBOL(tcf_exts_terse_dump);
3194
3195int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
3196{
3197#ifdef CONFIG_NET_CLS_ACT
3198	struct tc_action *a = tcf_exts_first_act(exts);
3199	if (a != NULL && tcf_action_copy_stats(skb, a, 1) < 0)
3200		return -1;
3201#endif
3202	return 0;
3203}
3204EXPORT_SYMBOL(tcf_exts_dump_stats);
3205
3206static void tcf_block_offload_inc(struct tcf_block *block, u32 *flags)
3207{
3208	if (*flags & TCA_CLS_FLAGS_IN_HW)
3209		return;
3210	*flags |= TCA_CLS_FLAGS_IN_HW;
3211	atomic_inc(&block->offloadcnt);
3212}
3213
3214static void tcf_block_offload_dec(struct tcf_block *block, u32 *flags)
3215{
3216	if (!(*flags & TCA_CLS_FLAGS_IN_HW))
3217		return;
3218	*flags &= ~TCA_CLS_FLAGS_IN_HW;
3219	atomic_dec(&block->offloadcnt);
3220}
3221
3222static void tc_cls_offload_cnt_update(struct tcf_block *block,
3223				      struct tcf_proto *tp, u32 *cnt,
3224				      u32 *flags, u32 diff, bool add)
3225{
3226	lockdep_assert_held(&block->cb_lock);
3227
3228	spin_lock(&tp->lock);
3229	if (add) {
3230		if (!*cnt)
3231			tcf_block_offload_inc(block, flags);
3232		*cnt += diff;
3233	} else {
3234		*cnt -= diff;
3235		if (!*cnt)
3236			tcf_block_offload_dec(block, flags);
3237	}
3238	spin_unlock(&tp->lock);
3239}
3240
3241static void
3242tc_cls_offload_cnt_reset(struct tcf_block *block, struct tcf_proto *tp,
3243			 u32 *cnt, u32 *flags)
3244{
3245	lockdep_assert_held(&block->cb_lock);
3246
3247	spin_lock(&tp->lock);
3248	tcf_block_offload_dec(block, flags);
3249	*cnt = 0;
3250	spin_unlock(&tp->lock);
3251}
3252
3253static int
3254__tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3255		   void *type_data, bool err_stop)
3256{
3257	struct flow_block_cb *block_cb;
3258	int ok_count = 0;
3259	int err;
3260
3261	list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
3262		err = block_cb->cb(type, type_data, block_cb->cb_priv);
3263		if (err) {
3264			if (err_stop)
3265				return err;
3266		} else {
3267			ok_count++;
3268		}
3269	}
3270	return ok_count;
3271}
3272
3273int tc_setup_cb_call(struct tcf_block *block, enum tc_setup_type type,
3274		     void *type_data, bool err_stop, bool rtnl_held)
3275{
3276	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3277	int ok_count;
3278
3279retry:
3280	if (take_rtnl)
3281		rtnl_lock();
3282	down_read(&block->cb_lock);
3283	/* Need to obtain the rtnl lock if the block is bound to devs that
3284	 * require it. In the block bind code cb_lock is obtained while holding
3285	 * rtnl, so we must obtain the locks in the same order here.
3286	 */
3287	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3288		up_read(&block->cb_lock);
3289		take_rtnl = true;
3290		goto retry;
3291	}
3292
3293	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3294
3295	up_read(&block->cb_lock);
3296	if (take_rtnl)
3297		rtnl_unlock();
3298	return ok_count;
3299}
3300EXPORT_SYMBOL(tc_setup_cb_call);
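/* A hedged usage sketch (not taken from this file): classifiers use this
 * variant for callbacks that do not alter the offload counters, e.g. pulling
 * hardware stats, roughly in the cls_flower style:
 *
 *	tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower,
 *			 false, rtnl_held);
 */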
3301
3302/* Non-destructive filter add. If a filter that wasn't already in hardware is
3303 * successfully offloaded, increment the block offloads counter. On failure,
3304 * the previously offloaded filter is considered to be intact and the offloads
3305 * counter is not decremented.
3306 */
3307
3308int tc_setup_cb_add(struct tcf_block *block, struct tcf_proto *tp,
3309		    enum tc_setup_type type, void *type_data, bool err_stop,
3310		    u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3311{
3312	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3313	int ok_count;
3314
3315retry:
3316	if (take_rtnl)
3317		rtnl_lock();
3318	down_read(&block->cb_lock);
3319	/* Need to obtain the rtnl lock if the block is bound to devs that
3320	 * require it. In the block bind code cb_lock is obtained while holding
3321	 * rtnl, so we must obtain the locks in the same order here.
3322	 */
3323	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3324		up_read(&block->cb_lock);
3325		take_rtnl = true;
3326		goto retry;
3327	}
3328
3329	/* Make sure all netdevs sharing this block are offload-capable. */
3330	if (block->nooffloaddevcnt && err_stop) {
3331		ok_count = -EOPNOTSUPP;
3332		goto err_unlock;
3333	}
3334
3335	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3336	if (ok_count < 0)
3337		goto err_unlock;
3338
3339	if (tp->ops->hw_add)
3340		tp->ops->hw_add(tp, type_data);
3341	if (ok_count > 0)
3342		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags,
3343					  ok_count, true);
3344err_unlock:
3345	up_read(&block->cb_lock);
3346	if (take_rtnl)
3347		rtnl_unlock();
3348	return ok_count < 0 ? ok_count : 0;
3349}
3350EXPORT_SYMBOL(tc_setup_cb_add);
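/* A hedged usage sketch (not taken from this file): a classifier offloading a
 * newly created filter calls the helper above from its hardware-replace path;
 * an offload failure is treated as fatal only when software fallback is
 * disallowed. "skip_sw" and "f" stand in for the caller's own flag and filter,
 * and the replace/destroy variants below follow the same calling pattern:
 *
 *	err = tc_setup_cb_add(block, tp, TC_SETUP_CLSFLOWER, &cls_flower,
 *			      skip_sw, &f->flags, &f->in_hw_count, rtnl_held);
 *	if (err && skip_sw)
 *		goto errout;
 *	if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW))
 *		goto errout;
 */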
3351
3352/* Destructive filter replace. If a filter that wasn't already in hardware is
3353 * successfully offloaded, increment the block offload counter. On failure,
3354 * the previously offloaded filter is considered to be destroyed and the
3355 * offload counter is decremented.
3356 */
3357
3358int tc_setup_cb_replace(struct tcf_block *block, struct tcf_proto *tp,
3359			enum tc_setup_type type, void *type_data, bool err_stop,
3360			u32 *old_flags, unsigned int *old_in_hw_count,
3361			u32 *new_flags, unsigned int *new_in_hw_count,
3362			bool rtnl_held)
3363{
3364	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3365	int ok_count;
3366
3367retry:
3368	if (take_rtnl)
3369		rtnl_lock();
3370	down_read(&block->cb_lock);
3371	/* Need to obtain the rtnl lock if the block is bound to devs that
3372	 * require it. In the block bind code cb_lock is obtained while holding
3373	 * rtnl, so we must obtain the locks in the same order here.
3374	 */
3375	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3376		up_read(&block->cb_lock);
3377		take_rtnl = true;
3378		goto retry;
3379	}
3380
3381	/* Make sure all netdevs sharing this block are offload-capable. */
3382	if (block->nooffloaddevcnt && err_stop) {
3383		ok_count = -EOPNOTSUPP;
3384		goto err_unlock;
3385	}
3386
3387	tc_cls_offload_cnt_reset(block, tp, old_in_hw_count, old_flags);
3388	if (tp->ops->hw_del)
3389		tp->ops->hw_del(tp, type_data);
3390
3391	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3392	if (ok_count < 0)
3393		goto err_unlock;
3394
3395	if (tp->ops->hw_add)
3396		tp->ops->hw_add(tp, type_data);
3397	if (ok_count > 0)
3398		tc_cls_offload_cnt_update(block, tp, new_in_hw_count,
3399					  new_flags, ok_count, true);
3400err_unlock:
3401	up_read(&block->cb_lock);
3402	if (take_rtnl)
3403		rtnl_unlock();
3404	return ok_count < 0 ? ok_count : 0;
3405}
3406EXPORT_SYMBOL(tc_setup_cb_replace);
3407
3408/* Destroy the filter and decrement the block offload counter, if the filter
3409 * was previously offloaded.
3410 */
3411
3412int tc_setup_cb_destroy(struct tcf_block *block, struct tcf_proto *tp,
3413			enum tc_setup_type type, void *type_data, bool err_stop,
3414			u32 *flags, unsigned int *in_hw_count, bool rtnl_held)
3415{
3416	bool take_rtnl = READ_ONCE(block->lockeddevcnt) && !rtnl_held;
3417	int ok_count;
3418
3419retry:
3420	if (take_rtnl)
3421		rtnl_lock();
3422	down_read(&block->cb_lock);
3423	/* Need to obtain the rtnl lock if the block is bound to devs that
3424	 * require it. In the block bind code cb_lock is obtained while holding
3425	 * rtnl, so we must obtain the locks in the same order here.
3426	 */
3427	if (!rtnl_held && !take_rtnl && block->lockeddevcnt) {
3428		up_read(&block->cb_lock);
3429		take_rtnl = true;
3430		goto retry;
3431	}
3432
3433	ok_count = __tc_setup_cb_call(block, type, type_data, err_stop);
3434
3435	tc_cls_offload_cnt_reset(block, tp, in_hw_count, flags);
3436	if (tp->ops->hw_del)
3437		tp->ops->hw_del(tp, type_data);
3438
3439	up_read(&block->cb_lock);
3440	if (take_rtnl)
3441		rtnl_unlock();
3442	return ok_count < 0 ? ok_count : 0;
3443}
3444EXPORT_SYMBOL(tc_setup_cb_destroy);
3445
3446int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
3447			  bool add, flow_setup_cb_t *cb,
3448			  enum tc_setup_type type, void *type_data,
3449			  void *cb_priv, u32 *flags, unsigned int *in_hw_count)
3450{
3451	int err = cb(type, type_data, cb_priv);
3452
3453	if (err) {
3454		if (add && tc_skip_sw(*flags))
3455			return err;
3456	} else {
3457		tc_cls_offload_cnt_update(block, tp, in_hw_count, flags, 1,
3458					  add);
3459	}
3460
3461	return 0;
3462}
3463EXPORT_SYMBOL(tc_setup_cb_reoffload);
3464
3465static int tcf_act_get_cookie(struct flow_action_entry *entry,
3466			      const struct tc_action *act)
3467{
3468	struct tc_cookie *cookie;
3469	int err = 0;
3470
3471	rcu_read_lock();
3472	cookie = rcu_dereference(act->act_cookie);
3473	if (cookie) {
3474		entry->cookie = flow_action_cookie_create(cookie->data,
3475							  cookie->len,
3476							  GFP_ATOMIC);
3477		if (!entry->cookie)
3478			err = -ENOMEM;
3479	}
3480	rcu_read_unlock();
3481	return err;
3482}
3483
3484static void tcf_act_put_cookie(struct flow_action_entry *entry)
3485{
3486	flow_action_cookie_destroy(entry->cookie);
3487}
3488
3489void tc_cleanup_flow_action(struct flow_action *flow_action)
3490{
3491	struct flow_action_entry *entry;
3492	int i;
3493
3494	flow_action_for_each(i, entry, flow_action) {
3495		tcf_act_put_cookie(entry);
3496		if (entry->destructor)
3497			entry->destructor(entry->destructor_priv);
3498	}
3499}
3500EXPORT_SYMBOL(tc_cleanup_flow_action);
3501
3502static void tcf_mirred_get_dev(struct flow_action_entry *entry,
3503			       const struct tc_action *act)
3504{
3505#ifdef CONFIG_NET_CLS_ACT
3506	entry->dev = act->ops->get_dev(act, &entry->destructor);
3507	if (!entry->dev)
3508		return;
3509	entry->destructor_priv = entry->dev;
3510#endif
3511}
3512
3513static void tcf_tunnel_encap_put_tunnel(void *priv)
3514{
3515	struct ip_tunnel_info *tunnel = priv;
3516
3517	kfree(tunnel);
3518}
3519
3520static int tcf_tunnel_encap_get_tunnel(struct flow_action_entry *entry,
3521				       const struct tc_action *act)
3522{
3523	entry->tunnel = tcf_tunnel_info_copy(act);
3524	if (!entry->tunnel)
3525		return -ENOMEM;
3526	entry->destructor = tcf_tunnel_encap_put_tunnel;
3527	entry->destructor_priv = entry->tunnel;
3528	return 0;
3529}
3530
3531static void tcf_sample_get_group(struct flow_action_entry *entry,
3532				 const struct tc_action *act)
3533{
3534#ifdef CONFIG_NET_CLS_ACT
3535	entry->sample.psample_group =
3536		act->ops->get_psample_group(act, &entry->destructor);
3537	entry->destructor_priv = entry->sample.psample_group;
3538#endif
3539}
3540
3541static void tcf_gate_entry_destructor(void *priv)
3542{
3543	struct action_gate_entry *oe = priv;
3544
3545	kfree(oe);
3546}
3547
3548static int tcf_gate_get_entries(struct flow_action_entry *entry,
3549				const struct tc_action *act)
3550{
3551	entry->gate.entries = tcf_gate_get_list(act);
3552
3553	if (!entry->gate.entries)
3554		return -EINVAL;
3555
3556	entry->destructor = tcf_gate_entry_destructor;
3557	entry->destructor_priv = entry->gate.entries;
3558
3559	return 0;
3560}
3561
3562static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats)
3563{
3564	if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY))
3565		return FLOW_ACTION_HW_STATS_DONT_CARE;
3566	else if (!hw_stats)
3567		return FLOW_ACTION_HW_STATS_DISABLED;
3568
3569	return hw_stats;
3570}
3571
3572int tc_setup_flow_action(struct flow_action *flow_action,
3573			 const struct tcf_exts *exts)
3574{
3575	struct tc_action *act;
3576	int i, j, k, err = 0;
3577
3578	BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
3579	BUILD_BUG_ON(TCA_ACT_HW_STATS_IMMEDIATE != FLOW_ACTION_HW_STATS_IMMEDIATE);
3580	BUILD_BUG_ON(TCA_ACT_HW_STATS_DELAYED != FLOW_ACTION_HW_STATS_DELAYED);
3581
3582	if (!exts)
3583		return 0;
3584
3585	j = 0;
3586	tcf_exts_for_each_action(i, act, exts) {
3587		struct flow_action_entry *entry;
3588
3589		entry = &flow_action->entries[j];
3590		spin_lock_bh(&act->tcfa_lock);
3591		err = tcf_act_get_cookie(entry, act);
3592		if (err)
3593			goto err_out_locked;
3594
3595		entry->hw_stats = tc_act_hw_stats(act->hw_stats);
3596
3597		if (is_tcf_gact_ok(act)) {
3598			entry->id = FLOW_ACTION_ACCEPT;
3599		} else if (is_tcf_gact_shot(act)) {
3600			entry->id = FLOW_ACTION_DROP;
3601		} else if (is_tcf_gact_trap(act)) {
3602			entry->id = FLOW_ACTION_TRAP;
3603		} else if (is_tcf_gact_goto_chain(act)) {
3604			entry->id = FLOW_ACTION_GOTO;
3605			entry->chain_index = tcf_gact_goto_chain_index(act);
3606		} else if (is_tcf_mirred_egress_redirect(act)) {
3607			entry->id = FLOW_ACTION_REDIRECT;
3608			tcf_mirred_get_dev(entry, act);
3609		} else if (is_tcf_mirred_egress_mirror(act)) {
3610			entry->id = FLOW_ACTION_MIRRED;
3611			tcf_mirred_get_dev(entry, act);
3612		} else if (is_tcf_mirred_ingress_redirect(act)) {
3613			entry->id = FLOW_ACTION_REDIRECT_INGRESS;
3614			tcf_mirred_get_dev(entry, act);
3615		} else if (is_tcf_mirred_ingress_mirror(act)) {
3616			entry->id = FLOW_ACTION_MIRRED_INGRESS;
3617			tcf_mirred_get_dev(entry, act);
3618		} else if (is_tcf_vlan(act)) {
3619			switch (tcf_vlan_action(act)) {
3620			case TCA_VLAN_ACT_PUSH:
3621				entry->id = FLOW_ACTION_VLAN_PUSH;
3622				entry->vlan.vid = tcf_vlan_push_vid(act);
3623				entry->vlan.proto = tcf_vlan_push_proto(act);
3624				entry->vlan.prio = tcf_vlan_push_prio(act);
3625				break;
3626			case TCA_VLAN_ACT_POP:
3627				entry->id = FLOW_ACTION_VLAN_POP;
3628				break;
3629			case TCA_VLAN_ACT_MODIFY:
3630				entry->id = FLOW_ACTION_VLAN_MANGLE;
3631				entry->vlan.vid = tcf_vlan_push_vid(act);
3632				entry->vlan.proto = tcf_vlan_push_proto(act);
3633				entry->vlan.prio = tcf_vlan_push_prio(act);
3634				break;
3635			default:
3636				err = -EOPNOTSUPP;
3637				goto err_out_locked;
3638			}
3639		} else if (is_tcf_tunnel_set(act)) {
3640			entry->id = FLOW_ACTION_TUNNEL_ENCAP;
3641			err = tcf_tunnel_encap_get_tunnel(entry, act);
3642			if (err)
3643				goto err_out_locked;
3644		} else if (is_tcf_tunnel_release(act)) {
3645			entry->id = FLOW_ACTION_TUNNEL_DECAP;
3646		} else if (is_tcf_pedit(act)) {
3647			for (k = 0; k < tcf_pedit_nkeys(act); k++) {
3648				switch (tcf_pedit_cmd(act, k)) {
3649				case TCA_PEDIT_KEY_EX_CMD_SET:
3650					entry->id = FLOW_ACTION_MANGLE;
3651					break;
3652				case TCA_PEDIT_KEY_EX_CMD_ADD:
3653					entry->id = FLOW_ACTION_ADD;
3654					break;
3655				default:
3656					err = -EOPNOTSUPP;
3657					goto err_out_locked;
3658				}
3659				entry->mangle.htype = tcf_pedit_htype(act, k);
3660				entry->mangle.mask = tcf_pedit_mask(act, k);
3661				entry->mangle.val = tcf_pedit_val(act, k);
3662				entry->mangle.offset = tcf_pedit_offset(act, k);
3663				entry->hw_stats = tc_act_hw_stats(act->hw_stats);
3664				entry = &flow_action->entries[++j];
3665			}
3666		} else if (is_tcf_csum(act)) {
3667			entry->id = FLOW_ACTION_CSUM;
3668			entry->csum_flags = tcf_csum_update_flags(act);
3669		} else if (is_tcf_skbedit_mark(act)) {
3670			entry->id = FLOW_ACTION_MARK;
3671			entry->mark = tcf_skbedit_mark(act);
3672		} else if (is_tcf_sample(act)) {
3673			entry->id = FLOW_ACTION_SAMPLE;
3674			entry->sample.trunc_size = tcf_sample_trunc_size(act);
3675			entry->sample.truncate = tcf_sample_truncate(act);
3676			entry->sample.rate = tcf_sample_rate(act);
3677			tcf_sample_get_group(entry, act);
3678		} else if (is_tcf_police(act)) {
3679			entry->id = FLOW_ACTION_POLICE;
3680			entry->police.burst = tcf_police_burst(act);
3681			entry->police.rate_bytes_ps =
3682				tcf_police_rate_bytes_ps(act);
3683			entry->police.mtu = tcf_police_tcfp_mtu(act);
3684			entry->police.index = act->tcfa_index;
3685		} else if (is_tcf_ct(act)) {
3686			entry->id = FLOW_ACTION_CT;
3687			entry->ct.action = tcf_ct_action(act);
3688			entry->ct.zone = tcf_ct_zone(act);
3689			entry->ct.flow_table = tcf_ct_ft(act);
3690		} else if (is_tcf_mpls(act)) {
3691			switch (tcf_mpls_action(act)) {
3692			case TCA_MPLS_ACT_PUSH:
3693				entry->id = FLOW_ACTION_MPLS_PUSH;
3694				entry->mpls_push.proto = tcf_mpls_proto(act);
3695				entry->mpls_push.label = tcf_mpls_label(act);
3696				entry->mpls_push.tc = tcf_mpls_tc(act);
3697				entry->mpls_push.bos = tcf_mpls_bos(act);
3698				entry->mpls_push.ttl = tcf_mpls_ttl(act);
3699				break;
3700			case TCA_MPLS_ACT_POP:
3701				entry->id = FLOW_ACTION_MPLS_POP;
3702				entry->mpls_pop.proto = tcf_mpls_proto(act);
3703				break;
3704			case TCA_MPLS_ACT_MODIFY:
3705				entry->id = FLOW_ACTION_MPLS_MANGLE;
3706				entry->mpls_mangle.label = tcf_mpls_label(act);
3707				entry->mpls_mangle.tc = tcf_mpls_tc(act);
3708				entry->mpls_mangle.bos = tcf_mpls_bos(act);
3709				entry->mpls_mangle.ttl = tcf_mpls_ttl(act);
3710				break;
3711			default:
3712				err = -EOPNOTSUPP;
3713				goto err_out_locked;
3714			}
3715		} else if (is_tcf_skbedit_ptype(act)) {
3716			entry->id = FLOW_ACTION_PTYPE;
3717			entry->ptype = tcf_skbedit_ptype(act);
3718		} else if (is_tcf_skbedit_priority(act)) {
3719			entry->id = FLOW_ACTION_PRIORITY;
3720			entry->priority = tcf_skbedit_priority(act);
3721		} else if (is_tcf_gate(act)) {
3722			entry->id = FLOW_ACTION_GATE;
3723			entry->gate.index = tcf_gate_index(act);
3724			entry->gate.prio = tcf_gate_prio(act);
3725			entry->gate.basetime = tcf_gate_basetime(act);
3726			entry->gate.cycletime = tcf_gate_cycletime(act);
3727			entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
3728			entry->gate.num_entries = tcf_gate_num_entries(act);
3729			err = tcf_gate_get_entries(entry, act);
3730			if (err)
3731				goto err_out_locked;
3732		} else {
3733			err = -EOPNOTSUPP;
3734			goto err_out_locked;
3735		}
3736		spin_unlock_bh(&act->tcfa_lock);
3737
3738		if (!is_tcf_pedit(act))
3739			j++;
3740	}
3741
3742err_out:
3743	if (err)
3744		tc_cleanup_flow_action(flow_action);
3745
3746	return err;
3747err_out_locked:
3748	spin_unlock_bh(&act->tcfa_lock);
3749	goto err_out;
3750}
3751EXPORT_SYMBOL(tc_setup_flow_action);
3752
3753unsigned int tcf_exts_num_actions(struct tcf_exts *exts)
3754{
3755	unsigned int num_acts = 0;
3756	struct tc_action *act;
3757	int i;
3758
3759	tcf_exts_for_each_action(i, act, exts) {
3760		if (is_tcf_pedit(act))
3761			num_acts += tcf_pedit_nkeys(act);
3762		else
3763			num_acts++;
3764	}
3765	return num_acts;
3766}
3767EXPORT_SYMBOL(tcf_exts_num_actions);
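/* A hedged sketch (not taken from this file) of how the two helpers above pair
 * up when a classifier builds an offload request: the action array is sized
 * with tcf_exts_num_actions() and then filled by tc_setup_flow_action(),
 * roughly in the cls_flower style:
 *
 *	rule = flow_rule_alloc(tcf_exts_num_actions(&f->exts));
 *	if (!rule)
 *		return -ENOMEM;
 *	err = tc_setup_flow_action(&rule->action, &f->exts);
 *	if (err)
 *		goto err_free_rule;
 */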
3768
3769#ifdef CONFIG_NET_CLS_ACT
3770static int tcf_qevent_parse_block_index(struct nlattr *block_index_attr,
3771					u32 *p_block_index,
3772					struct netlink_ext_ack *extack)
3773{
3774	*p_block_index = nla_get_u32(block_index_attr);
3775	if (!*p_block_index) {
3776		NL_SET_ERR_MSG(extack, "Block number may not be zero");
3777		return -EINVAL;
3778	}
3779
3780	return 0;
3781}
3782
3783int tcf_qevent_init(struct tcf_qevent *qe, struct Qdisc *sch,
3784		    enum flow_block_binder_type binder_type,
3785		    struct nlattr *block_index_attr,
3786		    struct netlink_ext_ack *extack)
3787{
3788	u32 block_index;
3789	int err;
3790
3791	if (!block_index_attr)
3792		return 0;
3793
3794	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
3795	if (err)
3796		return err;
3797
3798	if (!block_index)
3799		return 0;
3800
3801	qe->info.binder_type = binder_type;
3802	qe->info.chain_head_change = tcf_chain_head_change_dflt;
3803	qe->info.chain_head_change_priv = &qe->filter_chain;
3804	qe->info.block_index = block_index;
3805
3806	return tcf_block_get_ext(&qe->block, sch, &qe->info, extack);
3807}
3808EXPORT_SYMBOL(tcf_qevent_init);
3809
3810void tcf_qevent_destroy(struct tcf_qevent *qe, struct Qdisc *sch)
3811{
3812	if (qe->info.block_index)
3813		tcf_block_put_ext(qe->block, sch, &qe->info);
3814}
3815EXPORT_SYMBOL(tcf_qevent_destroy);
3816
3817int tcf_qevent_validate_change(struct tcf_qevent *qe, struct nlattr *block_index_attr,
3818			       struct netlink_ext_ack *extack)
3819{
3820	u32 block_index;
3821	int err;
3822
3823	if (!block_index_attr)
3824		return 0;
3825
3826	err = tcf_qevent_parse_block_index(block_index_attr, &block_index, extack);
3827	if (err)
3828		return err;
3829
3830	/* Bounce newly-configured block or change in block. */
3831	if (block_index != qe->info.block_index) {
3832		NL_SET_ERR_MSG(extack, "Change of blocks is not supported");
3833		return -EINVAL;
3834	}
3835
3836	return 0;
3837}
3838EXPORT_SYMBOL(tcf_qevent_validate_change);
3839
3840struct sk_buff *tcf_qevent_handle(struct tcf_qevent *qe, struct Qdisc *sch, struct sk_buff *skb,
3841				  struct sk_buff **to_free, int *ret)
3842{
3843	struct tcf_result cl_res;
3844	struct tcf_proto *fl;
3845
3846	if (!qe->info.block_index)
3847		return skb;
3848
3849	fl = rcu_dereference_bh(qe->filter_chain);
3850
3851	switch (tcf_classify(skb, fl, &cl_res, false)) {
3852	case TC_ACT_SHOT:
3853		qdisc_qstats_drop(sch);
3854		__qdisc_drop(skb, to_free);
3855		*ret = __NET_XMIT_BYPASS;
3856		return NULL;
3857	case TC_ACT_STOLEN:
3858	case TC_ACT_QUEUED:
3859	case TC_ACT_TRAP:
3860		__qdisc_drop(skb, to_free);
3861		*ret = __NET_XMIT_STOLEN;
3862		return NULL;
3863	case TC_ACT_REDIRECT:
3864		skb_do_redirect(skb);
3865		*ret = __NET_XMIT_STOLEN;
3866		return NULL;
3867	}
3868
3869	return skb;
3870}
3871EXPORT_SYMBOL(tcf_qevent_handle);
3872
3873int tcf_qevent_dump(struct sk_buff *skb, int attr_name, struct tcf_qevent *qe)
3874{
3875	if (!qe->info.block_index)
3876		return 0;
3877	return nla_put_u32(skb, attr_name, qe->info.block_index);
3878}
3879EXPORT_SYMBOL(tcf_qevent_dump);
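/* A hedged sketch (not taken from this file) of the qevent lifecycle as a
 * qdisc would wire it up; sch_red is the in-tree user, and
 * TCA_MYQDISC_EARLY_DROP_BLOCK is a placeholder attribute name:
 *
 *	init/change:	tcf_qevent_init(&q->qe_early_drop, sch,
 *				FLOW_BLOCK_BINDER_TYPE_RED_EARLY_DROP,
 *				tb[TCA_MYQDISC_EARLY_DROP_BLOCK], extack);
 *	enqueue:	skb = tcf_qevent_handle(&q->qe_early_drop, sch, skb,
 *				to_free, &ret);
 *	dump:		tcf_qevent_dump(skb, TCA_MYQDISC_EARLY_DROP_BLOCK,
 *				&q->qe_early_drop);
 *	destroy:	tcf_qevent_destroy(&q->qe_early_drop, sch);
 */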
3880#endif
3881
3882static __net_init int tcf_net_init(struct net *net)
3883{
3884	struct tcf_net *tn = net_generic(net, tcf_net_id);
3885
3886	spin_lock_init(&tn->idr_lock);
3887	idr_init(&tn->idr);
3888	return 0;
3889}
3890
3891static void __net_exit tcf_net_exit(struct net *net)
3892{
3893	struct tcf_net *tn = net_generic(net, tcf_net_id);
3894
3895	idr_destroy(&tn->idr);
3896}
3897
3898static struct pernet_operations tcf_net_ops = {
3899	.init = tcf_net_init,
3900	.exit = tcf_net_exit,
3901	.id   = &tcf_net_id,
3902	.size = sizeof(struct tcf_net),
3903};
3904
3905static int __init tc_filter_init(void)
3906{
3907	int err;
3908
3909	tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0);
3910	if (!tc_filter_wq)
3911		return -ENOMEM;
3912
3913	err = register_pernet_subsys(&tcf_net_ops);
3914	if (err)
3915		goto err_register_pernet_subsys;
3916
3917	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
3918		      RTNL_FLAG_DOIT_UNLOCKED);
3919	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
3920		      RTNL_FLAG_DOIT_UNLOCKED);
3921	rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_get_tfilter,
3922		      tc_dump_tfilter, RTNL_FLAG_DOIT_UNLOCKED);
3923	rtnl_register(PF_UNSPEC, RTM_NEWCHAIN, tc_ctl_chain, NULL, 0);
3924	rtnl_register(PF_UNSPEC, RTM_DELCHAIN, tc_ctl_chain, NULL, 0);
3925	rtnl_register(PF_UNSPEC, RTM_GETCHAIN, tc_ctl_chain,
3926		      tc_dump_chain, 0);
3927
3928	return 0;
3929
3930err_register_pernet_subsys:
3931	destroy_workqueue(tc_filter_wq);
3932	return err;
3933}
3934
3935subsys_initcall(tc_filter_init);
3936