1// SPDX-License-Identifier: GPL-2.0-only
2/****************************************************************************
3 * Driver for Solarflare network controllers and boards
4 * Copyright 2019 Solarflare Communications Inc.
5 * Copyright 2020-2022 Xilinx Inc.
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as published
9 * by the Free Software Foundation, incorporated herein by reference.
10 */
11
12#include <net/pkt_cls.h>
13#include <net/vxlan.h>
14#include <net/geneve.h>
15#include <net/tc_act/tc_ct.h>
16#include "tc.h"
17#include "tc_bindings.h"
18#include "tc_encap_actions.h"
19#include "tc_conntrack.h"
20#include "mae.h"
21#include "ef100_rep.h"
22#include "efx.h"
23
24enum efx_encap_type efx_tc_indr_netdev_type(struct net_device *net_dev)
25{
26	if (netif_is_vxlan(net_dev))
27		return EFX_ENCAP_TYPE_VXLAN;
28	if (netif_is_geneve(net_dev))
29		return EFX_ENCAP_TYPE_GENEVE;
30
31	return EFX_ENCAP_TYPE_NONE;
32}
33
34#define EFX_TC_HDR_TYPE_TTL_MASK ((u32)0xff)
35/* Hoplimit is stored in the most significant byte in the pedit ipv6 header action */
36#define EFX_TC_HDR_TYPE_HLIMIT_MASK ~((u32)0xff000000)
37#define EFX_EFV_PF	NULL
38/* Look up the representor information (efv) for a device.
39 * May return NULL for the PF (us), or an error pointer for a device that
40 * isn't supported as a TC offload endpoint
41 */
42struct efx_rep *efx_tc_flower_lookup_efv(struct efx_nic *efx,
43					 struct net_device *dev)
44{
45	struct efx_rep *efv;
46
47	if (!dev)
48		return ERR_PTR(-EOPNOTSUPP);
49	/* Is it us (the PF)? */
50	if (dev == efx->net_dev)
51		return EFX_EFV_PF;
52	/* Is it an efx vfrep at all? */
53	if (dev->netdev_ops != &efx_ef100_rep_netdev_ops)
54		return ERR_PTR(-EOPNOTSUPP);
55	/* Is it ours?  We don't support TC rules that include another
56	 * EF100's netdevices (not even on another port of the same NIC).
57	 */
58	efv = netdev_priv(dev);
59	if (efv->parent != efx)
60		return ERR_PTR(-EOPNOTSUPP);
61	return efv;
62}
63
64/* Convert a driver-internal vport ID into an internal device (PF or VF) */
65static s64 efx_tc_flower_internal_mport(struct efx_nic *efx, struct efx_rep *efv)
66{
67	u32 mport;
68
69	if (IS_ERR(efv))
70		return PTR_ERR(efv);
71	if (!efv) /* device is PF (us) */
72		efx_mae_mport_uplink(efx, &mport);
73	else /* device is repr */
74		efx_mae_mport_mport(efx, efv->mport, &mport);
75	return mport;
76}
77
78/* Convert a driver-internal vport ID into an external device (wire or VF) */
79s64 efx_tc_flower_external_mport(struct efx_nic *efx, struct efx_rep *efv)
80{
81	u32 mport;
82
83	if (IS_ERR(efv))
84		return PTR_ERR(efv);
85	if (!efv) /* device is PF (us) */
86		efx_mae_mport_wire(efx, &mport);
87	else /* device is repr */
88		efx_mae_mport_mport(efx, efv->mport, &mport);
89	return mport;
90}
91
92static const struct rhashtable_params efx_tc_mac_ht_params = {
93	.key_len	= offsetofend(struct efx_tc_mac_pedit_action, h_addr),
94	.key_offset	= 0,
95	.head_offset	= offsetof(struct efx_tc_mac_pedit_action, linkage),
96};
97
98static const struct rhashtable_params efx_tc_encap_match_ht_params = {
99	.key_len	= offsetof(struct efx_tc_encap_match, linkage),
100	.key_offset	= 0,
101	.head_offset	= offsetof(struct efx_tc_encap_match, linkage),
102};
103
104static const struct rhashtable_params efx_tc_match_action_ht_params = {
105	.key_len	= sizeof(unsigned long),
106	.key_offset	= offsetof(struct efx_tc_flow_rule, cookie),
107	.head_offset	= offsetof(struct efx_tc_flow_rule, linkage),
108};
109
110static const struct rhashtable_params efx_tc_lhs_rule_ht_params = {
111	.key_len	= sizeof(unsigned long),
112	.key_offset	= offsetof(struct efx_tc_lhs_rule, cookie),
113	.head_offset	= offsetof(struct efx_tc_lhs_rule, linkage),
114};
115
116static const struct rhashtable_params efx_tc_recirc_ht_params = {
117	.key_len	= offsetof(struct efx_tc_recirc_id, linkage),
118	.key_offset	= 0,
119	.head_offset	= offsetof(struct efx_tc_recirc_id, linkage),
120};
121
122static struct efx_tc_mac_pedit_action *efx_tc_flower_get_mac(struct efx_nic *efx,
123							     unsigned char h_addr[ETH_ALEN],
124							     struct netlink_ext_ack *extack)
125{
126	struct efx_tc_mac_pedit_action *ped, *old;
127	int rc;
128
129	ped = kzalloc(sizeof(*ped), GFP_USER);
130	if (!ped)
131		return ERR_PTR(-ENOMEM);
132	memcpy(ped->h_addr, h_addr, ETH_ALEN);
133	old = rhashtable_lookup_get_insert_fast(&efx->tc->mac_ht,
134						&ped->linkage,
135						efx_tc_mac_ht_params);
136	if (old) {
137		/* don't need our new entry */
138		kfree(ped);
139		if (IS_ERR(old)) /* oh dear, it's actually an error */
140			return ERR_CAST(old);
141		if (!refcount_inc_not_zero(&old->ref))
142			return ERR_PTR(-EAGAIN);
143		/* existing entry found, ref taken */
144		return old;
145	}
146
147	rc = efx_mae_allocate_pedit_mac(efx, ped);
148	if (rc < 0) {
149		NL_SET_ERR_MSG_MOD(extack, "Failed to store pedit MAC address in hw");
150		goto out_remove;
151	}
152
153	/* ref and return */
154	refcount_set(&ped->ref, 1);
155	return ped;
156out_remove:
157	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
158			       efx_tc_mac_ht_params);
159	kfree(ped);
160	return ERR_PTR(rc);
161}
162
163static void efx_tc_flower_put_mac(struct efx_nic *efx,
164				  struct efx_tc_mac_pedit_action *ped)
165{
166	if (!refcount_dec_and_test(&ped->ref))
167		return; /* still in use */
168	rhashtable_remove_fast(&efx->tc->mac_ht, &ped->linkage,
169			       efx_tc_mac_ht_params);
170	efx_mae_free_pedit_mac(efx, ped);
171	kfree(ped);
172}
173
174static void efx_tc_free_action_set(struct efx_nic *efx,
175				   struct efx_tc_action_set *act, bool in_hw)
176{
177	/* Failure paths calling this on the 'cursor' action set in_hw=false,
178	 * because if the alloc had succeeded we'd've put it in acts.list and
179	 * not still have it in act.
180	 */
181	if (in_hw) {
182		efx_mae_free_action_set(efx, act->fw_id);
183		/* in_hw is true iff we are on an acts.list; make sure to
184		 * remove ourselves from that list before we are freed.
185		 */
186		list_del(&act->list);
187	}
188	if (act->count) {
189		spin_lock_bh(&act->count->cnt->lock);
190		if (!list_empty(&act->count_user))
191			list_del(&act->count_user);
192		spin_unlock_bh(&act->count->cnt->lock);
193		efx_tc_flower_put_counter_index(efx, act->count);
194	}
195	if (act->encap_md) {
196		list_del(&act->encap_user);
197		efx_tc_flower_release_encap_md(efx, act->encap_md);
198	}
199	if (act->src_mac)
200		efx_tc_flower_put_mac(efx, act->src_mac);
201	if (act->dst_mac)
202		efx_tc_flower_put_mac(efx, act->dst_mac);
203	kfree(act);
204}
205
206static void efx_tc_free_action_set_list(struct efx_nic *efx,
207					struct efx_tc_action_set_list *acts,
208					bool in_hw)
209{
210	struct efx_tc_action_set *act, *next;
211
212	/* Failure paths set in_hw=false, because usually the acts didn't get
213	 * to efx_mae_alloc_action_set_list(); if they did, the failure tree
214	 * has a separate efx_mae_free_action_set_list() before calling us.
215	 */
216	if (in_hw)
217		efx_mae_free_action_set_list(efx, acts);
218	/* Any act that's on the list will be in_hw even if the list isn't */
219	list_for_each_entry_safe(act, next, &acts->list, list)
220		efx_tc_free_action_set(efx, act, true);
221	/* Don't kfree, as acts is embedded inside a struct efx_tc_flow_rule */
222}
223
224/* Boilerplate for the simple 'copy a field' cases */
225#define _MAP_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
226if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_##_name)) {		\
227	struct flow_match_##_type fm;					\
228									\
229	flow_rule_match_##_tcget(rule, &fm);				\
230	match->value._field = fm.key->_tcfield;				\
231	match->mask._field = fm.mask->_tcfield;				\
232}
233#define MAP_KEY_AND_MASK(_name, _type, _tcfield, _field)	\
234	_MAP_KEY_AND_MASK(_name, _type, _type, _tcfield, _field)
235#define MAP_ENC_KEY_AND_MASK(_name, _type, _tcget, _tcfield, _field)	\
236	_MAP_KEY_AND_MASK(ENC_##_name, _type, _tcget, _tcfield, _field)
237
238static int efx_tc_flower_parse_match(struct efx_nic *efx,
239				     struct flow_rule *rule,
240				     struct efx_tc_match *match,
241				     struct netlink_ext_ack *extack)
242{
243	struct flow_dissector *dissector = rule->match.dissector;
244	unsigned char ipv = 0;
245
246	/* Owing to internal TC infelicities, the IPV6_ADDRS key might be set
247	 * even on IPv4 filters; so rather than relying on dissector->used_keys
248	 * we check the addr_type in the CONTROL key.  If we don't find it (or
249	 * it's masked, which should never happen), we treat both IPV4_ADDRS
250	 * and IPV6_ADDRS as absent.
251	 */
252	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
253		struct flow_match_control fm;
254
255		flow_rule_match_control(rule, &fm);
256		if (IS_ALL_ONES(fm.mask->addr_type))
257			switch (fm.key->addr_type) {
258			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
259				ipv = 4;
260				break;
261			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
262				ipv = 6;
263				break;
264			default:
265				break;
266			}
267
268		if (fm.mask->flags & FLOW_DIS_IS_FRAGMENT) {
269			match->value.ip_frag = fm.key->flags & FLOW_DIS_IS_FRAGMENT;
270			match->mask.ip_frag = true;
271		}
272		if (fm.mask->flags & FLOW_DIS_FIRST_FRAG) {
273			match->value.ip_firstfrag = fm.key->flags & FLOW_DIS_FIRST_FRAG;
274			match->mask.ip_firstfrag = true;
275		}
276		if (fm.mask->flags & ~(FLOW_DIS_IS_FRAGMENT | FLOW_DIS_FIRST_FRAG)) {
277			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on control.flags %#x",
278					       fm.mask->flags);
279			return -EOPNOTSUPP;
280		}
281	}
282	if (dissector->used_keys &
283	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
284	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
285	      BIT_ULL(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
286	      BIT_ULL(FLOW_DISSECTOR_KEY_VLAN) |
287	      BIT_ULL(FLOW_DISSECTOR_KEY_CVLAN) |
288	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
289	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
290	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
291	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
292	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
293	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
294	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
295	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS) |
296	      BIT_ULL(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
297	      BIT_ULL(FLOW_DISSECTOR_KEY_CT) |
298	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
299	      BIT_ULL(FLOW_DISSECTOR_KEY_IP))) {
300		NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported flower keys %#llx",
301				       dissector->used_keys);
302		return -EOPNOTSUPP;
303	}
304
305	MAP_KEY_AND_MASK(BASIC, basic, n_proto, eth_proto);
306	/* Make sure we're IP if any L3/L4 keys used. */
307	if (!IS_ALL_ONES(match->mask.eth_proto) ||
308	    !(match->value.eth_proto == htons(ETH_P_IP) ||
309	      match->value.eth_proto == htons(ETH_P_IPV6)))
310		if (dissector->used_keys &
311		    (BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
312		     BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
313		     BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
314		     BIT_ULL(FLOW_DISSECTOR_KEY_IP) |
315		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
316			NL_SET_ERR_MSG_FMT_MOD(extack,
317					       "L3/L4 flower keys %#llx require protocol ipv[46]",
318					       dissector->used_keys);
319			return -EINVAL;
320		}
321
322	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
323		struct flow_match_vlan fm;
324
325		flow_rule_match_vlan(rule, &fm);
326		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
327			match->value.vlan_proto[0] = fm.key->vlan_tpid;
328			match->mask.vlan_proto[0] = fm.mask->vlan_tpid;
329			match->value.vlan_tci[0] = cpu_to_be16(fm.key->vlan_priority << 13 |
330							       fm.key->vlan_id);
331			match->mask.vlan_tci[0] = cpu_to_be16(fm.mask->vlan_priority << 13 |
332							      fm.mask->vlan_id);
333		}
334	}
335
336	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
337		struct flow_match_vlan fm;
338
339		flow_rule_match_cvlan(rule, &fm);
340		if (fm.mask->vlan_id || fm.mask->vlan_priority || fm.mask->vlan_tpid) {
341			match->value.vlan_proto[1] = fm.key->vlan_tpid;
342			match->mask.vlan_proto[1] = fm.mask->vlan_tpid;
343			match->value.vlan_tci[1] = cpu_to_be16(fm.key->vlan_priority << 13 |
344							       fm.key->vlan_id);
345			match->mask.vlan_tci[1] = cpu_to_be16(fm.mask->vlan_priority << 13 |
346							      fm.mask->vlan_id);
347		}
348	}
349
350	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
351		struct flow_match_eth_addrs fm;
352
353		flow_rule_match_eth_addrs(rule, &fm);
354		ether_addr_copy(match->value.eth_saddr, fm.key->src);
355		ether_addr_copy(match->value.eth_daddr, fm.key->dst);
356		ether_addr_copy(match->mask.eth_saddr, fm.mask->src);
357		ether_addr_copy(match->mask.eth_daddr, fm.mask->dst);
358	}
359
360	MAP_KEY_AND_MASK(BASIC, basic, ip_proto, ip_proto);
361	/* Make sure we're TCP/UDP if any L4 keys used. */
362	if ((match->value.ip_proto != IPPROTO_UDP &&
363	     match->value.ip_proto != IPPROTO_TCP) || !IS_ALL_ONES(match->mask.ip_proto))
364		if (dissector->used_keys &
365		    (BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
366		     BIT_ULL(FLOW_DISSECTOR_KEY_TCP))) {
367			NL_SET_ERR_MSG_FMT_MOD(extack,
368					       "L4 flower keys %#llx require ipproto udp or tcp",
369					       dissector->used_keys);
370			return -EINVAL;
371		}
372	MAP_KEY_AND_MASK(IP, ip, tos, ip_tos);
373	MAP_KEY_AND_MASK(IP, ip, ttl, ip_ttl);
374	if (ipv == 4) {
375		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, src, src_ip);
376		MAP_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, dst, dst_ip);
377	}
378#ifdef CONFIG_IPV6
379	else if (ipv == 6) {
380		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, src, src_ip6);
381		MAP_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, dst, dst_ip6);
382	}
383#endif
384	MAP_KEY_AND_MASK(PORTS, ports, src, l4_sport);
385	MAP_KEY_AND_MASK(PORTS, ports, dst, l4_dport);
386	MAP_KEY_AND_MASK(TCP, tcp, flags, tcp_flags);
387	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
388		struct flow_match_control fm;
389
390		flow_rule_match_enc_control(rule, &fm);
391		if (fm.mask->flags) {
392			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported match on enc_control.flags %#x",
393					       fm.mask->flags);
394			return -EOPNOTSUPP;
395		}
396		if (!IS_ALL_ONES(fm.mask->addr_type)) {
397			NL_SET_ERR_MSG_FMT_MOD(extack, "Unsupported enc addr_type mask %u (key %u)",
398					       fm.mask->addr_type,
399					       fm.key->addr_type);
400			return -EOPNOTSUPP;
401		}
402		switch (fm.key->addr_type) {
403		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
404			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
405					     src, enc_src_ip);
406			MAP_ENC_KEY_AND_MASK(IPV4_ADDRS, ipv4_addrs, enc_ipv4_addrs,
407					     dst, enc_dst_ip);
408			break;
409#ifdef CONFIG_IPV6
410		case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
411			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
412					     src, enc_src_ip6);
413			MAP_ENC_KEY_AND_MASK(IPV6_ADDRS, ipv6_addrs, enc_ipv6_addrs,
414					     dst, enc_dst_ip6);
415			break;
416#endif
417		default:
418			NL_SET_ERR_MSG_FMT_MOD(extack,
419					       "Unsupported enc addr_type %u (supported are IPv4, IPv6)",
420					       fm.key->addr_type);
421			return -EOPNOTSUPP;
422		}
423		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, tos, enc_ip_tos);
424		MAP_ENC_KEY_AND_MASK(IP, ip, enc_ip, ttl, enc_ip_ttl);
425		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, src, enc_sport);
426		MAP_ENC_KEY_AND_MASK(PORTS, ports, enc_ports, dst, enc_dport);
427		MAP_ENC_KEY_AND_MASK(KEYID, enc_keyid, enc_keyid, keyid, enc_keyid);
428	} else if (dissector->used_keys &
429		   (BIT_ULL(FLOW_DISSECTOR_KEY_ENC_KEYID) |
430		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
431		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
432		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_IP) |
433		    BIT_ULL(FLOW_DISSECTOR_KEY_ENC_PORTS))) {
434		NL_SET_ERR_MSG_FMT_MOD(extack,
435				       "Flower enc keys require enc_control (keys: %#llx)",
436				       dissector->used_keys);
437		return -EOPNOTSUPP;
438	}
439	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT)) {
440		struct flow_match_ct fm;
441
442		flow_rule_match_ct(rule, &fm);
443		match->value.ct_state_trk = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
444		match->mask.ct_state_trk = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_TRACKED);
445		match->value.ct_state_est = !!(fm.key->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
446		match->mask.ct_state_est = !!(fm.mask->ct_state & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED);
447		if (fm.mask->ct_state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
448					  TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED)) {
449			NL_SET_ERR_MSG_FMT_MOD(extack,
450					       "Unsupported ct_state match %#x",
451					       fm.mask->ct_state);
452			return -EOPNOTSUPP;
453		}
454		match->value.ct_mark = fm.key->ct_mark;
455		match->mask.ct_mark = fm.mask->ct_mark;
456		match->value.ct_zone = fm.key->ct_zone;
457		match->mask.ct_zone = fm.mask->ct_zone;
458
459		if (memchr_inv(fm.mask->ct_labels, 0, sizeof(fm.mask->ct_labels))) {
460			NL_SET_ERR_MSG_MOD(extack, "Matching on ct_label not supported");
461			return -EOPNOTSUPP;
462		}
463	}
464
465	return 0;
466}
467
468static void efx_tc_flower_release_encap_match(struct efx_nic *efx,
469					      struct efx_tc_encap_match *encap)
470{
471	int rc;
472
473	if (!refcount_dec_and_test(&encap->ref))
474		return; /* still in use */
475
476	if (encap->type == EFX_TC_EM_DIRECT) {
477		rc = efx_mae_unregister_encap_match(efx, encap);
478		if (rc)
479			/* Display message but carry on and remove entry from our
480			 * SW tables, because there's not much we can do about it.
481			 */
482			netif_err(efx, drv, efx->net_dev,
483				  "Failed to release encap match %#x, rc %d\n",
484				  encap->fw_id, rc);
485	}
486	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
487			       efx_tc_encap_match_ht_params);
488	if (encap->pseudo)
489		efx_tc_flower_release_encap_match(efx, encap->pseudo);
490	kfree(encap);
491}
492
493static int efx_tc_flower_record_encap_match(struct efx_nic *efx,
494					    struct efx_tc_match *match,
495					    enum efx_encap_type type,
496					    enum efx_tc_em_pseudo_type em_type,
497					    u8 child_ip_tos_mask,
498					    __be16 child_udp_sport_mask,
499					    struct netlink_ext_ack *extack)
500{
501	struct efx_tc_encap_match *encap, *old, *pseudo = NULL;
502	bool ipv6 = false;
503	int rc;
504
505	/* We require that the socket-defining fields (IP addrs and UDP dest
506	 * port) are present and exact-match.  Other fields may only be used
507	 * if the field-set (and any masks) are the same for all encap
508	 * matches on the same <sip,dip,dport> tuple; this is enforced by
509	 * pseudo encap matches.
510	 */
511	if (match->mask.enc_dst_ip | match->mask.enc_src_ip) {
512		if (!IS_ALL_ONES(match->mask.enc_dst_ip)) {
513			NL_SET_ERR_MSG_MOD(extack,
514					   "Egress encap match is not exact on dst IP address");
515			return -EOPNOTSUPP;
516		}
517		if (!IS_ALL_ONES(match->mask.enc_src_ip)) {
518			NL_SET_ERR_MSG_MOD(extack,
519					   "Egress encap match is not exact on src IP address");
520			return -EOPNOTSUPP;
521		}
522#ifdef CONFIG_IPV6
523		if (!ipv6_addr_any(&match->mask.enc_dst_ip6) ||
524		    !ipv6_addr_any(&match->mask.enc_src_ip6)) {
525			NL_SET_ERR_MSG_MOD(extack,
526					   "Egress encap match on both IPv4 and IPv6, don't understand");
527			return -EOPNOTSUPP;
528		}
529	} else {
530		ipv6 = true;
531		if (!efx_ipv6_addr_all_ones(&match->mask.enc_dst_ip6)) {
532			NL_SET_ERR_MSG_MOD(extack,
533					   "Egress encap match is not exact on dst IP address");
534			return -EOPNOTSUPP;
535		}
536		if (!efx_ipv6_addr_all_ones(&match->mask.enc_src_ip6)) {
537			NL_SET_ERR_MSG_MOD(extack,
538					   "Egress encap match is not exact on src IP address");
539			return -EOPNOTSUPP;
540		}
541#endif
542	}
543	if (!IS_ALL_ONES(match->mask.enc_dport)) {
544		NL_SET_ERR_MSG_MOD(extack, "Egress encap match is not exact on dst UDP port");
545		return -EOPNOTSUPP;
546	}
547	if (match->mask.enc_sport || match->mask.enc_ip_tos) {
548		struct efx_tc_match pmatch = *match;
549
550		if (em_type == EFX_TC_EM_PSEUDO_MASK) { /* can't happen */
551			NL_SET_ERR_MSG_MOD(extack, "Bad recursion in egress encap match handler");
552			return -EOPNOTSUPP;
553		}
554		pmatch.value.enc_ip_tos = 0;
555		pmatch.mask.enc_ip_tos = 0;
556		pmatch.value.enc_sport = 0;
557		pmatch.mask.enc_sport = 0;
558		rc = efx_tc_flower_record_encap_match(efx, &pmatch, type,
559						      EFX_TC_EM_PSEUDO_MASK,
560						      match->mask.enc_ip_tos,
561						      match->mask.enc_sport,
562						      extack);
563		if (rc)
564			return rc;
565		pseudo = pmatch.encap;
566	}
567	if (match->mask.enc_ip_ttl) {
568		NL_SET_ERR_MSG_MOD(extack, "Egress encap match on IP TTL not supported");
569		rc = -EOPNOTSUPP;
570		goto fail_pseudo;
571	}
572
573	rc = efx_mae_check_encap_match_caps(efx, ipv6, match->mask.enc_ip_tos,
574					    match->mask.enc_sport, extack);
575	if (rc)
576		goto fail_pseudo;
577
578	encap = kzalloc(sizeof(*encap), GFP_USER);
579	if (!encap) {
580		rc = -ENOMEM;
581		goto fail_pseudo;
582	}
583	encap->src_ip = match->value.enc_src_ip;
584	encap->dst_ip = match->value.enc_dst_ip;
585#ifdef CONFIG_IPV6
586	encap->src_ip6 = match->value.enc_src_ip6;
587	encap->dst_ip6 = match->value.enc_dst_ip6;
588#endif
589	encap->udp_dport = match->value.enc_dport;
590	encap->tun_type = type;
591	encap->ip_tos = match->value.enc_ip_tos;
592	encap->ip_tos_mask = match->mask.enc_ip_tos;
593	encap->child_ip_tos_mask = child_ip_tos_mask;
594	encap->udp_sport = match->value.enc_sport;
595	encap->udp_sport_mask = match->mask.enc_sport;
596	encap->child_udp_sport_mask = child_udp_sport_mask;
597	encap->type = em_type;
598	encap->pseudo = pseudo;
599	old = rhashtable_lookup_get_insert_fast(&efx->tc->encap_match_ht,
600						&encap->linkage,
601						efx_tc_encap_match_ht_params);
602	if (old) {
603		/* don't need our new entry */
604		kfree(encap);
605		if (pseudo) /* don't need our new pseudo either */
606			efx_tc_flower_release_encap_match(efx, pseudo);
607		if (IS_ERR(old)) /* oh dear, it's actually an error */
608			return PTR_ERR(old);
609		/* check old and new em_types are compatible */
610		switch (old->type) {
611		case EFX_TC_EM_DIRECT:
612			/* old EM is in hardware, so mustn't overlap with a
613			 * pseudo, but may be shared with another direct EM
614			 */
615			if (em_type == EFX_TC_EM_DIRECT)
616				break;
617			NL_SET_ERR_MSG_MOD(extack, "Pseudo encap match conflicts with existing direct entry");
618			return -EEXIST;
619		case EFX_TC_EM_PSEUDO_MASK:
620			/* old EM is protecting a ToS- or src port-qualified
621			 * filter, so may only be shared with another pseudo
622			 * for the same ToS and src port masks.
623			 */
624			if (em_type != EFX_TC_EM_PSEUDO_MASK) {
625				NL_SET_ERR_MSG_FMT_MOD(extack,
626						       "%s encap match conflicts with existing pseudo(MASK) entry",
627						       em_type ? "Pseudo" : "Direct");
628				return -EEXIST;
629			}
630			if (child_ip_tos_mask != old->child_ip_tos_mask) {
631				NL_SET_ERR_MSG_FMT_MOD(extack,
632						       "Pseudo encap match for TOS mask %#04x conflicts with existing mask %#04x",
633						       child_ip_tos_mask,
634						       old->child_ip_tos_mask);
635				return -EEXIST;
636			}
637			if (child_udp_sport_mask != old->child_udp_sport_mask) {
638				NL_SET_ERR_MSG_FMT_MOD(extack,
639						       "Pseudo encap match for UDP src port mask %#x conflicts with existing mask %#x",
640						       child_udp_sport_mask,
641						       old->child_udp_sport_mask);
642				return -EEXIST;
643			}
644			break;
645		default: /* Unrecognised pseudo-type.  Just say no */
646			NL_SET_ERR_MSG_FMT_MOD(extack,
647					       "%s encap match conflicts with existing pseudo(%d) entry",
648					       em_type ? "Pseudo" : "Direct",
649					       old->type);
650			return -EEXIST;
651		}
652		/* check old and new tun_types are compatible */
653		if (old->tun_type != type) {
654			NL_SET_ERR_MSG_FMT_MOD(extack,
655					       "Egress encap match with conflicting tun_type %u != %u",
656					       old->tun_type, type);
657			return -EEXIST;
658		}
659		if (!refcount_inc_not_zero(&old->ref))
660			return -EAGAIN;
661		/* existing entry found */
662		encap = old;
663	} else {
664		if (em_type == EFX_TC_EM_DIRECT) {
665			rc = efx_mae_register_encap_match(efx, encap);
666			if (rc) {
667				NL_SET_ERR_MSG_MOD(extack, "Failed to record egress encap match in HW");
668				goto fail;
669			}
670		}
671		refcount_set(&encap->ref, 1);
672	}
673	match->encap = encap;
674	return 0;
675fail:
676	rhashtable_remove_fast(&efx->tc->encap_match_ht, &encap->linkage,
677			       efx_tc_encap_match_ht_params);
678	kfree(encap);
679fail_pseudo:
680	if (pseudo)
681		efx_tc_flower_release_encap_match(efx, pseudo);
682	return rc;
683}
684
685static struct efx_tc_recirc_id *efx_tc_get_recirc_id(struct efx_nic *efx,
686						     u32 chain_index,
687						     struct net_device *net_dev)
688{
689	struct efx_tc_recirc_id *rid, *old;
690	int rc;
691
692	rid = kzalloc(sizeof(*rid), GFP_USER);
693	if (!rid)
694		return ERR_PTR(-ENOMEM);
695	rid->chain_index = chain_index;
696	/* We don't take a reference here, because it's implied - if there's
697	 * a rule on the net_dev that's been offloaded to us, then the net_dev
698	 * can't go away until the rule has been deoffloaded.
699	 */
700	rid->net_dev = net_dev;
701	old = rhashtable_lookup_get_insert_fast(&efx->tc->recirc_ht,
702						&rid->linkage,
703						efx_tc_recirc_ht_params);
704	if (old) {
705		/* don't need our new entry */
706		kfree(rid);
707		if (IS_ERR(old)) /* oh dear, it's actually an error */
708			return ERR_CAST(old);
709		if (!refcount_inc_not_zero(&old->ref))
710			return ERR_PTR(-EAGAIN);
711		/* existing entry found */
712		rid = old;
713	} else {
714		rc = ida_alloc_range(&efx->tc->recirc_ida, 1, U8_MAX, GFP_USER);
715		if (rc < 0) {
716			rhashtable_remove_fast(&efx->tc->recirc_ht,
717					       &rid->linkage,
718					       efx_tc_recirc_ht_params);
719			kfree(rid);
720			return ERR_PTR(rc);
721		}
722		rid->fw_id = rc;
723		refcount_set(&rid->ref, 1);
724	}
725	return rid;
726}
727
728static void efx_tc_put_recirc_id(struct efx_nic *efx, struct efx_tc_recirc_id *rid)
729{
730	if (!refcount_dec_and_test(&rid->ref))
731		return; /* still in use */
732	rhashtable_remove_fast(&efx->tc->recirc_ht, &rid->linkage,
733			       efx_tc_recirc_ht_params);
734	ida_free(&efx->tc->recirc_ida, rid->fw_id);
735	kfree(rid);
736}
737
738static void efx_tc_delete_rule(struct efx_nic *efx, struct efx_tc_flow_rule *rule)
739{
740	efx_mae_delete_rule(efx, rule->fw_id);
741
742	/* Release entries in subsidiary tables */
743	efx_tc_free_action_set_list(efx, &rule->acts, true);
744	if (rule->match.rid)
745		efx_tc_put_recirc_id(efx, rule->match.rid);
746	if (rule->match.encap)
747		efx_tc_flower_release_encap_match(efx, rule->match.encap);
748	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
749}
750
751static const char *efx_tc_encap_type_name(enum efx_encap_type typ)
752{
753	switch (typ) {
754	case EFX_ENCAP_TYPE_NONE:
755		return "none";
756	case EFX_ENCAP_TYPE_VXLAN:
757		return "vxlan";
758	case EFX_ENCAP_TYPE_GENEVE:
759		return "geneve";
760	default:
761		pr_warn_once("Unknown efx_encap_type %d encountered\n", typ);
762		return "unknown";
763	}
764}
765
766/* For details of action order constraints refer to SF-123102-TC-1§12.6.1 */
767enum efx_tc_action_order {
768	EFX_TC_AO_DECAP,
769	EFX_TC_AO_DEC_TTL,
770	EFX_TC_AO_PEDIT_MAC_ADDRS,
771	EFX_TC_AO_VLAN_POP,
772	EFX_TC_AO_VLAN_PUSH,
773	EFX_TC_AO_COUNT,
774	EFX_TC_AO_ENCAP,
775	EFX_TC_AO_DELIVER
776};
777/* Determine whether we can add @new action without violating order */
778static bool efx_tc_flower_action_order_ok(const struct efx_tc_action_set *act,
779					  enum efx_tc_action_order new)
780{
781	switch (new) {
782	case EFX_TC_AO_DECAP:
783		if (act->decap)
784			return false;
785		/* PEDIT_MAC_ADDRS must not happen before DECAP, though it
786		 * can wait until much later
787		 */
788		if (act->dst_mac || act->src_mac)
789			return false;
790
791		/* Decrementing ttl must not happen before DECAP */
792		if (act->do_ttl_dec)
793			return false;
794		fallthrough;
795	case EFX_TC_AO_VLAN_POP:
796		if (act->vlan_pop >= 2)
797			return false;
798		/* If we've already pushed a VLAN, we can't then pop it;
799		 * the hardware would instead try to pop an existing VLAN
800		 * before pushing the new one.
801		 */
802		if (act->vlan_push)
803			return false;
804		fallthrough;
805	case EFX_TC_AO_VLAN_PUSH:
806		if (act->vlan_push >= 2)
807			return false;
808		fallthrough;
809	case EFX_TC_AO_COUNT:
810		if (act->count)
811			return false;
812		fallthrough;
813	case EFX_TC_AO_PEDIT_MAC_ADDRS:
814	case EFX_TC_AO_ENCAP:
815		if (act->encap_md)
816			return false;
817		fallthrough;
818	case EFX_TC_AO_DELIVER:
819		return !act->deliver;
820	case EFX_TC_AO_DEC_TTL:
821		if (act->encap_md)
822			return false;
823		return !act->do_ttl_dec;
824	default:
825		/* Bad caller.  Whatever they wanted to do, say they can't. */
826		WARN_ON_ONCE(1);
827		return false;
828	}
829}
830
831/**
832 * DOC: TC conntrack sequences
833 *
834 * The MAE hardware can handle at most two rounds of action rule matching,
835 * consequently we support conntrack through the notion of a "left-hand side
836 * rule".  This is a rule which typically contains only the actions "ct" and
837 * "goto chain N", and corresponds to one or more "right-hand side rules" in
838 * chain N, which typically match on +trk+est, and may perform ct(nat) actions.
839 * RHS rules go in the Action Rule table as normal but with a nonzero recirc_id
840 * (the hardware equivalent of chain_index), while LHS rules may go in either
841 * the Action Rule or the Outer Rule table, the latter being preferred for
842 * performance reasons, and set both DO_CT and a recirc_id in their response.
843 *
844 * Besides the RHS rules, there are often also similar rules matching on
845 * +trk+new which perform the ct(commit) action.  These are not offloaded.
846 */
847
848static bool efx_tc_rule_is_lhs_rule(struct flow_rule *fr,
849				    struct efx_tc_match *match)
850{
851	const struct flow_action_entry *fa;
852	int i;
853
854	flow_action_for_each(i, fa, &fr->action) {
855		switch (fa->id) {
856		case FLOW_ACTION_GOTO:
857			return true;
858		case FLOW_ACTION_CT:
859			/* If rule is -trk, or doesn't mention trk at all, then
860			 * a CT action implies a conntrack lookup (hence it's an
861			 * LHS rule).  If rule is +trk, then a CT action could
862			 * just be ct(nat) or even ct(commit) (though the latter
863			 * can't be offloaded).
864			 */
865			if (!match->mask.ct_state_trk || !match->value.ct_state_trk)
866				return true;
867			break;
868		default:
869			break;
870		}
871	}
872	return false;
873}
874
875static int efx_tc_flower_handle_lhs_actions(struct efx_nic *efx,
876					    struct flow_cls_offload *tc,
877					    struct flow_rule *fr,
878					    struct net_device *net_dev,
879					    struct efx_tc_lhs_rule *rule)
880
881{
882	struct netlink_ext_ack *extack = tc->common.extack;
883	struct efx_tc_lhs_action *act = &rule->lhs_act;
884	const struct flow_action_entry *fa;
885	bool pipe = true;
886	int i;
887
888	flow_action_for_each(i, fa, &fr->action) {
889		struct efx_tc_ct_zone *ct_zone;
890		struct efx_tc_recirc_id *rid;
891
892		if (!pipe) {
893			/* more actions after a non-pipe action */
894			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
895			return -EINVAL;
896		}
897		switch (fa->id) {
898		case FLOW_ACTION_GOTO:
899			if (!fa->chain_index) {
900				NL_SET_ERR_MSG_MOD(extack, "Can't goto chain 0, no looping in hw");
901				return -EOPNOTSUPP;
902			}
903			rid = efx_tc_get_recirc_id(efx, fa->chain_index,
904						   net_dev);
905			if (IS_ERR(rid)) {
906				NL_SET_ERR_MSG_MOD(extack, "Failed to allocate a hardware recirculation ID for this chain_index");
907				return PTR_ERR(rid);
908			}
909			act->rid = rid;
910			if (fa->hw_stats) {
911				struct efx_tc_counter_index *cnt;
912
913				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
914					NL_SET_ERR_MSG_FMT_MOD(extack,
915							       "hw_stats_type %u not supported (only 'delayed')",
916							       fa->hw_stats);
917					return -EOPNOTSUPP;
918				}
919				cnt = efx_tc_flower_get_counter_index(efx, tc->cookie,
920								      EFX_TC_COUNTER_TYPE_OR);
921				if (IS_ERR(cnt)) {
922					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
923					return PTR_ERR(cnt);
924				}
925				WARN_ON(act->count); /* can't happen */
926				act->count = cnt;
927			}
928			pipe = false;
929			break;
930		case FLOW_ACTION_CT:
931			if (act->zone) {
932				NL_SET_ERR_MSG_MOD(extack, "Can't offload multiple ct actions");
933				return -EOPNOTSUPP;
934			}
935			if (fa->ct.action & (TCA_CT_ACT_COMMIT |
936					     TCA_CT_ACT_FORCE)) {
937				NL_SET_ERR_MSG_MOD(extack, "Can't offload ct commit/force");
938				return -EOPNOTSUPP;
939			}
940			if (fa->ct.action & TCA_CT_ACT_CLEAR) {
941				NL_SET_ERR_MSG_MOD(extack, "Can't clear ct in LHS rule");
942				return -EOPNOTSUPP;
943			}
944			if (fa->ct.action & (TCA_CT_ACT_NAT |
945					     TCA_CT_ACT_NAT_SRC |
946					     TCA_CT_ACT_NAT_DST)) {
947				NL_SET_ERR_MSG_MOD(extack, "Can't perform NAT in LHS rule - packet isn't conntracked yet");
948				return -EOPNOTSUPP;
949			}
950			if (fa->ct.action) {
951				NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled ct.action %u for LHS rule\n",
952						       fa->ct.action);
953				return -EOPNOTSUPP;
954			}
955			ct_zone = efx_tc_ct_register_zone(efx, fa->ct.zone,
956							  fa->ct.flow_table);
957			if (IS_ERR(ct_zone)) {
958				NL_SET_ERR_MSG_MOD(extack, "Failed to register for CT updates");
959				return PTR_ERR(ct_zone);
960			}
961			act->zone = ct_zone;
962			break;
963		default:
964			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u for LHS rule\n",
965					       fa->id);
966			return -EOPNOTSUPP;
967		}
968	}
969
970	if (pipe) {
971		NL_SET_ERR_MSG_MOD(extack, "Missing goto chain in LHS rule");
972		return -EOPNOTSUPP;
973	}
974	return 0;
975}
976
977static void efx_tc_flower_release_lhs_actions(struct efx_nic *efx,
978					      struct efx_tc_lhs_action *act)
979{
980	if (act->rid)
981		efx_tc_put_recirc_id(efx, act->rid);
982	if (act->zone)
983		efx_tc_ct_unregister_zone(efx, act->zone);
984	if (act->count)
985		efx_tc_flower_put_counter_index(efx, act->count);
986}
987
988/**
989 * struct efx_tc_mangler_state - accumulates 32-bit pedits into fields
990 *
991 * @dst_mac_32:	dst_mac[0:3] has been populated
992 * @dst_mac_16:	dst_mac[4:5] has been populated
993 * @src_mac_16:	src_mac[0:1] has been populated
994 * @src_mac_32:	src_mac[2:5] has been populated
995 * @dst_mac:	h_dest field of ethhdr
996 * @src_mac:	h_source field of ethhdr
997 *
998 * Since FLOW_ACTION_MANGLE comes in 32-bit chunks that do not
999 * necessarily equate to whole fields of the packet header, this
1000 * structure is used to hold the cumulative effect of the partial
1001 * field pedits that have been processed so far.
1002 */
1003struct efx_tc_mangler_state {
1004	u8 dst_mac_32:1; /* eth->h_dest[0:3] */
1005	u8 dst_mac_16:1; /* eth->h_dest[4:5] */
1006	u8 src_mac_16:1; /* eth->h_source[0:1] */
1007	u8 src_mac_32:1; /* eth->h_source[2:5] */
1008	unsigned char dst_mac[ETH_ALEN];
1009	unsigned char src_mac[ETH_ALEN];
1010};
1011
1012/** efx_tc_complete_mac_mangle() - pull complete field pedits out of @mung
1013 * @efx:	NIC we're installing a flow rule on
1014 * @act:	action set (cursor) to update
1015 * @mung:	accumulated partial mangles
1016 * @extack:	netlink extended ack for reporting errors
1017 *
1018 * Check @mung to find any combinations of partial mangles that can be
1019 * combined into a complete packet field edit, add that edit to @act,
1020 * and consume the partial mangles from @mung.
1021 */
1022
1023static int efx_tc_complete_mac_mangle(struct efx_nic *efx,
1024				      struct efx_tc_action_set *act,
1025				      struct efx_tc_mangler_state *mung,
1026				      struct netlink_ext_ack *extack)
1027{
1028	struct efx_tc_mac_pedit_action *ped;
1029
1030	if (mung->dst_mac_32 && mung->dst_mac_16) {
1031		ped = efx_tc_flower_get_mac(efx, mung->dst_mac, extack);
1032		if (IS_ERR(ped))
1033			return PTR_ERR(ped);
1034
1035		/* Check that we have not already populated dst_mac */
1036		if (act->dst_mac)
1037			efx_tc_flower_put_mac(efx, act->dst_mac);
1038
1039		act->dst_mac = ped;
1040
1041		/* consume the incomplete state */
1042		mung->dst_mac_32 = 0;
1043		mung->dst_mac_16 = 0;
1044	}
1045	if (mung->src_mac_16 && mung->src_mac_32) {
1046		ped = efx_tc_flower_get_mac(efx, mung->src_mac, extack);
1047		if (IS_ERR(ped))
1048			return PTR_ERR(ped);
1049
1050		/* Check that we have not already populated src_mac */
1051		if (act->src_mac)
1052			efx_tc_flower_put_mac(efx, act->src_mac);
1053
1054		act->src_mac = ped;
1055
1056		/* consume the incomplete state */
1057		mung->src_mac_32 = 0;
1058		mung->src_mac_16 = 0;
1059	}
1060	return 0;
1061}
1062
1063static int efx_tc_pedit_add(struct efx_nic *efx, struct efx_tc_action_set *act,
1064			    const struct flow_action_entry *fa,
1065			    struct netlink_ext_ack *extack)
1066{
1067	switch (fa->mangle.htype) {
1068	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1069		switch (fa->mangle.offset) {
1070		case offsetof(struct iphdr, ttl):
1071			/* check that pedit applies to ttl only */
1072			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK)
1073				break;
1074
1075			/* Adding 0xff is equivalent to decrementing the ttl.
1076			 * Other added values are not supported.
1077			 */
1078			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) != U8_MAX)
1079				break;
1080
1081			/* check that we do not decrement ttl twice */
1082			if (!efx_tc_flower_action_order_ok(act,
1083							   EFX_TC_AO_DEC_TTL)) {
1084				NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
1085				return -EOPNOTSUPP;
1086			}
1087			act->do_ttl_dec = 1;
1088			return 0;
1089		default:
1090			break;
1091		}
1092		break;
1093	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
1094		switch (fa->mangle.offset) {
1095		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
1096			/* check that pedit applies to hoplimit only */
1097			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK)
1098				break;
1099
1100			/* Adding 0xff is equivalent to decrementing the hoplimit.
1101			 * Other added values are not supported.
1102			 */
1103			if ((fa->mangle.val >> 24) != U8_MAX)
1104				break;
1105
1106			/* check that we do not decrement hoplimit twice */
1107			if (!efx_tc_flower_action_order_ok(act,
1108							   EFX_TC_AO_DEC_TTL)) {
1109				NL_SET_ERR_MSG_MOD(extack, "multiple dec ttl are not supported");
1110				return -EOPNOTSUPP;
1111			}
1112			act->do_ttl_dec = 1;
1113			return 0;
1114		default:
1115			break;
1116		}
1117		break;
1118	default:
1119		break;
1120	}
1121
1122	NL_SET_ERR_MSG_FMT_MOD(extack,
1123			       "ttl add action type %x %x %x/%x is not supported",
1124			       fa->mangle.htype, fa->mangle.offset,
1125			       fa->mangle.val, fa->mangle.mask);
1126	return -EOPNOTSUPP;
1127}
1128
1129/**
1130 * efx_tc_mangle() - handle a single 32-bit (or less) pedit
1131 * @efx:	NIC we're installing a flow rule on
1132 * @act:	action set (cursor) to update
1133 * @fa:		FLOW_ACTION_MANGLE action metadata
1134 * @mung:	accumulator for partial mangles
1135 * @extack:	netlink extended ack for reporting errors
1136 * @match:	original match used along with the mangle action
1137 *
1138 * Identify the fields written by a FLOW_ACTION_MANGLE, and record
1139 * the partial mangle state in @mung.  If this mangle completes an
1140 * earlier partial mangle, consume and apply to @act by calling
1141 * efx_tc_complete_mac_mangle().
1142 */
1143
1144static int efx_tc_mangle(struct efx_nic *efx, struct efx_tc_action_set *act,
1145			 const struct flow_action_entry *fa,
1146			 struct efx_tc_mangler_state *mung,
1147			 struct netlink_ext_ack *extack,
1148			 struct efx_tc_match *match)
1149{
1150	__le32 mac32;
1151	__le16 mac16;
1152	u8 tr_ttl;
1153
1154	switch (fa->mangle.htype) {
1155	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
1156		BUILD_BUG_ON(offsetof(struct ethhdr, h_dest) != 0);
1157		BUILD_BUG_ON(offsetof(struct ethhdr, h_source) != 6);
1158		if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_PEDIT_MAC_ADDRS)) {
1159			NL_SET_ERR_MSG_MOD(extack,
1160					   "Pedit mangle mac action violates action order");
1161			return -EOPNOTSUPP;
1162		}
1163		switch (fa->mangle.offset) {
1164		case 0:
1165			if (fa->mangle.mask) {
1166				NL_SET_ERR_MSG_FMT_MOD(extack,
1167						       "mask (%#x) of eth.dst32 mangle is not supported",
1168						       fa->mangle.mask);
1169				return -EOPNOTSUPP;
1170			}
1171			/* Ethernet address is little-endian */
1172			mac32 = cpu_to_le32(fa->mangle.val);
1173			memcpy(mung->dst_mac, &mac32, sizeof(mac32));
1174			mung->dst_mac_32 = 1;
1175			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1176		case 4:
1177			if (fa->mangle.mask == 0xffff) {
1178				mac16 = cpu_to_le16(fa->mangle.val >> 16);
1179				memcpy(mung->src_mac, &mac16, sizeof(mac16));
1180				mung->src_mac_16 = 1;
1181			} else if (fa->mangle.mask == 0xffff0000) {
1182				mac16 = cpu_to_le16((u16)fa->mangle.val);
1183				memcpy(mung->dst_mac + 4, &mac16, sizeof(mac16));
1184				mung->dst_mac_16 = 1;
1185			} else {
1186				NL_SET_ERR_MSG_FMT_MOD(extack,
1187						       "mask (%#x) of eth+4 mangle is not high or low 16b",
1188						       fa->mangle.mask);
1189				return -EOPNOTSUPP;
1190			}
1191			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1192		case 8:
1193			if (fa->mangle.mask) {
1194				NL_SET_ERR_MSG_FMT_MOD(extack,
1195						       "mask (%#x) of eth.src32 mangle is not supported",
1196						       fa->mangle.mask);
1197				return -EOPNOTSUPP;
1198			}
1199			mac32 = cpu_to_le32(fa->mangle.val);
1200			memcpy(mung->src_mac + 2, &mac32, sizeof(mac32));
1201			mung->src_mac_32 = 1;
1202			return efx_tc_complete_mac_mangle(efx, act, mung, extack);
1203		default:
1204			NL_SET_ERR_MSG_FMT_MOD(extack, "mangle eth+%u %x/%x is not supported",
1205					       fa->mangle.offset, fa->mangle.val, fa->mangle.mask);
1206			return -EOPNOTSUPP;
1207		}
1208		break;
1209	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
1210		switch (fa->mangle.offset) {
1211		case offsetof(struct iphdr, ttl):
1212			/* we currently only support pedit IP4 when it applies
1213			 * to TTL and then only when it can be achieved with a
1214			 * decrement ttl action
1215			 */
1216
1217			/* check that pedit applies to ttl only */
1218			if (fa->mangle.mask != ~EFX_TC_HDR_TYPE_TTL_MASK) {
1219				NL_SET_ERR_MSG_FMT_MOD(extack,
1220						       "mask (%#x) out of range, only support mangle action on ipv4.ttl",
1221						       fa->mangle.mask);
1222				return -EOPNOTSUPP;
1223			}
1224
1225			/* we can only convert to a dec ttl when we have an
1226			 * exact match on the ttl field
1227			 */
1228			if (match->mask.ip_ttl != U8_MAX) {
1229				NL_SET_ERR_MSG_FMT_MOD(extack,
1230						       "only support mangle ttl when we have an exact match, current mask (%#x)",
1231						       match->mask.ip_ttl);
1232				return -EOPNOTSUPP;
1233			}
1234
1235			/* check that we don't try to decrement 0, which equates
1236			 * to setting the ttl to 0xff
1237			 */
1238			if (match->value.ip_ttl == 0) {
1239				NL_SET_ERR_MSG_MOD(extack,
1240						   "decrement ttl past 0 is not supported");
1241				return -EOPNOTSUPP;
1242			}
1243
1244			/* check that we do not decrement ttl twice */
1245			if (!efx_tc_flower_action_order_ok(act,
1246							   EFX_TC_AO_DEC_TTL)) {
1247				NL_SET_ERR_MSG_MOD(extack,
1248						   "multiple dec ttl is not supported");
1249				return -EOPNOTSUPP;
1250			}
1251
1252			/* check pedit can be achieved with decrement action */
1253			tr_ttl = match->value.ip_ttl - 1;
1254			if ((fa->mangle.val & EFX_TC_HDR_TYPE_TTL_MASK) == tr_ttl) {
1255				act->do_ttl_dec = 1;
1256				return 0;
1257			}
1258
1259			fallthrough;
1260		default:
1261			NL_SET_ERR_MSG_FMT_MOD(extack,
1262					       "only support mangle on the ttl field (offset is %u)",
1263					       fa->mangle.offset);
1264			return -EOPNOTSUPP;
1265		}
1266		break;
1267	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
1268		switch (fa->mangle.offset) {
1269		case round_down(offsetof(struct ipv6hdr, hop_limit), 4):
1270			/* we currently only support pedit IP6 when it applies
1271			 * to the hoplimit and then only when it can be achieved
1272			 * with a decrement hoplimit action
1273			 */
1274
1275			/* check that pedit applies to ttl only */
1276			if (fa->mangle.mask != EFX_TC_HDR_TYPE_HLIMIT_MASK) {
1277				NL_SET_ERR_MSG_FMT_MOD(extack,
1278						       "mask (%#x) out of range, only support mangle action on ipv6.hop_limit",
1279						       fa->mangle.mask);
1280
1281				return -EOPNOTSUPP;
1282			}
1283
1284			/* we can only convert to a dec ttl when we have an
1285			 * exact match on the ttl field
1286			 */
1287			if (match->mask.ip_ttl != U8_MAX) {
1288				NL_SET_ERR_MSG_FMT_MOD(extack,
1289						       "only support hop_limit when we have an exact match, current mask (%#x)",
1290						       match->mask.ip_ttl);
1291				return -EOPNOTSUPP;
1292			}
1293
1294			/* check that we don't try to decrement 0, which equates
1295			 * to setting the ttl to 0xff
1296			 */
1297			if (match->value.ip_ttl == 0) {
1298				NL_SET_ERR_MSG_MOD(extack,
1299						   "decrementing hop_limit past 0 is not supported");
1300				return -EOPNOTSUPP;
1301			}
1302
1303			/* check that we do not decrement hoplimit twice */
1304			if (!efx_tc_flower_action_order_ok(act,
1305							   EFX_TC_AO_DEC_TTL)) {
1306				NL_SET_ERR_MSG_MOD(extack,
1307						   "multiple dec ttl is not supported");
1308				return -EOPNOTSUPP;
1309			}
1310
1311			/* check pedit can be achieved with decrement action */
1312			tr_ttl = match->value.ip_ttl - 1;
1313			if ((fa->mangle.val >> 24) == tr_ttl) {
1314				act->do_ttl_dec = 1;
1315				return 0;
1316			}
1317
1318			fallthrough;
1319		default:
1320			NL_SET_ERR_MSG_FMT_MOD(extack,
1321					       "only support mangle on the hop_limit field");
1322			return -EOPNOTSUPP;
1323		}
1324	default:
1325		NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled mangle htype %u for action rule",
1326				       fa->mangle.htype);
1327		return -EOPNOTSUPP;
1328	}
1329	return 0;
1330}
1331
1332/**
1333 * efx_tc_incomplete_mangle() - check for leftover partial pedits
1334 * @mung:	accumulator for partial mangles
1335 * @extack:	netlink extended ack for reporting errors
1336 *
1337 * Since the MAE can only overwrite whole fields, any partial
1338 * field mangle left over on reaching packet delivery (mirred or
1339 * end of TC actions) cannot be offloaded.  Check for any such
1340 * and reject them with -%EOPNOTSUPP.
1341 */
1342
1343static int efx_tc_incomplete_mangle(struct efx_tc_mangler_state *mung,
1344				    struct netlink_ext_ack *extack)
1345{
1346	if (mung->dst_mac_32 || mung->dst_mac_16) {
1347		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of destination MAC address");
1348		return -EOPNOTSUPP;
1349	}
1350	if (mung->src_mac_16 || mung->src_mac_32) {
1351		NL_SET_ERR_MSG_MOD(extack, "Incomplete pedit of source MAC address");
1352		return -EOPNOTSUPP;
1353	}
1354	return 0;
1355}
1356
1357static int efx_tc_flower_replace_foreign(struct efx_nic *efx,
1358					 struct net_device *net_dev,
1359					 struct flow_cls_offload *tc)
1360{
1361	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
1362	struct netlink_ext_ack *extack = tc->common.extack;
1363	struct efx_tc_flow_rule *rule = NULL, *old = NULL;
1364	struct efx_tc_action_set *act = NULL;
1365	bool found = false, uplinked = false;
1366	const struct flow_action_entry *fa;
1367	struct efx_tc_match match;
1368	struct efx_rep *to_efv;
1369	s64 rc;
1370	int i;
1371
1372	/* Parse match */
1373	memset(&match, 0, sizeof(match));
1374	rc = efx_tc_flower_parse_match(efx, fr, &match, NULL);
1375	if (rc)
1376		return rc;
1377	/* The rule as given to us doesn't specify a source netdevice.
1378	 * But, determining whether packets from a VF should match it is
1379	 * complicated, so leave those to the software slowpath: qualify
1380	 * the filter with source m-port == wire.
1381	 */
1382	rc = efx_tc_flower_external_mport(efx, EFX_EFV_PF);
1383	if (rc < 0) {
1384		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port for foreign filter");
1385		return rc;
1386	}
1387	match.value.ingress_port = rc;
1388	match.mask.ingress_port = ~0;
1389
1390	if (tc->common.chain_index) {
1391		struct efx_tc_recirc_id *rid;
1392
1393		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index, net_dev);
1394		if (IS_ERR(rid)) {
1395			NL_SET_ERR_MSG_FMT_MOD(extack,
1396					       "Failed to allocate a hardware recirculation ID for chain_index %u",
1397					       tc->common.chain_index);
1398			return PTR_ERR(rid);
1399		}
1400		match.rid = rid;
1401		match.value.recirc_id = rid->fw_id;
1402	}
1403	match.mask.recirc_id = 0xff;
1404
1405	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
1406	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
1407	 */
1408	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
1409	    match.mask.ct_state_est && match.value.ct_state_est)
1410		match.mask.ct_state_trk = 0;
1411	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
1412	 * match +trk-est (CT_HIT=0) despite being on an established connection.
1413	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
1414	 * still hit the software path.
1415	 */
1416	if (match.mask.ct_state_est && !match.value.ct_state_est) {
1417		if (match.value.tcp_syn_fin_rst) {
1418			/* Can't offload this combination */
1419			rc = -EOPNOTSUPP;
1420			goto release;
1421		}
1422		match.mask.tcp_syn_fin_rst = true;
1423	}
1424
1425	flow_action_for_each(i, fa, &fr->action) {
1426		switch (fa->id) {
1427		case FLOW_ACTION_REDIRECT:
1428		case FLOW_ACTION_MIRRED: /* mirred means mirror here */
1429			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
1430			if (IS_ERR(to_efv))
1431				continue;
1432			found = true;
1433			break;
1434		default:
1435			break;
1436		}
1437	}
1438	if (!found) { /* We don't care. */
1439		netif_dbg(efx, drv, efx->net_dev,
1440			  "Ignoring foreign filter that doesn't egdev us\n");
1441		rc = -EOPNOTSUPP;
1442		goto release;
1443	}
1444
1445	rc = efx_mae_match_check_caps(efx, &match.mask, NULL);
1446	if (rc)
1447		goto release;
1448
1449	if (efx_tc_match_is_encap(&match.mask)) {
1450		enum efx_encap_type type;
1451
1452		type = efx_tc_indr_netdev_type(net_dev);
1453		if (type == EFX_ENCAP_TYPE_NONE) {
1454			NL_SET_ERR_MSG_MOD(extack,
1455					   "Egress encap match on unsupported tunnel device");
1456			rc = -EOPNOTSUPP;
1457			goto release;
1458		}
1459
1460		rc = efx_mae_check_encap_type_supported(efx, type);
1461		if (rc) {
1462			NL_SET_ERR_MSG_FMT_MOD(extack,
1463					       "Firmware reports no support for %s encap match",
1464					       efx_tc_encap_type_name(type));
1465			goto release;
1466		}
1467
1468		rc = efx_tc_flower_record_encap_match(efx, &match, type,
1469						      EFX_TC_EM_DIRECT, 0, 0,
1470						      extack);
1471		if (rc)
1472			goto release;
1473	} else {
1474		/* This is not a tunnel decap rule, ignore it */
1475		netif_dbg(efx, drv, efx->net_dev,
1476			  "Ignoring foreign filter without encap match\n");
1477		rc = -EOPNOTSUPP;
1478		goto release;
1479	}
1480
1481	rule = kzalloc(sizeof(*rule), GFP_USER);
1482	if (!rule) {
1483		rc = -ENOMEM;
1484		goto release;
1485	}
1486	INIT_LIST_HEAD(&rule->acts.list);
1487	rule->cookie = tc->cookie;
1488	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
1489						&rule->linkage,
1490						efx_tc_match_action_ht_params);
1491	if (IS_ERR(old)) {
1492		rc = PTR_ERR(old);
1493		goto release;
1494	} else if (old) {
1495		netif_dbg(efx, drv, efx->net_dev,
1496			  "Ignoring already-offloaded rule (cookie %lx)\n",
1497			  tc->cookie);
1498		rc = -EEXIST;
1499		goto release;
1500	}
1501
1502	act = kzalloc(sizeof(*act), GFP_USER);
1503	if (!act) {
1504		rc = -ENOMEM;
1505		goto release;
1506	}
1507
1508	/* Parse actions.  For foreign rules we only support decap & redirect.
1509	 * See corresponding code in efx_tc_flower_replace() for theory of
1510	 * operation & how 'act' cursor is used.
1511	 */
1512	flow_action_for_each(i, fa, &fr->action) {
1513		struct efx_tc_action_set save;
1514
1515		switch (fa->id) {
1516		case FLOW_ACTION_REDIRECT:
1517		case FLOW_ACTION_MIRRED:
1518			/* See corresponding code in efx_tc_flower_replace() for
1519			 * long explanations of what's going on here.
1520			 */
1521			save = *act;
1522			if (fa->hw_stats) {
1523				struct efx_tc_counter_index *ctr;
1524
1525				if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
1526					NL_SET_ERR_MSG_FMT_MOD(extack,
1527							       "hw_stats_type %u not supported (only 'delayed')",
1528							       fa->hw_stats);
1529					rc = -EOPNOTSUPP;
1530					goto release;
1531				}
1532				if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
1533					rc = -EOPNOTSUPP;
1534					goto release;
1535				}
1536
1537				ctr = efx_tc_flower_get_counter_index(efx,
1538								      tc->cookie,
1539								      EFX_TC_COUNTER_TYPE_AR);
1540				if (IS_ERR(ctr)) {
1541					rc = PTR_ERR(ctr);
1542					NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
1543					goto release;
1544				}
1545				act->count = ctr;
1546				INIT_LIST_HEAD(&act->count_user);
1547			}
1548
1549			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
1550				/* can't happen */
1551				rc = -EOPNOTSUPP;
1552				NL_SET_ERR_MSG_MOD(extack,
1553						   "Deliver action violates action order (can't happen)");
1554				goto release;
1555			}
1556			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
1557			/* PF implies egdev is us, in which case we really
1558			 * want to deliver to the uplink (because this is an
1559			 * ingress filter).  If we don't recognise the egdev
1560			 * at all, then we'd better trap so SW can handle it.
1561			 */
1562			if (IS_ERR(to_efv))
1563				to_efv = EFX_EFV_PF;
1564			if (to_efv == EFX_EFV_PF) {
1565				if (uplinked)
1566					break;
1567				uplinked = true;
1568			}
1569			rc = efx_tc_flower_internal_mport(efx, to_efv);
1570			if (rc < 0) {
1571				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
1572				goto release;
1573			}
1574			act->dest_mport = rc;
1575			act->deliver = 1;
1576			rc = efx_mae_alloc_action_set(efx, act);
1577			if (rc) {
1578				NL_SET_ERR_MSG_MOD(extack,
1579						   "Failed to write action set to hw (mirred)");
1580				goto release;
1581			}
1582			list_add_tail(&act->list, &rule->acts.list);
1583			act = NULL;
1584			if (fa->id == FLOW_ACTION_REDIRECT)
1585				break; /* end of the line */
1586			/* Mirror, so continue on with saved act */
1587			act = kzalloc(sizeof(*act), GFP_USER);
1588			if (!act) {
1589				rc = -ENOMEM;
1590				goto release;
1591			}
1592			*act = save;
1593			break;
1594		case FLOW_ACTION_TUNNEL_DECAP:
1595			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DECAP)) {
1596				rc = -EINVAL;
1597				NL_SET_ERR_MSG_MOD(extack, "Decap action violates action order");
1598				goto release;
1599			}
1600			act->decap = 1;
1601			/* If we previously delivered/trapped to uplink, now
1602			 * that we've decapped we'll want another copy if we
1603			 * try to deliver/trap to uplink again.
1604			 */
1605			uplinked = false;
1606			break;
1607		default:
1608			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
1609					       fa->id);
1610			rc = -EOPNOTSUPP;
1611			goto release;
1612		}
1613	}
1614
1615	if (act) {
1616		if (!uplinked) {
1617			/* Not shot/redirected, so deliver to default dest (which is
1618			 * the uplink, as this is an ingress filter)
1619			 */
1620			efx_mae_mport_uplink(efx, &act->dest_mport);
1621			act->deliver = 1;
1622		}
1623		rc = efx_mae_alloc_action_set(efx, act);
1624		if (rc) {
1625			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
1626			goto release;
1627		}
1628		list_add_tail(&act->list, &rule->acts.list);
1629		act = NULL; /* Prevent double-free in error path */
1630	}
1631
1632	rule->match = match;
1633
1634	netif_dbg(efx, drv, efx->net_dev,
1635		  "Successfully parsed foreign filter (cookie %lx)\n",
1636		  tc->cookie);
1637
1638	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
1639	if (rc) {
1640		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
1641		goto release;
1642	}
1643	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
1644				 rule->acts.fw_id, &rule->fw_id);
1645	if (rc) {
1646		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1647		goto release_acts;
1648	}
1649	return 0;
1650
1651release_acts:
1652	efx_mae_free_action_set_list(efx, &rule->acts);
1653release:
1654	/* We failed to insert the rule, so free up any entries we created in
1655	 * subsidiary tables.
1656	 */
1657	if (match.rid)
1658		efx_tc_put_recirc_id(efx, match.rid);
1659	if (act)
1660		efx_tc_free_action_set(efx, act, false);
1661	if (rule) {
1662		if (!old)
1663			rhashtable_remove_fast(&efx->tc->match_action_ht,
1664					       &rule->linkage,
1665					       efx_tc_match_action_ht_params);
1666		efx_tc_free_action_set_list(efx, &rule->acts, false);
1667	}
1668	kfree(rule);
1669	if (match.encap)
1670		efx_tc_flower_release_encap_match(efx, match.encap);
1671	return rc;
1672}
1673
1674static int efx_tc_flower_replace_lhs(struct efx_nic *efx,
1675				     struct flow_cls_offload *tc,
1676				     struct flow_rule *fr,
1677				     struct efx_tc_match *match,
1678				     struct efx_rep *efv,
1679				     struct net_device *net_dev)
1680{
1681	struct netlink_ext_ack *extack = tc->common.extack;
1682	struct efx_tc_lhs_rule *rule, *old;
1683	int rc;
1684
1685	if (tc->common.chain_index) {
1686		NL_SET_ERR_MSG_MOD(extack, "LHS rule only allowed in chain 0");
1687		return -EOPNOTSUPP;
1688	}
1689
1690	if (match->mask.ct_state_trk && match->value.ct_state_trk) {
1691		NL_SET_ERR_MSG_MOD(extack, "LHS rule can never match +trk");
1692		return -EOPNOTSUPP;
1693	}
1694	/* LHS rules are always -trk, so we don't need to match on that */
1695	match->mask.ct_state_trk = 0;
1696	match->value.ct_state_trk = 0;
1697
1698	rc = efx_mae_match_check_caps_lhs(efx, &match->mask, extack);
1699	if (rc)
1700		return rc;
1701
1702	rule = kzalloc(sizeof(*rule), GFP_USER);
1703	if (!rule)
1704		return -ENOMEM;
1705	rule->cookie = tc->cookie;
1706	old = rhashtable_lookup_get_insert_fast(&efx->tc->lhs_rule_ht,
1707						&rule->linkage,
1708						efx_tc_lhs_rule_ht_params);
1709	if (IS_ERR(old)) {
1710		rc = PTR_ERR(old);
1711		goto release;
1712	} else if (old) {
1713		netif_dbg(efx, drv, efx->net_dev,
1714			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
1715		rc = -EEXIST;
1716		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
1717		goto release;
1718	}
1719
1720	/* Parse actions */
1721	/* See note in efx_tc_flower_replace() regarding passed net_dev
1722	 * (used for efx_tc_get_recirc_id()).
1723	 */
1724	rc = efx_tc_flower_handle_lhs_actions(efx, tc, fr, efx->net_dev, rule);
1725	if (rc)
1726		goto release;
1727
1728	rule->match = *match;
1729
1730	rc = efx_mae_insert_lhs_rule(efx, rule, EFX_TC_PRIO_TC);
1731	if (rc) {
1732		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
1733		goto release;
1734	}
1735	netif_dbg(efx, drv, efx->net_dev,
1736		  "Successfully parsed lhs rule (cookie %lx)\n",
1737		  tc->cookie);
1738	return 0;
1739
1740release:
1741	efx_tc_flower_release_lhs_actions(efx, &rule->lhs_act);
1742	if (!old)
1743		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &rule->linkage,
1744				       efx_tc_lhs_rule_ht_params);
1745	kfree(rule);
1746	return rc;
1747}
1748
1749static int efx_tc_flower_replace(struct efx_nic *efx,
1750				 struct net_device *net_dev,
1751				 struct flow_cls_offload *tc,
1752				 struct efx_rep *efv)
1753{
1754	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
1755	struct netlink_ext_ack *extack = tc->common.extack;
1756	const struct ip_tunnel_info *encap_info = NULL;
1757	struct efx_tc_flow_rule *rule = NULL, *old;
1758	struct efx_tc_mangler_state mung = {};
1759	struct efx_tc_action_set *act = NULL;
1760	const struct flow_action_entry *fa;
1761	struct efx_rep *from_efv, *to_efv;
1762	struct efx_tc_match match;
1763	u32 acts_id;
1764	s64 rc;
1765	int i;
1766
1767	if (!tc_can_offload_extack(efx->net_dev, extack))
1768		return -EOPNOTSUPP;
1769	if (WARN_ON(!efx->tc))
1770		return -ENETDOWN;
1771	if (WARN_ON(!efx->tc->up))
1772		return -ENETDOWN;
1773
1774	from_efv = efx_tc_flower_lookup_efv(efx, net_dev);
1775	if (IS_ERR(from_efv)) {
1776		/* Not from our PF or representors, so probably a tunnel dev */
1777		return efx_tc_flower_replace_foreign(efx, net_dev, tc);
1778	}
1779
1780	if (efv != from_efv) {
1781		/* can't happen */
1782		NL_SET_ERR_MSG_FMT_MOD(extack, "for %s efv is %snull but from_efv is %snull (can't happen)",
1783				       netdev_name(net_dev), efv ? "non-" : "",
1784				       from_efv ? "non-" : "");
1785		return -EINVAL;
1786	}
1787
1788	/* Parse match */
1789	memset(&match, 0, sizeof(match));
1790	rc = efx_tc_flower_external_mport(efx, from_efv);
1791	if (rc < 0) {
1792		NL_SET_ERR_MSG_MOD(extack, "Failed to identify ingress m-port");
1793		return rc;
1794	}
1795	match.value.ingress_port = rc;
1796	match.mask.ingress_port = ~0;
1797	rc = efx_tc_flower_parse_match(efx, fr, &match, extack);
1798	if (rc)
1799		return rc;
1800	if (efx_tc_match_is_encap(&match.mask)) {
1801		NL_SET_ERR_MSG_MOD(extack, "Ingress enc_key matches not supported");
1802		return -EOPNOTSUPP;
1803	}
1804
1805	if (efx_tc_rule_is_lhs_rule(fr, &match))
1806		return efx_tc_flower_replace_lhs(efx, tc, fr, &match, efv,
1807						 net_dev);
1808
1809	/* chain_index 0 is always recirc_id 0 (and does not appear in recirc_ht).
1810	 * Conveniently, match.rid == NULL and match.value.recirc_id == 0 owing
1811	 * to the initial memset(), so we don't need to do anything in that case.
1812	 */
1813	if (tc->common.chain_index) {
1814		struct efx_tc_recirc_id *rid;
1815
1816		/* Note regarding passed net_dev:
1817		 * VFreps and PF can share chain namespace, as they have
1818		 * distinct ingress_mports.  So we don't need to burn an
1819		 * extra recirc_id if both use the same chain_index.
1820		 * (Strictly speaking, we could give each VFrep its own
1821		 * recirc_id namespace that doesn't take IDs away from the
1822		 * PF, but that would require a bunch of additional IDAs -
1823		 * one for each representor - and that's not likely to be
1824		 * the main cause of recirc_id exhaustion anyway.)
1825		 */
1826		rid = efx_tc_get_recirc_id(efx, tc->common.chain_index,
1827					   efx->net_dev);
1828		if (IS_ERR(rid)) {
1829			NL_SET_ERR_MSG_FMT_MOD(extack,
1830					       "Failed to allocate a hardware recirculation ID for chain_index %u",
1831					       tc->common.chain_index);
1832			return PTR_ERR(rid);
1833		}
1834		match.rid = rid;
1835		match.value.recirc_id = rid->fw_id;
1836	}
1837	match.mask.recirc_id = 0xff;
1838
1839	/* AR table can't match on DO_CT (+trk).  But a commonly used pattern is
1840	 * +trk+est, which is strictly implied by +est, so rewrite it to that.
1841	 */
1842	if (match.mask.ct_state_trk && match.value.ct_state_trk &&
1843	    match.mask.ct_state_est && match.value.ct_state_est)
1844		match.mask.ct_state_trk = 0;
1845	/* Thanks to CT_TCP_FLAGS_INHIBIT, packets with interesting flags could
1846	 * match +trk-est (CT_HIT=0) despite being on an established connection.
1847	 * So make -est imply -tcp_syn_fin_rst match to ensure these packets
1848	 * still hit the software path.
1849	 */
1850	if (match.mask.ct_state_est && !match.value.ct_state_est) {
1851		if (match.value.tcp_syn_fin_rst) {
1852			/* Can't offload this combination */
1853			rc = -EOPNOTSUPP;
1854			goto release;
1855		}
1856		match.mask.tcp_syn_fin_rst = true;
1857	}
1858
1859	rc = efx_mae_match_check_caps(efx, &match.mask, extack);
1860	if (rc)
1861		goto release;
1862
1863	rule = kzalloc(sizeof(*rule), GFP_USER);
1864	if (!rule) {
1865		rc = -ENOMEM;
1866		goto release;
1867	}
1868	INIT_LIST_HEAD(&rule->acts.list);
1869	rule->cookie = tc->cookie;
1870	old = rhashtable_lookup_get_insert_fast(&efx->tc->match_action_ht,
1871						&rule->linkage,
1872						efx_tc_match_action_ht_params);
1873	if (IS_ERR(old)) {
1874		rc = PTR_ERR(old);
1875		goto release;
1876	} else if (old) {
1877		netif_dbg(efx, drv, efx->net_dev,
1878			  "Already offloaded rule (cookie %lx)\n", tc->cookie);
1879		NL_SET_ERR_MSG_MOD(extack, "Rule already offloaded");
1880		rc = -EEXIST;
1881		goto release;
1882	}
1883
1884	/* Parse actions */
1885	act = kzalloc(sizeof(*act), GFP_USER);
1886	if (!act) {
1887		rc = -ENOMEM;
1888		goto release;
1889	}
1890
1891	/**
1892	 * DOC: TC action translation
1893	 *
1894	 * Actions in TC are sequential and cumulative, with delivery actions
1895	 * potentially anywhere in the order.  The EF100 MAE, however, takes
1896	 * an 'action set list' consisting of 'action sets', each of which is
1897	 * applied to the _original_ packet, and consists of a set of optional
1898	 * actions in a fixed order with delivery at the end.
1899	 * To translate between these two models, we maintain a 'cursor', @act,
1900	 * which describes the cumulative effect of all the packet-mutating
1901	 * actions encountered so far; on handling a delivery (mirred or drop)
1902	 * action, once the action-set has been inserted into hardware, we
1903	 * append @act to the action-set list (@rule->acts); if this is a pipe
1904	 * action (mirred mirror) we then allocate a new @act with a copy of
1905	 * the cursor state _before_ the delivery action, otherwise we set @act
1906	 * to %NULL.
1907	 * This ensures that every allocated action-set is either attached to
1908	 * @rule->acts or pointed to by @act (and never both), and that only
1909	 * those action-sets in @rule->acts exist in hardware.  Consequently,
1910	 * in the failure path, @act only needs to be freed in memory, whereas
1911	 * for @rule->acts we remove each action-set from hardware before
1912	 * freeing it (efx_tc_free_action_set_list()), even if the action-set
1913	 * list itself is not in hardware.
1914	 */
1915	flow_action_for_each(i, fa, &fr->action) {
1916		struct efx_tc_action_set save;
1917		u16 tci;
1918
1919		if (!act) {
1920			/* more actions after a non-pipe action */
1921			NL_SET_ERR_MSG_MOD(extack, "Action follows non-pipe action");
1922			rc = -EINVAL;
1923			goto release;
1924		}
1925
1926		if ((fa->id == FLOW_ACTION_REDIRECT ||
1927		     fa->id == FLOW_ACTION_MIRRED ||
1928		     fa->id == FLOW_ACTION_DROP) && fa->hw_stats) {
1929			struct efx_tc_counter_index *ctr;
1930
1931			/* Currently the only actions that want stats are
1932			 * mirred and gact (ok, shot, trap, goto-chain), which
1933			 * means we want stats just before delivery.  Also,
1934			 * note that tunnel_key set shouldn't change the length
1935			 * — it's only the subsequent mirred that does that,
1936			 * and the stats are taken _before_ the mirred action
1937			 * happens.
1938			 */
1939			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_COUNT)) {
1940				/* All supported actions that count either steal
1941				 * (gact shot, mirred redirect) or clone act
1942				 * (mirred mirror), so we should never get two
1943				 * count actions on one action_set.
1944				 */
1945				NL_SET_ERR_MSG_MOD(extack, "Count-action conflict (can't happen)");
1946				rc = -EOPNOTSUPP;
1947				goto release;
1948			}
1949
1950			if (!(fa->hw_stats & FLOW_ACTION_HW_STATS_DELAYED)) {
1951				NL_SET_ERR_MSG_FMT_MOD(extack, "hw_stats_type %u not supported (only 'delayed')",
1952						       fa->hw_stats);
1953				rc = -EOPNOTSUPP;
1954				goto release;
1955			}
1956
1957			ctr = efx_tc_flower_get_counter_index(efx, tc->cookie,
1958							      EFX_TC_COUNTER_TYPE_AR);
1959			if (IS_ERR(ctr)) {
1960				rc = PTR_ERR(ctr);
1961				NL_SET_ERR_MSG_MOD(extack, "Failed to obtain a counter");
1962				goto release;
1963			}
1964			act->count = ctr;
1965			INIT_LIST_HEAD(&act->count_user);
1966		}
1967
1968		switch (fa->id) {
1969		case FLOW_ACTION_DROP:
1970			rc = efx_mae_alloc_action_set(efx, act);
1971			if (rc) {
1972				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (drop)");
1973				goto release;
1974			}
1975			list_add_tail(&act->list, &rule->acts.list);
1976			act = NULL; /* end of the line */
1977			break;
1978		case FLOW_ACTION_REDIRECT:
1979		case FLOW_ACTION_MIRRED:
1980			save = *act;
1981
1982			if (encap_info) {
1983				struct efx_tc_encap_action *encap;
1984
1985				if (!efx_tc_flower_action_order_ok(act,
1986								   EFX_TC_AO_ENCAP)) {
1987					rc = -EOPNOTSUPP;
1988					NL_SET_ERR_MSG_MOD(extack, "Encap action violates action order");
1989					goto release;
1990				}
1991				encap = efx_tc_flower_create_encap_md(
1992						efx, encap_info, fa->dev, extack);
1993				if (IS_ERR_OR_NULL(encap)) {
1994					rc = PTR_ERR(encap);
1995					if (!rc)
1996						rc = -EIO; /* arbitrary */
1997					goto release;
1998				}
1999				act->encap_md = encap;
2000				list_add_tail(&act->encap_user, &encap->users);
2001				act->dest_mport = encap->dest_mport;
2002				act->deliver = 1;
2003				if (act->count && !WARN_ON(!act->count->cnt)) {
2004					/* This counter is used by an encap
2005					 * action, which needs a reference back
2006					 * so it can prod neighbouring whenever
2007					 * traffic is seen.
2008					 */
2009					spin_lock_bh(&act->count->cnt->lock);
2010					list_add_tail(&act->count_user,
2011						      &act->count->cnt->users);
2012					spin_unlock_bh(&act->count->cnt->lock);
2013				}
2014				rc = efx_mae_alloc_action_set(efx, act);
2015				if (rc) {
2016					NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (encap)");
2017					goto release;
2018				}
2019				list_add_tail(&act->list, &rule->acts.list);
2020				act->user = &rule->acts;
2021				act = NULL;
2022				if (fa->id == FLOW_ACTION_REDIRECT)
2023					break; /* end of the line */
2024				/* Mirror, so continue on with saved act */
2025				save.count = NULL;
2026				act = kzalloc(sizeof(*act), GFP_USER);
2027				if (!act) {
2028					rc = -ENOMEM;
2029					goto release;
2030				}
2031				*act = save;
2032				break;
2033			}
2034
2035			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_DELIVER)) {
2036				/* can't happen */
2037				rc = -EOPNOTSUPP;
2038				NL_SET_ERR_MSG_MOD(extack, "Deliver action violates action order (can't happen)");
2039				goto release;
2040			}
2041
2042			to_efv = efx_tc_flower_lookup_efv(efx, fa->dev);
2043			if (IS_ERR(to_efv)) {
2044				NL_SET_ERR_MSG_MOD(extack, "Mirred egress device not on switch");
2045				rc = PTR_ERR(to_efv);
2046				goto release;
2047			}
2048			rc = efx_tc_flower_external_mport(efx, to_efv);
2049			if (rc < 0) {
2050				NL_SET_ERR_MSG_MOD(extack, "Failed to identify egress m-port");
2051				goto release;
2052			}
2053			act->dest_mport = rc;
2054			act->deliver = 1;
2055			rc = efx_mae_alloc_action_set(efx, act);
2056			if (rc) {
2057				NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (mirred)");
2058				goto release;
2059			}
2060			list_add_tail(&act->list, &rule->acts.list);
2061			act = NULL;
2062			if (fa->id == FLOW_ACTION_REDIRECT)
2063				break; /* end of the line */
2064			/* Mirror, so continue on with saved act */
2065			save.count = NULL;
2066			act = kzalloc(sizeof(*act), GFP_USER);
2067			if (!act) {
2068				rc = -ENOMEM;
2069				goto release;
2070			}
2071			*act = save;
2072			break;
2073		case FLOW_ACTION_VLAN_POP:
2074			if (act->vlan_push) {
2075				act->vlan_push--;
2076			} else if (efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_POP)) {
2077				act->vlan_pop++;
2078			} else {
2079				NL_SET_ERR_MSG_MOD(extack,
2080						   "More than two VLAN pops, or action order violated");
2081				rc = -EINVAL;
2082				goto release;
2083			}
2084			break;
2085		case FLOW_ACTION_VLAN_PUSH:
2086			if (!efx_tc_flower_action_order_ok(act, EFX_TC_AO_VLAN_PUSH)) {
2087				rc = -EINVAL;
2088				NL_SET_ERR_MSG_MOD(extack,
2089						   "More than two VLAN pushes, or action order violated");
2090				goto release;
2091			}
2092			tci = fa->vlan.vid & VLAN_VID_MASK;
2093			tci |= fa->vlan.prio << VLAN_PRIO_SHIFT;
2094			act->vlan_tci[act->vlan_push] = cpu_to_be16(tci);
2095			act->vlan_proto[act->vlan_push] = fa->vlan.proto;
2096			act->vlan_push++;
2097			break;
2098		case FLOW_ACTION_ADD:
2099			rc = efx_tc_pedit_add(efx, act, fa, extack);
2100			if (rc < 0)
2101				goto release;
2102			break;
2103		case FLOW_ACTION_MANGLE:
2104			rc = efx_tc_mangle(efx, act, fa, &mung, extack, &match);
2105			if (rc < 0)
2106				goto release;
2107			break;
2108		case FLOW_ACTION_TUNNEL_ENCAP:
2109			if (encap_info) {
2110				/* Can't specify encap multiple times.
2111				 * If you want to overwrite an existing
2112				 * encap_info, use an intervening
2113				 * FLOW_ACTION_TUNNEL_DECAP to clear it.
2114				 */
2115				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set when already set");
2116				rc = -EINVAL;
2117				goto release;
2118			}
2119			if (!fa->tunnel) {
2120				NL_SET_ERR_MSG_MOD(extack, "Tunnel key set is missing key");
2121				rc = -EOPNOTSUPP;
2122				goto release;
2123			}
2124			encap_info = fa->tunnel;
2125			break;
2126		case FLOW_ACTION_TUNNEL_DECAP:
2127			if (encap_info) {
2128				encap_info = NULL;
2129				break;
2130			}
2131			/* Since we don't support enc_key matches on ingress
2132			 * (and if we did there'd be no tunnel-device to give
2133			 * us a type), we can't offload a decap that's not
2134			 * just undoing a previous encap action.
2135			 */
2136			NL_SET_ERR_MSG_MOD(extack, "Cannot offload tunnel decap action without tunnel device");
2137			rc = -EOPNOTSUPP;
2138			goto release;
2139		default:
2140			NL_SET_ERR_MSG_FMT_MOD(extack, "Unhandled action %u",
2141					       fa->id);
2142			rc = -EOPNOTSUPP;
2143			goto release;
2144		}
2145	}
2146
2147	rc = efx_tc_incomplete_mangle(&mung, extack);
2148	if (rc < 0)
2149		goto release;
2150	if (act) {
2151		/* Not shot/redirected, so deliver to default dest */
2152		if (from_efv == EFX_EFV_PF)
2153			/* Rule applies to traffic from the wire,
2154			 * and default dest is thus the PF
2155			 */
2156			efx_mae_mport_uplink(efx, &act->dest_mport);
2157		else
2158			/* Representor, so rule applies to traffic from
2159			 * representee, and default dest is thus the rep.
2160			 * All reps use the same mport for delivery
2161			 */
2162			efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2163					    &act->dest_mport);
2164		act->deliver = 1;
2165		rc = efx_mae_alloc_action_set(efx, act);
2166		if (rc) {
2167			NL_SET_ERR_MSG_MOD(extack, "Failed to write action set to hw (deliver)");
2168			goto release;
2169		}
2170		list_add_tail(&act->list, &rule->acts.list);
2171		act = NULL; /* Prevent double-free in error path */
2172	}
2173
2174	netif_dbg(efx, drv, efx->net_dev,
2175		  "Successfully parsed filter (cookie %lx)\n",
2176		  tc->cookie);
2177
2178	rule->match = match;
2179
2180	rc = efx_mae_alloc_action_set_list(efx, &rule->acts);
2181	if (rc) {
2182		NL_SET_ERR_MSG_MOD(extack, "Failed to write action set list to hw");
2183		goto release;
2184	}
2185	if (from_efv == EFX_EFV_PF)
2186		/* PF netdev, so rule applies to traffic from wire */
2187		rule->fallback = &efx->tc->facts.pf;
2188	else
2189		/* repdev, so rule applies to traffic from representee */
2190		rule->fallback = &efx->tc->facts.reps;
2191	if (!efx_tc_check_ready(efx, rule)) {
2192		netif_dbg(efx, drv, efx->net_dev, "action not ready for hw\n");
2193		acts_id = rule->fallback->fw_id;
2194	} else {
2195		netif_dbg(efx, drv, efx->net_dev, "ready for hw\n");
2196		acts_id = rule->acts.fw_id;
2197	}
2198	rc = efx_mae_insert_rule(efx, &rule->match, EFX_TC_PRIO_TC,
2199				 acts_id, &rule->fw_id);
2200	if (rc) {
2201		NL_SET_ERR_MSG_MOD(extack, "Failed to insert rule in hw");
2202		goto release_acts;
2203	}
2204	return 0;
2205
2206release_acts:
2207	efx_mae_free_action_set_list(efx, &rule->acts);
2208release:
2209	/* We failed to insert the rule, so free up any entries we created in
2210	 * subsidiary tables.
2211	 */
2212	if (match.rid)
2213		efx_tc_put_recirc_id(efx, match.rid);
2214	if (act)
2215		efx_tc_free_action_set(efx, act, false);
2216	if (rule) {
2217		if (!old)
2218			rhashtable_remove_fast(&efx->tc->match_action_ht,
2219					       &rule->linkage,
2220					       efx_tc_match_action_ht_params);
2221		efx_tc_free_action_set_list(efx, &rule->acts, false);
2222	}
2223	kfree(rule);
2224	return rc;
2225}
2226
2227static int efx_tc_flower_destroy(struct efx_nic *efx,
2228				 struct net_device *net_dev,
2229				 struct flow_cls_offload *tc)
2230{
2231	struct netlink_ext_ack *extack = tc->common.extack;
2232	struct efx_tc_lhs_rule *lhs_rule;
2233	struct efx_tc_flow_rule *rule;
2234
2235	lhs_rule = rhashtable_lookup_fast(&efx->tc->lhs_rule_ht, &tc->cookie,
2236					  efx_tc_lhs_rule_ht_params);
2237	if (lhs_rule) {
2238		/* Remove it from HW */
2239		efx_mae_remove_lhs_rule(efx, lhs_rule);
2240		/* Delete it from SW */
2241		efx_tc_flower_release_lhs_actions(efx, &lhs_rule->lhs_act);
2242		rhashtable_remove_fast(&efx->tc->lhs_rule_ht, &lhs_rule->linkage,
2243				       efx_tc_lhs_rule_ht_params);
2244		if (lhs_rule->match.encap)
2245			efx_tc_flower_release_encap_match(efx, lhs_rule->match.encap);
2246		netif_dbg(efx, drv, efx->net_dev, "Removed (lhs) filter %lx\n",
2247			  lhs_rule->cookie);
2248		kfree(lhs_rule);
2249		return 0;
2250	}
2251
2252	rule = rhashtable_lookup_fast(&efx->tc->match_action_ht, &tc->cookie,
2253				      efx_tc_match_action_ht_params);
2254	if (!rule) {
2255		/* Only log a message if we're the ingress device.  Otherwise
2256		 * it's a foreign filter and we might just not have been
2257		 * interested (e.g. we might not have been the egress device
2258		 * either).
2259		 */
2260		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
2261			netif_warn(efx, drv, efx->net_dev,
2262				   "Filter %lx not found to remove\n", tc->cookie);
2263		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
2264		return -ENOENT;
2265	}
2266
2267	/* Remove it from HW */
2268	efx_tc_delete_rule(efx, rule);
2269	/* Delete it from SW */
2270	rhashtable_remove_fast(&efx->tc->match_action_ht, &rule->linkage,
2271			       efx_tc_match_action_ht_params);
2272	netif_dbg(efx, drv, efx->net_dev, "Removed filter %lx\n", rule->cookie);
2273	kfree(rule);
2274	return 0;
2275}
2276
2277static int efx_tc_flower_stats(struct efx_nic *efx, struct net_device *net_dev,
2278			       struct flow_cls_offload *tc)
2279{
2280	struct netlink_ext_ack *extack = tc->common.extack;
2281	struct efx_tc_counter_index *ctr;
2282	struct efx_tc_counter *cnt;
2283	u64 packets, bytes;
2284
2285	ctr = efx_tc_flower_find_counter_index(efx, tc->cookie);
2286	if (!ctr) {
2287		/* See comment in efx_tc_flower_destroy() */
2288		if (!IS_ERR(efx_tc_flower_lookup_efv(efx, net_dev)))
2289			if (net_ratelimit())
2290				netif_warn(efx, drv, efx->net_dev,
2291					   "Filter %lx not found for stats\n",
2292					   tc->cookie);
2293		NL_SET_ERR_MSG_MOD(extack, "Flow cookie not found in offloaded rules");
2294		return -ENOENT;
2295	}
2296	if (WARN_ON(!ctr->cnt)) /* can't happen */
2297		return -EIO;
2298	cnt = ctr->cnt;
2299
2300	spin_lock_bh(&cnt->lock);
2301	/* Report only new pkts/bytes since last time TC asked */
2302	packets = cnt->packets;
2303	bytes = cnt->bytes;
2304	flow_stats_update(&tc->stats, bytes - cnt->old_bytes,
2305			  packets - cnt->old_packets, 0, cnt->touched,
2306			  FLOW_ACTION_HW_STATS_DELAYED);
2307	cnt->old_packets = packets;
2308	cnt->old_bytes = bytes;
2309	spin_unlock_bh(&cnt->lock);
2310	return 0;
2311}
2312
2313int efx_tc_flower(struct efx_nic *efx, struct net_device *net_dev,
2314		  struct flow_cls_offload *tc, struct efx_rep *efv)
2315{
2316	int rc;
2317
2318	if (!efx->tc)
2319		return -EOPNOTSUPP;
2320
2321	mutex_lock(&efx->tc->mutex);
2322	switch (tc->command) {
2323	case FLOW_CLS_REPLACE:
2324		rc = efx_tc_flower_replace(efx, net_dev, tc, efv);
2325		break;
2326	case FLOW_CLS_DESTROY:
2327		rc = efx_tc_flower_destroy(efx, net_dev, tc);
2328		break;
2329	case FLOW_CLS_STATS:
2330		rc = efx_tc_flower_stats(efx, net_dev, tc);
2331		break;
2332	default:
2333		rc = -EOPNOTSUPP;
2334		break;
2335	}
2336	mutex_unlock(&efx->tc->mutex);
2337	return rc;
2338}
2339
2340static int efx_tc_configure_default_rule(struct efx_nic *efx, u32 ing_port,
2341					 u32 eg_port, struct efx_tc_flow_rule *rule)
2342{
2343	struct efx_tc_action_set_list *acts = &rule->acts;
2344	struct efx_tc_match *match = &rule->match;
2345	struct efx_tc_action_set *act;
2346	int rc;
2347
2348	match->value.ingress_port = ing_port;
2349	match->mask.ingress_port = ~0;
2350	act = kzalloc(sizeof(*act), GFP_KERNEL);
2351	if (!act)
2352		return -ENOMEM;
2353	act->deliver = 1;
2354	act->dest_mport = eg_port;
2355	rc = efx_mae_alloc_action_set(efx, act);
2356	if (rc)
2357		goto fail1;
2358	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
2359	list_add_tail(&act->list, &acts->list);
2360	rc = efx_mae_alloc_action_set_list(efx, acts);
2361	if (rc)
2362		goto fail2;
2363	rc = efx_mae_insert_rule(efx, match, EFX_TC_PRIO_DFLT,
2364				 acts->fw_id, &rule->fw_id);
2365	if (rc)
2366		goto fail3;
2367	return 0;
2368fail3:
2369	efx_mae_free_action_set_list(efx, acts);
2370fail2:
2371	list_del(&act->list);
2372	efx_mae_free_action_set(efx, act->fw_id);
2373fail1:
2374	kfree(act);
2375	return rc;
2376}
2377
2378static int efx_tc_configure_default_rule_pf(struct efx_nic *efx)
2379{
2380	struct efx_tc_flow_rule *rule = &efx->tc->dflt.pf;
2381	u32 ing_port, eg_port;
2382
2383	efx_mae_mport_uplink(efx, &ing_port);
2384	efx_mae_mport_wire(efx, &eg_port);
2385	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2386}
2387
2388static int efx_tc_configure_default_rule_wire(struct efx_nic *efx)
2389{
2390	struct efx_tc_flow_rule *rule = &efx->tc->dflt.wire;
2391	u32 ing_port, eg_port;
2392
2393	efx_mae_mport_wire(efx, &ing_port);
2394	efx_mae_mport_uplink(efx, &eg_port);
2395	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2396}
2397
2398int efx_tc_configure_default_rule_rep(struct efx_rep *efv)
2399{
2400	struct efx_tc_flow_rule *rule = &efv->dflt;
2401	struct efx_nic *efx = efv->parent;
2402	u32 ing_port, eg_port;
2403
2404	efx_mae_mport_mport(efx, efv->mport, &ing_port);
2405	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
2406	return efx_tc_configure_default_rule(efx, ing_port, eg_port, rule);
2407}
2408
2409void efx_tc_deconfigure_default_rule(struct efx_nic *efx,
2410				     struct efx_tc_flow_rule *rule)
2411{
2412	if (rule->fw_id != MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL)
2413		efx_tc_delete_rule(efx, rule);
2414	rule->fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2415}
2416
2417static int efx_tc_configure_fallback_acts(struct efx_nic *efx, u32 eg_port,
2418					  struct efx_tc_action_set_list *acts)
2419{
2420	struct efx_tc_action_set *act;
2421	int rc;
2422
2423	act = kzalloc(sizeof(*act), GFP_KERNEL);
2424	if (!act)
2425		return -ENOMEM;
2426	act->deliver = 1;
2427	act->dest_mport = eg_port;
2428	rc = efx_mae_alloc_action_set(efx, act);
2429	if (rc)
2430		goto fail1;
2431	EFX_WARN_ON_PARANOID(!list_empty(&acts->list));
2432	list_add_tail(&act->list, &acts->list);
2433	rc = efx_mae_alloc_action_set_list(efx, acts);
2434	if (rc)
2435		goto fail2;
2436	return 0;
2437fail2:
2438	list_del(&act->list);
2439	efx_mae_free_action_set(efx, act->fw_id);
2440fail1:
2441	kfree(act);
2442	return rc;
2443}
2444
2445static int efx_tc_configure_fallback_acts_pf(struct efx_nic *efx)
2446{
2447	struct efx_tc_action_set_list *acts = &efx->tc->facts.pf;
2448	u32 eg_port;
2449
2450	efx_mae_mport_uplink(efx, &eg_port);
2451	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
2452}
2453
2454static int efx_tc_configure_fallback_acts_reps(struct efx_nic *efx)
2455{
2456	struct efx_tc_action_set_list *acts = &efx->tc->facts.reps;
2457	u32 eg_port;
2458
2459	efx_mae_mport_mport(efx, efx->tc->reps_mport_id, &eg_port);
2460	return efx_tc_configure_fallback_acts(efx, eg_port, acts);
2461}
2462
2463static void efx_tc_deconfigure_fallback_acts(struct efx_nic *efx,
2464					     struct efx_tc_action_set_list *acts)
2465{
2466	efx_tc_free_action_set_list(efx, acts, true);
2467}
2468
2469static int efx_tc_configure_rep_mport(struct efx_nic *efx)
2470{
2471	u32 rep_mport_label;
2472	int rc;
2473
2474	rc = efx_mae_allocate_mport(efx, &efx->tc->reps_mport_id, &rep_mport_label);
2475	if (rc)
2476		return rc;
2477	pci_dbg(efx->pci_dev, "created rep mport 0x%08x (0x%04x)\n",
2478		efx->tc->reps_mport_id, rep_mport_label);
2479	/* Use mport *selector* as vport ID */
2480	efx_mae_mport_mport(efx, efx->tc->reps_mport_id,
2481			    &efx->tc->reps_mport_vport_id);
2482	return 0;
2483}
2484
2485static void efx_tc_deconfigure_rep_mport(struct efx_nic *efx)
2486{
2487	efx_mae_free_mport(efx, efx->tc->reps_mport_id);
2488	efx->tc->reps_mport_id = MAE_MPORT_SELECTOR_NULL;
2489}
2490
2491int efx_tc_insert_rep_filters(struct efx_nic *efx)
2492{
2493	struct efx_filter_spec promisc, allmulti;
2494	int rc;
2495
2496	if (efx->type->is_vf)
2497		return 0;
2498	if (!efx->tc)
2499		return 0;
2500	efx_filter_init_rx(&promisc, EFX_FILTER_PRI_REQUIRED, 0, 0);
2501	efx_filter_set_uc_def(&promisc);
2502	efx_filter_set_vport_id(&promisc, efx->tc->reps_mport_vport_id);
2503	rc = efx_filter_insert_filter(efx, &promisc, false);
2504	if (rc < 0)
2505		return rc;
2506	efx->tc->reps_filter_uc = rc;
2507	efx_filter_init_rx(&allmulti, EFX_FILTER_PRI_REQUIRED, 0, 0);
2508	efx_filter_set_mc_def(&allmulti);
2509	efx_filter_set_vport_id(&allmulti, efx->tc->reps_mport_vport_id);
2510	rc = efx_filter_insert_filter(efx, &allmulti, false);
2511	if (rc < 0)
2512		return rc;
2513	efx->tc->reps_filter_mc = rc;
2514	return 0;
2515}
2516
2517void efx_tc_remove_rep_filters(struct efx_nic *efx)
2518{
2519	if (efx->type->is_vf)
2520		return;
2521	if (!efx->tc)
2522		return;
2523	if (efx->tc->reps_filter_mc >= 0)
2524		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_mc);
2525	efx->tc->reps_filter_mc = -1;
2526	if (efx->tc->reps_filter_uc >= 0)
2527		efx_filter_remove_id_safe(efx, EFX_FILTER_PRI_REQUIRED, efx->tc->reps_filter_uc);
2528	efx->tc->reps_filter_uc = -1;
2529}
2530
2531int efx_init_tc(struct efx_nic *efx)
2532{
2533	int rc;
2534
2535	rc = efx_mae_get_caps(efx, efx->tc->caps);
2536	if (rc)
2537		return rc;
2538	if (efx->tc->caps->match_field_count > MAE_NUM_FIELDS)
2539		/* Firmware supports some match fields the driver doesn't know
2540		 * about.  Not fatal, unless any of those fields are required
2541		 * (MAE_FIELD_SUPPORTED_MATCH_ALWAYS) but if so we don't know.
2542		 */
2543		netif_warn(efx, probe, efx->net_dev,
2544			   "FW reports additional match fields %u\n",
2545			   efx->tc->caps->match_field_count);
2546	if (efx->tc->caps->action_prios < EFX_TC_PRIO__NUM) {
2547		netif_err(efx, probe, efx->net_dev,
2548			  "Too few action prios supported (have %u, need %u)\n",
2549			  efx->tc->caps->action_prios, EFX_TC_PRIO__NUM);
2550		return -EIO;
2551	}
2552	rc = efx_tc_configure_default_rule_pf(efx);
2553	if (rc)
2554		return rc;
2555	rc = efx_tc_configure_default_rule_wire(efx);
2556	if (rc)
2557		return rc;
2558	rc = efx_tc_configure_rep_mport(efx);
2559	if (rc)
2560		return rc;
2561	rc = efx_tc_configure_fallback_acts_pf(efx);
2562	if (rc)
2563		return rc;
2564	rc = efx_tc_configure_fallback_acts_reps(efx);
2565	if (rc)
2566		return rc;
2567	rc = efx_mae_get_tables(efx);
2568	if (rc)
2569		return rc;
2570	rc = flow_indr_dev_register(efx_tc_indr_setup_cb, efx);
2571	if (rc)
2572		goto out_free;
2573	efx->tc->up = true;
2574	return 0;
2575out_free:
2576	efx_mae_free_tables(efx);
2577	return rc;
2578}
2579
2580void efx_fini_tc(struct efx_nic *efx)
2581{
2582	/* We can get called even if efx_init_struct_tc() failed */
2583	if (!efx->tc)
2584		return;
2585	if (efx->tc->up)
2586		flow_indr_dev_unregister(efx_tc_indr_setup_cb, efx, efx_tc_block_unbind);
2587	efx_tc_deconfigure_rep_mport(efx);
2588	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.pf);
2589	efx_tc_deconfigure_default_rule(efx, &efx->tc->dflt.wire);
2590	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.pf);
2591	efx_tc_deconfigure_fallback_acts(efx, &efx->tc->facts.reps);
2592	efx->tc->up = false;
2593	efx_mae_free_tables(efx);
2594}
2595
2596/* At teardown time, all TC filter rules (and thus all resources they created)
2597 * should already have been removed.  If we find any in our hashtables, make a
2598 * cursory attempt to clean up the software side.
2599 */
2600static void efx_tc_encap_match_free(void *ptr, void *__unused)
2601{
2602	struct efx_tc_encap_match *encap = ptr;
2603
2604	WARN_ON(refcount_read(&encap->ref));
2605	kfree(encap);
2606}
2607
2608static void efx_tc_recirc_free(void *ptr, void *arg)
2609{
2610	struct efx_tc_recirc_id *rid = ptr;
2611	struct efx_nic *efx = arg;
2612
2613	WARN_ON(refcount_read(&rid->ref));
2614	ida_free(&efx->tc->recirc_ida, rid->fw_id);
2615	kfree(rid);
2616}
2617
2618static void efx_tc_lhs_free(void *ptr, void *arg)
2619{
2620	struct efx_tc_lhs_rule *rule = ptr;
2621	struct efx_nic *efx = arg;
2622
2623	netif_err(efx, drv, efx->net_dev,
2624		  "tc lhs_rule %lx still present at teardown, removing\n",
2625		  rule->cookie);
2626
2627	if (rule->lhs_act.zone)
2628		efx_tc_ct_unregister_zone(efx, rule->lhs_act.zone);
2629	if (rule->lhs_act.count)
2630		efx_tc_flower_put_counter_index(efx, rule->lhs_act.count);
2631	efx_mae_remove_lhs_rule(efx, rule);
2632
2633	kfree(rule);
2634}
2635
2636static void efx_tc_mac_free(void *ptr, void *__unused)
2637{
2638	struct efx_tc_mac_pedit_action *ped = ptr;
2639
2640	WARN_ON(refcount_read(&ped->ref));
2641	kfree(ped);
2642}
2643
2644static void efx_tc_flow_free(void *ptr, void *arg)
2645{
2646	struct efx_tc_flow_rule *rule = ptr;
2647	struct efx_nic *efx = arg;
2648
2649	netif_err(efx, drv, efx->net_dev,
2650		  "tc rule %lx still present at teardown, removing\n",
2651		  rule->cookie);
2652
2653	/* Also releases entries in subsidiary tables */
2654	efx_tc_delete_rule(efx, rule);
2655
2656	kfree(rule);
2657}
2658
2659int efx_init_struct_tc(struct efx_nic *efx)
2660{
2661	int rc;
2662
2663	if (efx->type->is_vf)
2664		return 0;
2665
2666	efx->tc = kzalloc(sizeof(*efx->tc), GFP_KERNEL);
2667	if (!efx->tc)
2668		return -ENOMEM;
2669	efx->tc->caps = kzalloc(sizeof(struct mae_caps), GFP_KERNEL);
2670	if (!efx->tc->caps) {
2671		rc = -ENOMEM;
2672		goto fail_alloc_caps;
2673	}
2674	INIT_LIST_HEAD(&efx->tc->block_list);
2675
2676	mutex_init(&efx->tc->mutex);
2677	init_waitqueue_head(&efx->tc->flush_wq);
2678	rc = efx_tc_init_encap_actions(efx);
2679	if (rc < 0)
2680		goto fail_encap_actions;
2681	rc = efx_tc_init_counters(efx);
2682	if (rc < 0)
2683		goto fail_counters;
2684	rc = rhashtable_init(&efx->tc->mac_ht, &efx_tc_mac_ht_params);
2685	if (rc < 0)
2686		goto fail_mac_ht;
2687	rc = rhashtable_init(&efx->tc->encap_match_ht, &efx_tc_encap_match_ht_params);
2688	if (rc < 0)
2689		goto fail_encap_match_ht;
2690	rc = rhashtable_init(&efx->tc->match_action_ht, &efx_tc_match_action_ht_params);
2691	if (rc < 0)
2692		goto fail_match_action_ht;
2693	rc = rhashtable_init(&efx->tc->lhs_rule_ht, &efx_tc_lhs_rule_ht_params);
2694	if (rc < 0)
2695		goto fail_lhs_rule_ht;
2696	rc = efx_tc_init_conntrack(efx);
2697	if (rc < 0)
2698		goto fail_conntrack;
2699	rc = rhashtable_init(&efx->tc->recirc_ht, &efx_tc_recirc_ht_params);
2700	if (rc < 0)
2701		goto fail_recirc_ht;
2702	ida_init(&efx->tc->recirc_ida);
2703	efx->tc->reps_filter_uc = -1;
2704	efx->tc->reps_filter_mc = -1;
2705	INIT_LIST_HEAD(&efx->tc->dflt.pf.acts.list);
2706	efx->tc->dflt.pf.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2707	INIT_LIST_HEAD(&efx->tc->dflt.wire.acts.list);
2708	efx->tc->dflt.wire.fw_id = MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL;
2709	INIT_LIST_HEAD(&efx->tc->facts.pf.list);
2710	efx->tc->facts.pf.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
2711	INIT_LIST_HEAD(&efx->tc->facts.reps.list);
2712	efx->tc->facts.reps.fw_id = MC_CMD_MAE_ACTION_SET_ALLOC_OUT_ACTION_SET_ID_NULL;
2713	efx->extra_channel_type[EFX_EXTRA_CHANNEL_TC] = &efx_tc_channel_type;
2714	return 0;
2715fail_recirc_ht:
2716	efx_tc_destroy_conntrack(efx);
2717fail_conntrack:
2718	rhashtable_destroy(&efx->tc->lhs_rule_ht);
2719fail_lhs_rule_ht:
2720	rhashtable_destroy(&efx->tc->match_action_ht);
2721fail_match_action_ht:
2722	rhashtable_destroy(&efx->tc->encap_match_ht);
2723fail_encap_match_ht:
2724	rhashtable_destroy(&efx->tc->mac_ht);
2725fail_mac_ht:
2726	efx_tc_destroy_counters(efx);
2727fail_counters:
2728	efx_tc_destroy_encap_actions(efx);
2729fail_encap_actions:
2730	mutex_destroy(&efx->tc->mutex);
2731	kfree(efx->tc->caps);
2732fail_alloc_caps:
2733	kfree(efx->tc);
2734	efx->tc = NULL;
2735	return rc;
2736}
2737
2738void efx_fini_struct_tc(struct efx_nic *efx)
2739{
2740	if (!efx->tc)
2741		return;
2742
2743	mutex_lock(&efx->tc->mutex);
2744	EFX_WARN_ON_PARANOID(efx->tc->dflt.pf.fw_id !=
2745			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
2746	EFX_WARN_ON_PARANOID(efx->tc->dflt.wire.fw_id !=
2747			     MC_CMD_MAE_ACTION_RULE_INSERT_OUT_ACTION_RULE_ID_NULL);
2748	EFX_WARN_ON_PARANOID(efx->tc->facts.pf.fw_id !=
2749			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
2750	EFX_WARN_ON_PARANOID(efx->tc->facts.reps.fw_id !=
2751			     MC_CMD_MAE_ACTION_SET_LIST_ALLOC_OUT_ACTION_SET_LIST_ID_NULL);
2752	rhashtable_free_and_destroy(&efx->tc->lhs_rule_ht, efx_tc_lhs_free, efx);
2753	rhashtable_free_and_destroy(&efx->tc->match_action_ht, efx_tc_flow_free,
2754				    efx);
2755	rhashtable_free_and_destroy(&efx->tc->encap_match_ht,
2756				    efx_tc_encap_match_free, NULL);
2757	efx_tc_fini_conntrack(efx);
2758	rhashtable_free_and_destroy(&efx->tc->recirc_ht, efx_tc_recirc_free, efx);
2759	WARN_ON(!ida_is_empty(&efx->tc->recirc_ida));
2760	ida_destroy(&efx->tc->recirc_ida);
2761	rhashtable_free_and_destroy(&efx->tc->mac_ht, efx_tc_mac_free, NULL);
2762	efx_tc_fini_counters(efx);
2763	efx_tc_fini_encap_actions(efx);
2764	mutex_unlock(&efx->tc->mutex);
2765	mutex_destroy(&efx->tc->mutex);
2766	kfree(efx->tc->caps);
2767	kfree(efx->tc);
2768	efx->tc = NULL;
2769}
2770