1// SPDX-License-Identifier: GPL-2.0-only
2/****************************************************************************
3 * Driver for Solarflare network controllers and boards
4 * Copyright 2023, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms of the GNU General Public License version 2 as published
8 * by the Free Software Foundation, incorporated herein by reference.
9 */
10
11#include "tc_conntrack.h"
12#include "tc.h"
13#include "mae.h"
14
/* Per-zone flow block callback.  Declared ahead of its definition because
 * the zone teardown helper below needs its address.
 */
static int efx_tc_flow_block(enum tc_setup_type type,
			     void *type_data,
			     void *cb_priv);
17
18static const struct rhashtable_params efx_tc_ct_zone_ht_params = {
19	.key_len	= offsetof(struct efx_tc_ct_zone, linkage),
20	.key_offset	= 0,
21	.head_offset	= offsetof(struct efx_tc_ct_zone, linkage),
22};
23
24static const struct rhashtable_params efx_tc_ct_ht_params = {
25	.key_len	= offsetof(struct efx_tc_ct_entry, linkage),
26	.key_offset	= 0,
27	.head_offset	= offsetof(struct efx_tc_ct_entry, linkage),
28};
29
30static void efx_tc_ct_zone_free(void *ptr, void *arg)
31{
32	struct efx_tc_ct_zone *zone = ptr;
33	struct efx_nic *efx = zone->efx;
34
35	netif_err(efx, drv, efx->net_dev,
36		  "tc ct_zone %u still present at teardown, removing\n",
37		  zone->zone);
38
39	nf_flow_table_offload_del_cb(zone->nf_ft, efx_tc_flow_block, zone);
40	kfree(zone);
41}
42
43static void efx_tc_ct_free(void *ptr, void *arg)
44{
45	struct efx_tc_ct_entry *conn = ptr;
46	struct efx_nic *efx = arg;
47
48	netif_err(efx, drv, efx->net_dev,
49		  "tc ct_entry %lx still present at teardown\n",
50		  conn->cookie);
51
52	/* We can release the counter, but we can't remove the CT itself
53	 * from hardware because the table meta is already gone.
54	 */
55	efx_tc_flower_release_counter(efx, conn->cnt);
56	kfree(conn);
57}
58
59int efx_tc_init_conntrack(struct efx_nic *efx)
60{
61	int rc;
62
63	rc = rhashtable_init(&efx->tc->ct_zone_ht, &efx_tc_ct_zone_ht_params);
64	if (rc < 0)
65		goto fail_ct_zone_ht;
66	rc = rhashtable_init(&efx->tc->ct_ht, &efx_tc_ct_ht_params);
67	if (rc < 0)
68		goto fail_ct_ht;
69	return 0;
70fail_ct_ht:
71	rhashtable_destroy(&efx->tc->ct_zone_ht);
72fail_ct_zone_ht:
73	return rc;
74}
75
76/* Only call this in init failure teardown.
77 * Normal exit should fini instead as there may be entries in the table.
78 */
79void efx_tc_destroy_conntrack(struct efx_nic *efx)
80{
81	rhashtable_destroy(&efx->tc->ct_ht);
82	rhashtable_destroy(&efx->tc->ct_zone_ht);
83}
84
85void efx_tc_fini_conntrack(struct efx_nic *efx)
86{
87	rhashtable_free_and_destroy(&efx->tc->ct_zone_ht, efx_tc_ct_zone_free, NULL);
88	rhashtable_free_and_destroy(&efx->tc->ct_ht, efx_tc_ct_free, efx);
89}
90
/* Turn a TCP_FLAG_<flg> constant into the 16-bit big-endian form that is
 * compared against the flags of a flow_match_tcp below: the constant is
 * converted to CPU order, its upper 16 bits are kept, and the result is
 * converted back to big-endian.
 */
#define EFX_NF_TCP_FLAG(flg) \
	cpu_to_be16(be32_to_cpu(TCP_FLAG_##flg) >> 16)
92
93static int efx_tc_ct_parse_match(struct efx_nic *efx, struct flow_rule *fr,
94				 struct efx_tc_ct_entry *conn)
95{
96	struct flow_dissector *dissector = fr->match.dissector;
97	unsigned char ipv = 0;
98	bool tcp = false;
99
100	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_CONTROL)) {
101		struct flow_match_control fm;
102
103		flow_rule_match_control(fr, &fm);
104		if (IS_ALL_ONES(fm.mask->addr_type))
105			switch (fm.key->addr_type) {
106			case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
107				ipv = 4;
108				break;
109			case FLOW_DISSECTOR_KEY_IPV6_ADDRS:
110				ipv = 6;
111				break;
112			default:
113				break;
114			}
115	}
116
117	if (!ipv) {
118		netif_dbg(efx, drv, efx->net_dev,
119			  "Conntrack missing ipv specification\n");
120		return -EOPNOTSUPP;
121	}
122
123	if (dissector->used_keys &
124	    ~(BIT_ULL(FLOW_DISSECTOR_KEY_CONTROL) |
125	      BIT_ULL(FLOW_DISSECTOR_KEY_BASIC) |
126	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
127	      BIT_ULL(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
128	      BIT_ULL(FLOW_DISSECTOR_KEY_PORTS) |
129	      BIT_ULL(FLOW_DISSECTOR_KEY_TCP) |
130	      BIT_ULL(FLOW_DISSECTOR_KEY_META))) {
131		netif_dbg(efx, drv, efx->net_dev,
132			  "Unsupported conntrack keys %#llx\n",
133			  dissector->used_keys);
134		return -EOPNOTSUPP;
135	}
136
137	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_BASIC)) {
138		struct flow_match_basic fm;
139
140		flow_rule_match_basic(fr, &fm);
141		if (!IS_ALL_ONES(fm.mask->n_proto)) {
142			netif_dbg(efx, drv, efx->net_dev,
143				  "Conntrack eth_proto is not exact-match; mask %04x\n",
144				   ntohs(fm.mask->n_proto));
145			return -EOPNOTSUPP;
146		}
147		conn->eth_proto = fm.key->n_proto;
148		if (conn->eth_proto != (ipv == 4 ? htons(ETH_P_IP)
149						 : htons(ETH_P_IPV6))) {
150			netif_dbg(efx, drv, efx->net_dev,
151				  "Conntrack eth_proto is not IPv%u, is %04x\n",
152				   ipv, ntohs(conn->eth_proto));
153			return -EOPNOTSUPP;
154		}
155		if (!IS_ALL_ONES(fm.mask->ip_proto)) {
156			netif_dbg(efx, drv, efx->net_dev,
157				  "Conntrack ip_proto is not exact-match; mask %02x\n",
158				   fm.mask->ip_proto);
159			return -EOPNOTSUPP;
160		}
161		conn->ip_proto = fm.key->ip_proto;
162		switch (conn->ip_proto) {
163		case IPPROTO_TCP:
164			tcp = true;
165			break;
166		case IPPROTO_UDP:
167			break;
168		default:
169			netif_dbg(efx, drv, efx->net_dev,
170				  "Conntrack ip_proto not TCP or UDP, is %02x\n",
171				   conn->ip_proto);
172			return -EOPNOTSUPP;
173		}
174	} else {
175		netif_dbg(efx, drv, efx->net_dev,
176			  "Conntrack missing eth_proto, ip_proto\n");
177		return -EOPNOTSUPP;
178	}
179
180	if (ipv == 4 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
181		struct flow_match_ipv4_addrs fm;
182
183		flow_rule_match_ipv4_addrs(fr, &fm);
184		if (!IS_ALL_ONES(fm.mask->src)) {
185			netif_dbg(efx, drv, efx->net_dev,
186				  "Conntrack ipv4.src is not exact-match; mask %08x\n",
187				   ntohl(fm.mask->src));
188			return -EOPNOTSUPP;
189		}
190		conn->src_ip = fm.key->src;
191		if (!IS_ALL_ONES(fm.mask->dst)) {
192			netif_dbg(efx, drv, efx->net_dev,
193				  "Conntrack ipv4.dst is not exact-match; mask %08x\n",
194				   ntohl(fm.mask->dst));
195			return -EOPNOTSUPP;
196		}
197		conn->dst_ip = fm.key->dst;
198	} else if (ipv == 6 && flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
199		struct flow_match_ipv6_addrs fm;
200
201		flow_rule_match_ipv6_addrs(fr, &fm);
202		if (!efx_ipv6_addr_all_ones(&fm.mask->src)) {
203			netif_dbg(efx, drv, efx->net_dev,
204				  "Conntrack ipv6.src is not exact-match; mask %pI6\n",
205				   &fm.mask->src);
206			return -EOPNOTSUPP;
207		}
208		conn->src_ip6 = fm.key->src;
209		if (!efx_ipv6_addr_all_ones(&fm.mask->dst)) {
210			netif_dbg(efx, drv, efx->net_dev,
211				  "Conntrack ipv6.dst is not exact-match; mask %pI6\n",
212				   &fm.mask->dst);
213			return -EOPNOTSUPP;
214		}
215		conn->dst_ip6 = fm.key->dst;
216	} else {
217		netif_dbg(efx, drv, efx->net_dev,
218			  "Conntrack missing IPv%u addrs\n", ipv);
219		return -EOPNOTSUPP;
220	}
221
222	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_PORTS)) {
223		struct flow_match_ports fm;
224
225		flow_rule_match_ports(fr, &fm);
226		if (!IS_ALL_ONES(fm.mask->src)) {
227			netif_dbg(efx, drv, efx->net_dev,
228				  "Conntrack ports.src is not exact-match; mask %04x\n",
229				   ntohs(fm.mask->src));
230			return -EOPNOTSUPP;
231		}
232		conn->l4_sport = fm.key->src;
233		if (!IS_ALL_ONES(fm.mask->dst)) {
234			netif_dbg(efx, drv, efx->net_dev,
235				  "Conntrack ports.dst is not exact-match; mask %04x\n",
236				   ntohs(fm.mask->dst));
237			return -EOPNOTSUPP;
238		}
239		conn->l4_dport = fm.key->dst;
240	} else {
241		netif_dbg(efx, drv, efx->net_dev, "Conntrack missing L4 ports\n");
242		return -EOPNOTSUPP;
243	}
244
245	if (flow_rule_match_key(fr, FLOW_DISSECTOR_KEY_TCP)) {
246		__be16 tcp_interesting_flags;
247		struct flow_match_tcp fm;
248
249		if (!tcp) {
250			netif_dbg(efx, drv, efx->net_dev,
251				  "Conntrack matching on TCP keys but ipproto is not tcp\n");
252			return -EOPNOTSUPP;
253		}
254		flow_rule_match_tcp(fr, &fm);
255		tcp_interesting_flags = EFX_NF_TCP_FLAG(SYN) |
256					EFX_NF_TCP_FLAG(RST) |
257					EFX_NF_TCP_FLAG(FIN);
258		/* If any of the tcp_interesting_flags is set, we always
259		 * inhibit CT lookup in LHS (so SW can update CT table).
260		 */
261		if (fm.key->flags & tcp_interesting_flags) {
262			netif_dbg(efx, drv, efx->net_dev,
263				  "Unsupported conntrack tcp.flags %04x/%04x\n",
264				   ntohs(fm.key->flags), ntohs(fm.mask->flags));
265			return -EOPNOTSUPP;
266		}
267		/* Other TCP flags cannot be filtered at CT */
268		if (fm.mask->flags & ~tcp_interesting_flags) {
269			netif_dbg(efx, drv, efx->net_dev,
270				  "Unsupported conntrack tcp.flags %04x/%04x\n",
271				   ntohs(fm.key->flags), ntohs(fm.mask->flags));
272			return -EOPNOTSUPP;
273		}
274	}
275
276	return 0;
277}
278
279static int efx_tc_ct_replace(struct efx_tc_ct_zone *ct_zone,
280			     struct flow_cls_offload *tc)
281{
282	struct flow_rule *fr = flow_cls_offload_flow_rule(tc);
283	struct efx_tc_ct_entry *conn, *old;
284	struct efx_nic *efx = ct_zone->efx;
285	const struct flow_action_entry *fa;
286	struct efx_tc_counter *cnt;
287	int rc, i;
288
289	if (WARN_ON(!efx->tc))
290		return -ENETDOWN;
291	if (WARN_ON(!efx->tc->up))
292		return -ENETDOWN;
293
294	conn = kzalloc(sizeof(*conn), GFP_USER);
295	if (!conn)
296		return -ENOMEM;
297	conn->cookie = tc->cookie;
298	old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_ht,
299						&conn->linkage,
300						efx_tc_ct_ht_params);
301	if (IS_ERR(old)) {
302		rc = PTR_ERR(old);
303		goto release;
304	} else if (old) {
305		netif_dbg(efx, drv, efx->net_dev,
306			  "Already offloaded conntrack (cookie %lx)\n", tc->cookie);
307		rc = -EEXIST;
308		goto release;
309	}
310
311	/* Parse match */
312	conn->zone = ct_zone;
313	rc = efx_tc_ct_parse_match(efx, fr, conn);
314	if (rc)
315		goto release;
316
317	/* Parse actions */
318	flow_action_for_each(i, fa, &fr->action) {
319		switch (fa->id) {
320		case FLOW_ACTION_CT_METADATA:
321			conn->mark = fa->ct_metadata.mark;
322			if (memchr_inv(fa->ct_metadata.labels, 0, sizeof(fa->ct_metadata.labels))) {
323				netif_dbg(efx, drv, efx->net_dev,
324					  "Setting CT label not supported\n");
325				rc = -EOPNOTSUPP;
326				goto release;
327			}
328			break;
329		default:
330			netif_dbg(efx, drv, efx->net_dev,
331				  "Unhandled action %u for conntrack\n", fa->id);
332			rc = -EOPNOTSUPP;
333			goto release;
334		}
335	}
336
337	/* fill in defaults for unmangled values */
338	conn->nat_ip = conn->dnat ? conn->dst_ip : conn->src_ip;
339	conn->l4_natport = conn->dnat ? conn->l4_dport : conn->l4_sport;
340
341	cnt = efx_tc_flower_allocate_counter(efx, EFX_TC_COUNTER_TYPE_CT);
342	if (IS_ERR(cnt)) {
343		rc = PTR_ERR(cnt);
344		goto release;
345	}
346	conn->cnt = cnt;
347
348	rc = efx_mae_insert_ct(efx, conn);
349	if (rc) {
350		netif_dbg(efx, drv, efx->net_dev,
351			  "Failed to insert conntrack, %d\n", rc);
352		goto release;
353	}
354	mutex_lock(&ct_zone->mutex);
355	list_add_tail(&conn->list, &ct_zone->cts);
356	mutex_unlock(&ct_zone->mutex);
357	return 0;
358release:
359	if (conn->cnt)
360		efx_tc_flower_release_counter(efx, conn->cnt);
361	if (!old)
362		rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
363				       efx_tc_ct_ht_params);
364	kfree(conn);
365	return rc;
366}
367
368/* Caller must follow with efx_tc_ct_remove_finish() after RCU grace period! */
369static void efx_tc_ct_remove(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
370{
371	int rc;
372
373	/* Remove it from HW */
374	rc = efx_mae_remove_ct(efx, conn);
375	/* Delete it from SW */
376	rhashtable_remove_fast(&efx->tc->ct_ht, &conn->linkage,
377			       efx_tc_ct_ht_params);
378	if (rc) {
379		netif_err(efx, drv, efx->net_dev,
380			  "Failed to remove conntrack %lx from hw, rc %d\n",
381			  conn->cookie, rc);
382	} else {
383		netif_dbg(efx, drv, efx->net_dev, "Removed conntrack %lx\n",
384			  conn->cookie);
385	}
386}
387
388static void efx_tc_ct_remove_finish(struct efx_nic *efx, struct efx_tc_ct_entry *conn)
389{
390	/* Remove related CT counter.  This is delayed after the conn object we
391	 * are working with has been successfully removed.  This protects the
392	 * counter from being used-after-free inside efx_tc_ct_stats.
393	 */
394	efx_tc_flower_release_counter(efx, conn->cnt);
395	kfree(conn);
396}
397
398static int efx_tc_ct_destroy(struct efx_tc_ct_zone *ct_zone,
399			     struct flow_cls_offload *tc)
400{
401	struct efx_nic *efx = ct_zone->efx;
402	struct efx_tc_ct_entry *conn;
403
404	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
405				      efx_tc_ct_ht_params);
406	if (!conn) {
407		netif_warn(efx, drv, efx->net_dev,
408			   "Conntrack %lx not found to remove\n", tc->cookie);
409		return -ENOENT;
410	}
411
412	mutex_lock(&ct_zone->mutex);
413	list_del(&conn->list);
414	efx_tc_ct_remove(efx, conn);
415	mutex_unlock(&ct_zone->mutex);
416	synchronize_rcu();
417	efx_tc_ct_remove_finish(efx, conn);
418	return 0;
419}
420
421static int efx_tc_ct_stats(struct efx_tc_ct_zone *ct_zone,
422			   struct flow_cls_offload *tc)
423{
424	struct efx_nic *efx = ct_zone->efx;
425	struct efx_tc_ct_entry *conn;
426	struct efx_tc_counter *cnt;
427
428	rcu_read_lock();
429	conn = rhashtable_lookup_fast(&efx->tc->ct_ht, &tc->cookie,
430				      efx_tc_ct_ht_params);
431	if (!conn) {
432		netif_warn(efx, drv, efx->net_dev,
433			   "Conntrack %lx not found for stats\n", tc->cookie);
434		rcu_read_unlock();
435		return -ENOENT;
436	}
437
438	cnt = conn->cnt;
439	spin_lock_bh(&cnt->lock);
440	/* Report only last use */
441	flow_stats_update(&tc->stats, 0, 0, 0, cnt->touched,
442			  FLOW_ACTION_HW_STATS_DELAYED);
443	spin_unlock_bh(&cnt->lock);
444	rcu_read_unlock();
445
446	return 0;
447}
448
449static int efx_tc_flow_block(enum tc_setup_type type, void *type_data,
450			     void *cb_priv)
451{
452	struct flow_cls_offload *tcb = type_data;
453	struct efx_tc_ct_zone *ct_zone = cb_priv;
454
455	if (type != TC_SETUP_CLSFLOWER)
456		return -EOPNOTSUPP;
457
458	switch (tcb->command) {
459	case FLOW_CLS_REPLACE:
460		return efx_tc_ct_replace(ct_zone, tcb);
461	case FLOW_CLS_DESTROY:
462		return efx_tc_ct_destroy(ct_zone, tcb);
463	case FLOW_CLS_STATS:
464		return efx_tc_ct_stats(ct_zone, tcb);
465	default:
466		break;
467	}
468
469	return -EOPNOTSUPP;
470}
471
472struct efx_tc_ct_zone *efx_tc_ct_register_zone(struct efx_nic *efx, u16 zone,
473					       struct nf_flowtable *ct_ft)
474{
475	struct efx_tc_ct_zone *ct_zone, *old;
476	int rc;
477
478	ct_zone = kzalloc(sizeof(*ct_zone), GFP_USER);
479	if (!ct_zone)
480		return ERR_PTR(-ENOMEM);
481	ct_zone->zone = zone;
482	old = rhashtable_lookup_get_insert_fast(&efx->tc->ct_zone_ht,
483						&ct_zone->linkage,
484						efx_tc_ct_zone_ht_params);
485	if (old) {
486		/* don't need our new entry */
487		kfree(ct_zone);
488		if (IS_ERR(old)) /* oh dear, it's actually an error */
489			return ERR_CAST(old);
490		if (!refcount_inc_not_zero(&old->ref))
491			return ERR_PTR(-EAGAIN);
492		/* existing entry found */
493		WARN_ON_ONCE(old->nf_ft != ct_ft);
494		netif_dbg(efx, drv, efx->net_dev,
495			  "Found existing ct_zone for %u\n", zone);
496		return old;
497	}
498	ct_zone->nf_ft = ct_ft;
499	ct_zone->efx = efx;
500	INIT_LIST_HEAD(&ct_zone->cts);
501	mutex_init(&ct_zone->mutex);
502	rc = nf_flow_table_offload_add_cb(ct_ft, efx_tc_flow_block, ct_zone);
503	netif_dbg(efx, drv, efx->net_dev, "Adding new ct_zone for %u, rc %d\n",
504		  zone, rc);
505	if (rc < 0)
506		goto fail;
507	refcount_set(&ct_zone->ref, 1);
508	return ct_zone;
509fail:
510	rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
511			       efx_tc_ct_zone_ht_params);
512	kfree(ct_zone);
513	return ERR_PTR(rc);
514}
515
516void efx_tc_ct_unregister_zone(struct efx_nic *efx,
517			       struct efx_tc_ct_zone *ct_zone)
518{
519	struct efx_tc_ct_entry *conn, *next;
520
521	if (!refcount_dec_and_test(&ct_zone->ref))
522		return; /* still in use */
523	nf_flow_table_offload_del_cb(ct_zone->nf_ft, efx_tc_flow_block, ct_zone);
524	rhashtable_remove_fast(&efx->tc->ct_zone_ht, &ct_zone->linkage,
525			       efx_tc_ct_zone_ht_params);
526	mutex_lock(&ct_zone->mutex);
527	list_for_each_entry(conn, &ct_zone->cts, list)
528		efx_tc_ct_remove(efx, conn);
529	synchronize_rcu();
530	/* need to use _safe because efx_tc_ct_remove_finish() frees conn */
531	list_for_each_entry_safe(conn, next, &ct_zone->cts, list)
532		efx_tc_ct_remove_finish(efx, conn);
533	mutex_unlock(&ct_zone->mutex);
534	mutex_destroy(&ct_zone->mutex);
535	netif_dbg(efx, drv, efx->net_dev, "Removed ct_zone for %u\n",
536		  ct_zone->zone);
537	kfree(ct_zone);
538}
539