1/* Broadcom NetXtreme-C/E network driver.
2 *
3 * Copyright (c) 2017 Broadcom Limited
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation.
8 */
9
10#include <linux/netdevice.h>
11#include <linux/inetdevice.h>
12#include <linux/if_vlan.h>
13#include <net/flow_dissector.h>
14#include <net/pkt_cls.h>
15#include <net/tc_act/tc_gact.h>
16#include <net/tc_act/tc_skbedit.h>
17#include <net/tc_act/tc_mirred.h>
18#include <net/tc_act/tc_vlan.h>
19#include <net/tc_act/tc_pedit.h>
20#include <net/tc_act/tc_tunnel_key.h>
21#include <net/vxlan.h>
22
23#include "bnxt_hsi.h"
24#include "bnxt.h"
25#include "bnxt_sriov.h"
26#include "bnxt_tc.h"
27#include "bnxt_vfr.h"
28
/* fid value used in this file to signal "no valid destination function" */
#define BNXT_FID_INVALID			0xffff
/* Combine a VLAN id and a priority into a single TCI value */
#define VLAN_TCI(vid, prio)	(((prio) << VLAN_PRIO_SHIFT) | (vid))

/* Predicates on a network-order VLAN TCI key or mask.  The PCP helpers
 * look only at the priority bits, the VID helper only at the id bits.
 */
#define is_vlan_pcp_wildcarded(vlan_tci_mask)	\
	(!(ntohs(vlan_tci_mask) & VLAN_PRIO_MASK))
#define is_vlan_pcp_exactmatch(vlan_tci_mask)	\
	((ntohs(vlan_tci_mask) & VLAN_PRIO_MASK) == VLAN_PRIO_MASK)
#define is_vlan_pcp_zero(vlan_tci)	\
	(!(ntohs(vlan_tci) & VLAN_PRIO_MASK))
#define is_vid_exactmatch(vlan_tci_mask)	\
	((ntohs(vlan_tci_mask) & VLAN_VID_MASK) == VLAN_VID_MASK)

/* Defined further down; declared here because they are used earlier */
static bool is_wildcard(void *mask, int len);
static bool is_exactmatch(void *mask, int len);
43/* Return the dst fid of the func for flow forwarding
44 * For PFs: src_fid is the fid of the PF
45 * For VF-reps: src_fid the fid of the VF
46 */
47static u16 bnxt_flow_get_dst_fid(struct bnxt *pf_bp, struct net_device *dev)
48{
49	struct bnxt *bp;
50
51	/* check if dev belongs to the same switch */
52	if (!netdev_port_same_parent_id(pf_bp->dev, dev)) {
53		netdev_info(pf_bp->dev, "dev(ifindex=%d) not on same switch\n",
54			    dev->ifindex);
55		return BNXT_FID_INVALID;
56	}
57
58	/* Is dev a VF-rep? */
59	if (bnxt_dev_is_vf_rep(dev))
60		return bnxt_vf_rep_get_fid(dev);
61
62	bp = netdev_priv(dev);
63	return bp->pf.fw_fid;
64}
65
66static int bnxt_tc_parse_redir(struct bnxt *bp,
67			       struct bnxt_tc_actions *actions,
68			       const struct flow_action_entry *act)
69{
70	struct net_device *dev = act->dev;
71
72	if (!dev) {
73		netdev_info(bp->dev, "no dev in mirred action\n");
74		return -EINVAL;
75	}
76
77	actions->flags |= BNXT_TC_ACTION_FLAG_FWD;
78	actions->dst_dev = dev;
79	return 0;
80}
81
82static int bnxt_tc_parse_vlan(struct bnxt *bp,
83			      struct bnxt_tc_actions *actions,
84			      const struct flow_action_entry *act)
85{
86	switch (act->id) {
87	case FLOW_ACTION_VLAN_POP:
88		actions->flags |= BNXT_TC_ACTION_FLAG_POP_VLAN;
89		break;
90	case FLOW_ACTION_VLAN_PUSH:
91		actions->flags |= BNXT_TC_ACTION_FLAG_PUSH_VLAN;
92		actions->push_vlan_tci = htons(act->vlan.vid);
93		actions->push_vlan_tpid = act->vlan.proto;
94		break;
95	default:
96		return -EOPNOTSUPP;
97	}
98	return 0;
99}
100
101static int bnxt_tc_parse_tunnel_set(struct bnxt *bp,
102				    struct bnxt_tc_actions *actions,
103				    const struct flow_action_entry *act)
104{
105	const struct ip_tunnel_info *tun_info = act->tunnel;
106	const struct ip_tunnel_key *tun_key = &tun_info->key;
107
108	if (ip_tunnel_info_af(tun_info) != AF_INET) {
109		netdev_info(bp->dev, "only IPv4 tunnel-encap is supported\n");
110		return -EOPNOTSUPP;
111	}
112
113	actions->tun_encap_key = *tun_key;
114	actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP;
115	return 0;
116}
117
118/* Key & Mask from the stack comes unaligned in multiple iterations of 4 bytes
119 * each(u32).
120 * This routine consolidates such multiple unaligned values into one
121 * field each for Key & Mask (for src and dst macs separately)
122 * For example,
123 *			Mask/Key	Offset	Iteration
124 *			==========	======	=========
125 *	dst mac		0xffffffff	0	1
126 *	dst mac		0x0000ffff	4	2
127 *
128 *	src mac		0xffff0000	4	1
129 *	src mac		0xffffffff	8	2
130 *
131 * The above combination coming from the stack will be consolidated as
132 *			Mask/Key
133 *			==============
134 *	src mac:	0xffffffffffff
135 *	dst mac:	0xffffffffffff
136 */
137static void bnxt_set_l2_key_mask(u32 part_key, u32 part_mask,
138				 u8 *actual_key, u8 *actual_mask)
139{
140	u32 key = get_unaligned((u32 *)actual_key);
141	u32 mask = get_unaligned((u32 *)actual_mask);
142
143	part_key &= part_mask;
144	part_key |= key & ~part_mask;
145
146	put_unaligned(mask | part_mask, (u32 *)actual_mask);
147	put_unaligned(part_key, (u32 *)actual_key);
148}
149
150static int
151bnxt_fill_l2_rewrite_fields(struct bnxt_tc_actions *actions,
152			    u16 *eth_addr, u16 *eth_addr_mask)
153{
154	u16 *p;
155	int j;
156
157	if (unlikely(bnxt_eth_addr_key_mask_invalid(eth_addr, eth_addr_mask)))
158		return -EINVAL;
159
160	if (!is_wildcard(&eth_addr_mask[0], ETH_ALEN)) {
161		if (!is_exactmatch(&eth_addr_mask[0], ETH_ALEN))
162			return -EINVAL;
163		/* FW expects dmac to be in u16 array format */
164		p = eth_addr;
165		for (j = 0; j < 3; j++)
166			actions->l2_rewrite_dmac[j] = cpu_to_be16(*(p + j));
167	}
168
169	if (!is_wildcard(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN)) {
170		if (!is_exactmatch(&eth_addr_mask[ETH_ALEN / 2], ETH_ALEN))
171			return -EINVAL;
172		/* FW expects smac to be in u16 array format */
173		p = &eth_addr[ETH_ALEN / 2];
174		for (j = 0; j < 3; j++)
175			actions->l2_rewrite_smac[j] = cpu_to_be16(*(p + j));
176	}
177
178	return 0;
179}
180
181static int
182bnxt_tc_parse_pedit(struct bnxt *bp, struct bnxt_tc_actions *actions,
183		    struct flow_action_entry *act, int act_idx, u8 *eth_addr,
184		    u8 *eth_addr_mask)
185{
186	size_t offset_of_ip6_daddr = offsetof(struct ipv6hdr, daddr);
187	size_t offset_of_ip6_saddr = offsetof(struct ipv6hdr, saddr);
188	u32 mask, val, offset, idx;
189	u8 htype;
190
191	offset = act->mangle.offset;
192	htype = act->mangle.htype;
193	mask = ~act->mangle.mask;
194	val = act->mangle.val;
195
196	switch (htype) {
197	case FLOW_ACT_MANGLE_HDR_TYPE_ETH:
198		if (offset > PEDIT_OFFSET_SMAC_LAST_4_BYTES) {
199			netdev_err(bp->dev,
200				   "%s: eth_hdr: Invalid pedit field\n",
201				   __func__);
202			return -EINVAL;
203		}
204		actions->flags |= BNXT_TC_ACTION_FLAG_L2_REWRITE;
205
206		bnxt_set_l2_key_mask(val, mask, &eth_addr[offset],
207				     &eth_addr_mask[offset]);
208		break;
209	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
210		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
211		actions->nat.l3_is_ipv4 = true;
212		if (offset ==  offsetof(struct iphdr, saddr)) {
213			actions->nat.src_xlate = true;
214			actions->nat.l3.ipv4.saddr.s_addr = htonl(val);
215		} else if (offset ==  offsetof(struct iphdr, daddr)) {
216			actions->nat.src_xlate = false;
217			actions->nat.l3.ipv4.daddr.s_addr = htonl(val);
218		} else {
219			netdev_err(bp->dev,
220				   "%s: IPv4_hdr: Invalid pedit field\n",
221				   __func__);
222			return -EINVAL;
223		}
224
225		netdev_dbg(bp->dev, "nat.src_xlate = %d src IP: %pI4 dst ip : %pI4\n",
226			   actions->nat.src_xlate, &actions->nat.l3.ipv4.saddr,
227			   &actions->nat.l3.ipv4.daddr);
228		break;
229
230	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
231		actions->flags |= BNXT_TC_ACTION_FLAG_NAT_XLATE;
232		actions->nat.l3_is_ipv4 = false;
233		if (offset >= offsetof(struct ipv6hdr, saddr) &&
234		    offset < offset_of_ip6_daddr) {
235			/* 16 byte IPv6 address comes in 4 iterations of
236			 * 4byte chunks each
237			 */
238			actions->nat.src_xlate = true;
239			idx = (offset - offset_of_ip6_saddr) / 4;
240			/* First 4bytes will be copied to idx 0 and so on */
241			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
242		} else if (offset >= offset_of_ip6_daddr &&
243			   offset < offset_of_ip6_daddr + 16) {
244			actions->nat.src_xlate = false;
245			idx = (offset - offset_of_ip6_daddr) / 4;
246			actions->nat.l3.ipv6.saddr.s6_addr32[idx] = htonl(val);
247		} else {
248			netdev_err(bp->dev,
249				   "%s: IPv6_hdr: Invalid pedit field\n",
250				   __func__);
251			return -EINVAL;
252		}
253		break;
254	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
255	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
256		/* HW does not support L4 rewrite alone without L3
257		 * rewrite
258		 */
259		if (!(actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE)) {
260			netdev_err(bp->dev,
261				   "Need to specify L3 rewrite as well\n");
262			return -EINVAL;
263		}
264		if (actions->nat.src_xlate)
265			actions->nat.l4.ports.sport = htons(val);
266		else
267			actions->nat.l4.ports.dport = htons(val);
268		netdev_dbg(bp->dev, "actions->nat.sport = %d dport = %d\n",
269			   actions->nat.l4.ports.sport,
270			   actions->nat.l4.ports.dport);
271		break;
272	default:
273		netdev_err(bp->dev, "%s: Unsupported pedit hdr type\n",
274			   __func__);
275		return -EINVAL;
276	}
277	return 0;
278}
279
280static int bnxt_tc_parse_actions(struct bnxt *bp,
281				 struct bnxt_tc_actions *actions,
282				 struct flow_action *flow_action,
283				 struct netlink_ext_ack *extack)
284{
285	/* Used to store the L2 rewrite mask for dmac (6 bytes) followed by
286	 * smac (6 bytes) if rewrite of both is specified, otherwise either
287	 * dmac or smac
288	 */
289	u16 eth_addr_mask[ETH_ALEN] = { 0 };
290	/* Used to store the L2 rewrite key for dmac (6 bytes) followed by
291	 * smac (6 bytes) if rewrite of both is specified, otherwise either
292	 * dmac or smac
293	 */
294	u16 eth_addr[ETH_ALEN] = { 0 };
295	struct flow_action_entry *act;
296	int i, rc;
297
298	if (!flow_action_has_entries(flow_action)) {
299		netdev_info(bp->dev, "no actions\n");
300		return -EINVAL;
301	}
302
303	if (!flow_action_basic_hw_stats_check(flow_action, extack))
304		return -EOPNOTSUPP;
305
306	flow_action_for_each(i, act, flow_action) {
307		switch (act->id) {
308		case FLOW_ACTION_DROP:
309			actions->flags |= BNXT_TC_ACTION_FLAG_DROP;
310			return 0; /* don't bother with other actions */
311		case FLOW_ACTION_REDIRECT:
312			rc = bnxt_tc_parse_redir(bp, actions, act);
313			if (rc)
314				return rc;
315			break;
316		case FLOW_ACTION_VLAN_POP:
317		case FLOW_ACTION_VLAN_PUSH:
318		case FLOW_ACTION_VLAN_MANGLE:
319			rc = bnxt_tc_parse_vlan(bp, actions, act);
320			if (rc)
321				return rc;
322			break;
323		case FLOW_ACTION_TUNNEL_ENCAP:
324			rc = bnxt_tc_parse_tunnel_set(bp, actions, act);
325			if (rc)
326				return rc;
327			break;
328		case FLOW_ACTION_TUNNEL_DECAP:
329			actions->flags |= BNXT_TC_ACTION_FLAG_TUNNEL_DECAP;
330			break;
331		/* Packet edit: L2 rewrite, NAT, NAPT */
332		case FLOW_ACTION_MANGLE:
333			rc = bnxt_tc_parse_pedit(bp, actions, act, i,
334						 (u8 *)eth_addr,
335						 (u8 *)eth_addr_mask);
336			if (rc)
337				return rc;
338			break;
339		default:
340			break;
341		}
342	}
343
344	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
345		rc = bnxt_fill_l2_rewrite_fields(actions, eth_addr,
346						 eth_addr_mask);
347		if (rc)
348			return rc;
349	}
350
351	if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
352		if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
353			/* dst_fid is PF's fid */
354			actions->dst_fid = bp->pf.fw_fid;
355		} else {
356			/* find the FID from dst_dev */
357			actions->dst_fid =
358				bnxt_flow_get_dst_fid(bp, actions->dst_dev);
359			if (actions->dst_fid == BNXT_FID_INVALID)
360				return -EINVAL;
361		}
362	}
363
364	return 0;
365}
366
367static int bnxt_tc_parse_flow(struct bnxt *bp,
368			      struct flow_cls_offload *tc_flow_cmd,
369			      struct bnxt_tc_flow *flow)
370{
371	struct flow_rule *rule = flow_cls_offload_flow_rule(tc_flow_cmd);
372	struct flow_dissector *dissector = rule->match.dissector;
373
374	/* KEY_CONTROL and KEY_BASIC are needed for forming a meaningful key */
375	if ((dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_CONTROL)) == 0 ||
376	    (dissector->used_keys & BIT(FLOW_DISSECTOR_KEY_BASIC)) == 0) {
377		netdev_info(bp->dev, "cannot form TC key: used_keys = 0x%x\n",
378			    dissector->used_keys);
379		return -EOPNOTSUPP;
380	}
381
382	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
383		struct flow_match_basic match;
384
385		flow_rule_match_basic(rule, &match);
386		flow->l2_key.ether_type = match.key->n_proto;
387		flow->l2_mask.ether_type = match.mask->n_proto;
388
389		if (match.key->n_proto == htons(ETH_P_IP) ||
390		    match.key->n_proto == htons(ETH_P_IPV6)) {
391			flow->l4_key.ip_proto = match.key->ip_proto;
392			flow->l4_mask.ip_proto = match.mask->ip_proto;
393		}
394	}
395
396	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
397		struct flow_match_eth_addrs match;
398
399		flow_rule_match_eth_addrs(rule, &match);
400		flow->flags |= BNXT_TC_FLOW_FLAGS_ETH_ADDRS;
401		ether_addr_copy(flow->l2_key.dmac, match.key->dst);
402		ether_addr_copy(flow->l2_mask.dmac, match.mask->dst);
403		ether_addr_copy(flow->l2_key.smac, match.key->src);
404		ether_addr_copy(flow->l2_mask.smac, match.mask->src);
405	}
406
407	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
408		struct flow_match_vlan match;
409
410		flow_rule_match_vlan(rule, &match);
411		flow->l2_key.inner_vlan_tci =
412			cpu_to_be16(VLAN_TCI(match.key->vlan_id,
413					     match.key->vlan_priority));
414		flow->l2_mask.inner_vlan_tci =
415			cpu_to_be16((VLAN_TCI(match.mask->vlan_id,
416					      match.mask->vlan_priority)));
417		flow->l2_key.inner_vlan_tpid = htons(ETH_P_8021Q);
418		flow->l2_mask.inner_vlan_tpid = htons(0xffff);
419		flow->l2_key.num_vlans = 1;
420	}
421
422	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS)) {
423		struct flow_match_ipv4_addrs match;
424
425		flow_rule_match_ipv4_addrs(rule, &match);
426		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV4_ADDRS;
427		flow->l3_key.ipv4.daddr.s_addr = match.key->dst;
428		flow->l3_mask.ipv4.daddr.s_addr = match.mask->dst;
429		flow->l3_key.ipv4.saddr.s_addr = match.key->src;
430		flow->l3_mask.ipv4.saddr.s_addr = match.mask->src;
431	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS)) {
432		struct flow_match_ipv6_addrs match;
433
434		flow_rule_match_ipv6_addrs(rule, &match);
435		flow->flags |= BNXT_TC_FLOW_FLAGS_IPV6_ADDRS;
436		flow->l3_key.ipv6.daddr = match.key->dst;
437		flow->l3_mask.ipv6.daddr = match.mask->dst;
438		flow->l3_key.ipv6.saddr = match.key->src;
439		flow->l3_mask.ipv6.saddr = match.mask->src;
440	}
441
442	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
443		struct flow_match_ports match;
444
445		flow_rule_match_ports(rule, &match);
446		flow->flags |= BNXT_TC_FLOW_FLAGS_PORTS;
447		flow->l4_key.ports.dport = match.key->dst;
448		flow->l4_mask.ports.dport = match.mask->dst;
449		flow->l4_key.ports.sport = match.key->src;
450		flow->l4_mask.ports.sport = match.mask->src;
451	}
452
453	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
454		struct flow_match_icmp match;
455
456		flow_rule_match_icmp(rule, &match);
457		flow->flags |= BNXT_TC_FLOW_FLAGS_ICMP;
458		flow->l4_key.icmp.type = match.key->type;
459		flow->l4_key.icmp.code = match.key->code;
460		flow->l4_mask.icmp.type = match.mask->type;
461		flow->l4_mask.icmp.code = match.mask->code;
462	}
463
464	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
465		struct flow_match_ipv4_addrs match;
466
467		flow_rule_match_enc_ipv4_addrs(rule, &match);
468		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS;
469		flow->tun_key.u.ipv4.dst = match.key->dst;
470		flow->tun_mask.u.ipv4.dst = match.mask->dst;
471		flow->tun_key.u.ipv4.src = match.key->src;
472		flow->tun_mask.u.ipv4.src = match.mask->src;
473	} else if (flow_rule_match_key(rule,
474				      FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) {
475		return -EOPNOTSUPP;
476	}
477
478	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
479		struct flow_match_enc_keyid match;
480
481		flow_rule_match_enc_keyid(rule, &match);
482		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ID;
483		flow->tun_key.tun_id = key32_to_tunnel_id(match.key->keyid);
484		flow->tun_mask.tun_id = key32_to_tunnel_id(match.mask->keyid);
485	}
486
487	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
488		struct flow_match_ports match;
489
490		flow_rule_match_enc_ports(rule, &match);
491		flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_PORTS;
492		flow->tun_key.tp_dst = match.key->dst;
493		flow->tun_mask.tp_dst = match.mask->dst;
494		flow->tun_key.tp_src = match.key->src;
495		flow->tun_mask.tp_src = match.mask->src;
496	}
497
498	return bnxt_tc_parse_actions(bp, &flow->actions, &rule->action,
499				     tc_flow_cmd->common.extack);
500}
501
502static int bnxt_hwrm_cfa_flow_free(struct bnxt *bp,
503				   struct bnxt_tc_flow_node *flow_node)
504{
505	struct hwrm_cfa_flow_free_input req = { 0 };
506	int rc;
507
508	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_FREE, -1, -1);
509	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE)
510		req.ext_flow_handle = flow_node->ext_flow_handle;
511	else
512		req.flow_handle = flow_node->flow_handle;
513
514	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
515	if (rc)
516		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
517
518	return rc;
519}
520
521static int ipv6_mask_len(struct in6_addr *mask)
522{
523	int mask_len = 0, i;
524
525	for (i = 0; i < 4; i++)
526		mask_len += inet_mask_len(mask->s6_addr32[i]);
527
528	return mask_len;
529}
530
/* Return true when all @len bytes at @mask are zero (also for @len <= 0) */
static bool is_wildcard(void *mask, int len)
{
	const unsigned char *byte = mask;

	while (len-- > 0) {
		if (*byte++)
			return false;
	}

	return true;
}
542
/* Return true when all @len bytes at @mask are 0xff (also for @len <= 0) */
static bool is_exactmatch(void *mask, int len)
{
	const unsigned char *byte = mask;

	while (len-- > 0) {
		if (*byte++ != 0xff)
			return false;
	}

	return true;
}
554
555static bool is_vlan_tci_allowed(__be16  vlan_tci_mask,
556				__be16  vlan_tci)
557{
558	/* VLAN priority must be either exactly zero or fully wildcarded and
559	 * VLAN id must be exact match.
560	 */
561	if (is_vid_exactmatch(vlan_tci_mask) &&
562	    ((is_vlan_pcp_exactmatch(vlan_tci_mask) &&
563	      is_vlan_pcp_zero(vlan_tci)) ||
564	     is_vlan_pcp_wildcarded(vlan_tci_mask)))
565		return true;
566
567	return false;
568}
569
/* Return true when at least one of the @len bytes at @key is non-zero */
static bool bits_set(void *key, int len)
{
	const unsigned char *byte = key;

	while (len-- > 0) {
		if (*byte++)
			return true;
	}

	return false;
}
581
582static int bnxt_hwrm_cfa_flow_alloc(struct bnxt *bp, struct bnxt_tc_flow *flow,
583				    __le16 ref_flow_handle,
584				    __le32 tunnel_handle,
585				    struct bnxt_tc_flow_node *flow_node)
586{
587	struct bnxt_tc_actions *actions = &flow->actions;
588	struct bnxt_tc_l3_key *l3_mask = &flow->l3_mask;
589	struct bnxt_tc_l3_key *l3_key = &flow->l3_key;
590	struct hwrm_cfa_flow_alloc_input req = { 0 };
591	struct hwrm_cfa_flow_alloc_output *resp;
592	u16 flow_flags = 0, action_flags = 0;
593	int rc;
594
595	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_ALLOC, -1, -1);
596
597	req.src_fid = cpu_to_le16(flow->src_fid);
598	req.ref_flow_handle = ref_flow_handle;
599
600	if (actions->flags & BNXT_TC_ACTION_FLAG_L2_REWRITE) {
601		memcpy(req.l2_rewrite_dmac, actions->l2_rewrite_dmac,
602		       ETH_ALEN);
603		memcpy(req.l2_rewrite_smac, actions->l2_rewrite_smac,
604		       ETH_ALEN);
605		action_flags |=
606			CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
607	}
608
609	if (actions->flags & BNXT_TC_ACTION_FLAG_NAT_XLATE) {
610		if (actions->nat.l3_is_ipv4) {
611			action_flags |=
612				CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_IPV4_ADDRESS;
613
614			if (actions->nat.src_xlate) {
615				action_flags |=
616					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
617				/* L3 source rewrite */
618				req.nat_ip_address[0] =
619					actions->nat.l3.ipv4.saddr.s_addr;
620				/* L4 source port */
621				if (actions->nat.l4.ports.sport)
622					req.nat_port =
623						actions->nat.l4.ports.sport;
624			} else {
625				action_flags |=
626					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
627				/* L3 destination rewrite */
628				req.nat_ip_address[0] =
629					actions->nat.l3.ipv4.daddr.s_addr;
630				/* L4 destination port */
631				if (actions->nat.l4.ports.dport)
632					req.nat_port =
633						actions->nat.l4.ports.dport;
634			}
635			netdev_dbg(bp->dev,
636				   "req.nat_ip_address: %pI4 src_xlate: %d req.nat_port: %x\n",
637				   req.nat_ip_address, actions->nat.src_xlate,
638				   req.nat_port);
639		} else {
640			if (actions->nat.src_xlate) {
641				action_flags |=
642					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_SRC;
643				/* L3 source rewrite */
644				memcpy(req.nat_ip_address,
645				       actions->nat.l3.ipv6.saddr.s6_addr32,
646				       sizeof(req.nat_ip_address));
647				/* L4 source port */
648				if (actions->nat.l4.ports.sport)
649					req.nat_port =
650						actions->nat.l4.ports.sport;
651			} else {
652				action_flags |=
653					CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_NAT_DEST;
654				/* L3 destination rewrite */
655				memcpy(req.nat_ip_address,
656				       actions->nat.l3.ipv6.daddr.s6_addr32,
657				       sizeof(req.nat_ip_address));
658				/* L4 destination port */
659				if (actions->nat.l4.ports.dport)
660					req.nat_port =
661						actions->nat.l4.ports.dport;
662			}
663			netdev_dbg(bp->dev,
664				   "req.nat_ip_address: %pI6 src_xlate: %d req.nat_port: %x\n",
665				   req.nat_ip_address, actions->nat.src_xlate,
666				   req.nat_port);
667		}
668	}
669
670	if (actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP ||
671	    actions->flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP) {
672		req.tunnel_handle = tunnel_handle;
673		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_TUNNEL;
674		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_TUNNEL;
675	}
676
677	req.ethertype = flow->l2_key.ether_type;
678	req.ip_proto = flow->l4_key.ip_proto;
679
680	if (flow->flags & BNXT_TC_FLOW_FLAGS_ETH_ADDRS) {
681		memcpy(req.dmac, flow->l2_key.dmac, ETH_ALEN);
682		memcpy(req.smac, flow->l2_key.smac, ETH_ALEN);
683	}
684
685	if (flow->l2_key.num_vlans > 0) {
686		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_NUM_VLAN_ONE;
687		/* FW expects the inner_vlan_tci value to be set
688		 * in outer_vlan_tci when num_vlans is 1 (which is
689		 * always the case in TC.)
690		 */
691		req.outer_vlan_tci = flow->l2_key.inner_vlan_tci;
692	}
693
694	/* If all IP and L4 fields are wildcarded then this is an L2 flow */
695	if (is_wildcard(l3_mask, sizeof(*l3_mask)) &&
696	    is_wildcard(&flow->l4_mask, sizeof(flow->l4_mask))) {
697		flow_flags |= CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_L2;
698	} else {
699		flow_flags |= flow->l2_key.ether_type == htons(ETH_P_IP) ?
700				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV4 :
701				CFA_FLOW_ALLOC_REQ_FLAGS_FLOWTYPE_IPV6;
702
703		if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV4_ADDRS) {
704			req.ip_dst[0] = l3_key->ipv4.daddr.s_addr;
705			req.ip_dst_mask_len =
706				inet_mask_len(l3_mask->ipv4.daddr.s_addr);
707			req.ip_src[0] = l3_key->ipv4.saddr.s_addr;
708			req.ip_src_mask_len =
709				inet_mask_len(l3_mask->ipv4.saddr.s_addr);
710		} else if (flow->flags & BNXT_TC_FLOW_FLAGS_IPV6_ADDRS) {
711			memcpy(req.ip_dst, l3_key->ipv6.daddr.s6_addr32,
712			       sizeof(req.ip_dst));
713			req.ip_dst_mask_len =
714					ipv6_mask_len(&l3_mask->ipv6.daddr);
715			memcpy(req.ip_src, l3_key->ipv6.saddr.s6_addr32,
716			       sizeof(req.ip_src));
717			req.ip_src_mask_len =
718					ipv6_mask_len(&l3_mask->ipv6.saddr);
719		}
720	}
721
722	if (flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) {
723		req.l4_src_port = flow->l4_key.ports.sport;
724		req.l4_src_port_mask = flow->l4_mask.ports.sport;
725		req.l4_dst_port = flow->l4_key.ports.dport;
726		req.l4_dst_port_mask = flow->l4_mask.ports.dport;
727	} else if (flow->flags & BNXT_TC_FLOW_FLAGS_ICMP) {
728		/* l4 ports serve as type/code when ip_proto is ICMP */
729		req.l4_src_port = htons(flow->l4_key.icmp.type);
730		req.l4_src_port_mask = htons(flow->l4_mask.icmp.type);
731		req.l4_dst_port = htons(flow->l4_key.icmp.code);
732		req.l4_dst_port_mask = htons(flow->l4_mask.icmp.code);
733	}
734	req.flags = cpu_to_le16(flow_flags);
735
736	if (actions->flags & BNXT_TC_ACTION_FLAG_DROP) {
737		action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_DROP;
738	} else {
739		if (actions->flags & BNXT_TC_ACTION_FLAG_FWD) {
740			action_flags |= CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_FWD;
741			req.dst_fid = cpu_to_le16(actions->dst_fid);
742		}
743		if (actions->flags & BNXT_TC_ACTION_FLAG_PUSH_VLAN) {
744			action_flags |=
745			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
746			req.l2_rewrite_vlan_tpid = actions->push_vlan_tpid;
747			req.l2_rewrite_vlan_tci = actions->push_vlan_tci;
748			memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
749			memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
750		}
751		if (actions->flags & BNXT_TC_ACTION_FLAG_POP_VLAN) {
752			action_flags |=
753			    CFA_FLOW_ALLOC_REQ_ACTION_FLAGS_L2_HEADER_REWRITE;
754			/* Rewrite config with tpid = 0 implies vlan pop */
755			req.l2_rewrite_vlan_tpid = 0;
756			memcpy(&req.l2_rewrite_dmac, &req.dmac, ETH_ALEN);
757			memcpy(&req.l2_rewrite_smac, &req.smac, ETH_ALEN);
758		}
759	}
760	req.action_flags = cpu_to_le16(action_flags);
761
762	mutex_lock(&bp->hwrm_cmd_lock);
763	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
764	if (!rc) {
765		resp = bnxt_get_hwrm_resp_addr(bp, &req);
766		/* CFA_FLOW_ALLOC response interpretation:
767		 *		    fw with	     fw with
768		 *		    16-bit	     64-bit
769		 *		    flow handle      flow handle
770		 *		    ===========	     ===========
771		 * flow_handle      flow handle      flow context id
772		 * ext_flow_handle  INVALID	     flow handle
773		 * flow_id	    INVALID	     flow counter id
774		 */
775		flow_node->flow_handle = resp->flow_handle;
776		if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
777			flow_node->ext_flow_handle = resp->ext_flow_handle;
778			flow_node->flow_id = resp->flow_id;
779		}
780	}
781	mutex_unlock(&bp->hwrm_cmd_lock);
782	return rc;
783}
784
785static int hwrm_cfa_decap_filter_alloc(struct bnxt *bp,
786				       struct bnxt_tc_flow *flow,
787				       struct bnxt_tc_l2_key *l2_info,
788				       __le32 ref_decap_handle,
789				       __le32 *decap_filter_handle)
790{
791	struct hwrm_cfa_decap_filter_alloc_input req = { 0 };
792	struct hwrm_cfa_decap_filter_alloc_output *resp;
793	struct ip_tunnel_key *tun_key = &flow->tun_key;
794	u32 enables = 0;
795	int rc;
796
797	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_ALLOC, -1, -1);
798
799	req.flags = cpu_to_le32(CFA_DECAP_FILTER_ALLOC_REQ_FLAGS_OVS_TUNNEL);
800	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_TYPE |
801		   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IP_PROTOCOL;
802	req.tunnel_type = CFA_DECAP_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN;
803	req.ip_protocol = CFA_DECAP_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP;
804
805	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ID) {
806		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_TUNNEL_ID;
807		/* tunnel_id is wrongly defined in hsi defn. as __le32 */
808		req.tunnel_id = tunnel_id_to_key32(tun_key->tun_id);
809	}
810
811	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS) {
812		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_MACADDR;
813		ether_addr_copy(req.dst_macaddr, l2_info->dmac);
814	}
815	if (l2_info->num_vlans) {
816		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_T_IVLAN_VID;
817		req.t_ivlan_vid = l2_info->inner_vlan_tci;
818	}
819
820	enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_ETHERTYPE;
821	req.ethertype = htons(ETH_P_IP);
822
823	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_IPV4_ADDRS) {
824		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_SRC_IPADDR |
825			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_IPADDR |
826			   CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_IPADDR_TYPE;
827		req.ip_addr_type = CFA_DECAP_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4;
828		req.dst_ipaddr[0] = tun_key->u.ipv4.dst;
829		req.src_ipaddr[0] = tun_key->u.ipv4.src;
830	}
831
832	if (flow->flags & BNXT_TC_FLOW_FLAGS_TUNL_PORTS) {
833		enables |= CFA_DECAP_FILTER_ALLOC_REQ_ENABLES_DST_PORT;
834		req.dst_port = tun_key->tp_dst;
835	}
836
837	/* Eventhough the decap_handle returned by hwrm_cfa_decap_filter_alloc
838	 * is defined as __le32, l2_ctxt_ref_id is defined in HSI as __le16.
839	 */
840	req.l2_ctxt_ref_id = (__force __le16)ref_decap_handle;
841	req.enables = cpu_to_le32(enables);
842
843	mutex_lock(&bp->hwrm_cmd_lock);
844	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
845	if (!rc) {
846		resp = bnxt_get_hwrm_resp_addr(bp, &req);
847		*decap_filter_handle = resp->decap_filter_id;
848	} else {
849		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
850	}
851	mutex_unlock(&bp->hwrm_cmd_lock);
852
853	return rc;
854}
855
856static int hwrm_cfa_decap_filter_free(struct bnxt *bp,
857				      __le32 decap_filter_handle)
858{
859	struct hwrm_cfa_decap_filter_free_input req = { 0 };
860	int rc;
861
862	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_DECAP_FILTER_FREE, -1, -1);
863	req.decap_filter_id = decap_filter_handle;
864
865	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
866	if (rc)
867		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
868
869	return rc;
870}
871
872static int hwrm_cfa_encap_record_alloc(struct bnxt *bp,
873				       struct ip_tunnel_key *encap_key,
874				       struct bnxt_tc_l2_key *l2_info,
875				       __le32 *encap_record_handle)
876{
877	struct hwrm_cfa_encap_record_alloc_input req = { 0 };
878	struct hwrm_cfa_encap_record_alloc_output *resp;
879	struct hwrm_cfa_encap_data_vxlan *encap =
880			(struct hwrm_cfa_encap_data_vxlan *)&req.encap_data;
881	struct hwrm_vxlan_ipv4_hdr *encap_ipv4 =
882				(struct hwrm_vxlan_ipv4_hdr *)encap->l3;
883	int rc;
884
885	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_ALLOC, -1, -1);
886
887	req.encap_type = CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN;
888
889	ether_addr_copy(encap->dst_mac_addr, l2_info->dmac);
890	ether_addr_copy(encap->src_mac_addr, l2_info->smac);
891	if (l2_info->num_vlans) {
892		encap->num_vlan_tags = l2_info->num_vlans;
893		encap->ovlan_tci = l2_info->inner_vlan_tci;
894		encap->ovlan_tpid = l2_info->inner_vlan_tpid;
895	}
896
897	encap_ipv4->ver_hlen = 4 << VXLAN_IPV4_HDR_VER_HLEN_VERSION_SFT;
898	encap_ipv4->ver_hlen |= 5 << VXLAN_IPV4_HDR_VER_HLEN_HEADER_LENGTH_SFT;
899	encap_ipv4->ttl = encap_key->ttl;
900
901	encap_ipv4->dest_ip_addr = encap_key->u.ipv4.dst;
902	encap_ipv4->src_ip_addr = encap_key->u.ipv4.src;
903	encap_ipv4->protocol = IPPROTO_UDP;
904
905	encap->dst_port = encap_key->tp_dst;
906	encap->vni = tunnel_id_to_key32(encap_key->tun_id);
907
908	mutex_lock(&bp->hwrm_cmd_lock);
909	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
910	if (!rc) {
911		resp = bnxt_get_hwrm_resp_addr(bp, &req);
912		*encap_record_handle = resp->encap_record_id;
913	} else {
914		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
915	}
916	mutex_unlock(&bp->hwrm_cmd_lock);
917
918	return rc;
919}
920
921static int hwrm_cfa_encap_record_free(struct bnxt *bp,
922				      __le32 encap_record_handle)
923{
924	struct hwrm_cfa_encap_record_free_input req = { 0 };
925	int rc;
926
927	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_ENCAP_RECORD_FREE, -1, -1);
928	req.encap_record_id = encap_record_handle;
929
930	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
931	if (rc)
932		netdev_info(bp->dev, "%s: Error rc=%d\n", __func__, rc);
933
934	return rc;
935}
936
937static int bnxt_tc_put_l2_node(struct bnxt *bp,
938			       struct bnxt_tc_flow_node *flow_node)
939{
940	struct bnxt_tc_l2_node *l2_node = flow_node->l2_node;
941	struct bnxt_tc_info *tc_info = bp->tc_info;
942	int rc;
943
944	/* remove flow_node from the L2 shared flow list */
945	list_del(&flow_node->l2_list_node);
946	if (--l2_node->refcount == 0) {
947		rc =  rhashtable_remove_fast(&tc_info->l2_table, &l2_node->node,
948					     tc_info->l2_ht_params);
949		if (rc)
950			netdev_err(bp->dev,
951				   "Error: %s: rhashtable_remove_fast: %d\n",
952				   __func__, rc);
953		kfree_rcu(l2_node, rcu);
954	}
955	return 0;
956}
957
958static struct bnxt_tc_l2_node *
959bnxt_tc_get_l2_node(struct bnxt *bp, struct rhashtable *l2_table,
960		    struct rhashtable_params ht_params,
961		    struct bnxt_tc_l2_key *l2_key)
962{
963	struct bnxt_tc_l2_node *l2_node;
964	int rc;
965
966	l2_node = rhashtable_lookup_fast(l2_table, l2_key, ht_params);
967	if (!l2_node) {
968		l2_node = kzalloc(sizeof(*l2_node), GFP_KERNEL);
969		if (!l2_node) {
970			rc = -ENOMEM;
971			return NULL;
972		}
973
974		l2_node->key = *l2_key;
975		rc = rhashtable_insert_fast(l2_table, &l2_node->node,
976					    ht_params);
977		if (rc) {
978			kfree_rcu(l2_node, rcu);
979			netdev_err(bp->dev,
980				   "Error: %s: rhashtable_insert_fast: %d\n",
981				   __func__, rc);
982			return NULL;
983		}
984		INIT_LIST_HEAD(&l2_node->common_l2_flows);
985	}
986	return l2_node;
987}
988
989/* Get the ref_flow_handle for a flow by checking if there are any other
990 * flows that share the same L2 key as this flow.
991 */
992static int
993bnxt_tc_get_ref_flow_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
994			    struct bnxt_tc_flow_node *flow_node,
995			    __le16 *ref_flow_handle)
996{
997	struct bnxt_tc_info *tc_info = bp->tc_info;
998	struct bnxt_tc_flow_node *ref_flow_node;
999	struct bnxt_tc_l2_node *l2_node;
1000
1001	l2_node = bnxt_tc_get_l2_node(bp, &tc_info->l2_table,
1002				      tc_info->l2_ht_params,
1003				      &flow->l2_key);
1004	if (!l2_node)
1005		return -1;
1006
1007	/* If any other flow is using this l2_node, use it's flow_handle
1008	 * as the ref_flow_handle
1009	 */
1010	if (l2_node->refcount > 0) {
1011		ref_flow_node = list_first_entry(&l2_node->common_l2_flows,
1012						 struct bnxt_tc_flow_node,
1013						 l2_list_node);
1014		*ref_flow_handle = ref_flow_node->flow_handle;
1015	} else {
1016		*ref_flow_handle = cpu_to_le16(0xffff);
1017	}
1018
1019	/* Insert the l2_node into the flow_node so that subsequent flows
1020	 * with a matching l2 key can use the flow_handle of this flow
1021	 * as their ref_flow_handle
1022	 */
1023	flow_node->l2_node = l2_node;
1024	list_add(&flow_node->l2_list_node, &l2_node->common_l2_flows);
1025	l2_node->refcount++;
1026	return 0;
1027}
1028
1029/* After the flow parsing is done, this routine is used for checking
1030 * if there are any aspects of the flow that prevent it from being
1031 * offloaded.
1032 */
1033static bool bnxt_tc_can_offload(struct bnxt *bp, struct bnxt_tc_flow *flow)
1034{
1035	/* If L4 ports are specified then ip_proto must be TCP or UDP */
1036	if ((flow->flags & BNXT_TC_FLOW_FLAGS_PORTS) &&
1037	    (flow->l4_key.ip_proto != IPPROTO_TCP &&
1038	     flow->l4_key.ip_proto != IPPROTO_UDP)) {
1039		netdev_info(bp->dev, "Cannot offload non-TCP/UDP (%d) ports\n",
1040			    flow->l4_key.ip_proto);
1041		return false;
1042	}
1043
1044	/* Currently source/dest MAC cannot be partial wildcard  */
1045	if (bits_set(&flow->l2_key.smac, sizeof(flow->l2_key.smac)) &&
1046	    !is_exactmatch(flow->l2_mask.smac, sizeof(flow->l2_mask.smac))) {
1047		netdev_info(bp->dev, "Wildcard match unsupported for Source MAC\n");
1048		return false;
1049	}
1050	if (bits_set(&flow->l2_key.dmac, sizeof(flow->l2_key.dmac)) &&
1051	    !is_exactmatch(&flow->l2_mask.dmac, sizeof(flow->l2_mask.dmac))) {
1052		netdev_info(bp->dev, "Wildcard match unsupported for Dest MAC\n");
1053		return false;
1054	}
1055
1056	/* Currently VLAN fields cannot be partial wildcard */
1057	if (bits_set(&flow->l2_key.inner_vlan_tci,
1058		     sizeof(flow->l2_key.inner_vlan_tci)) &&
1059	    !is_vlan_tci_allowed(flow->l2_mask.inner_vlan_tci,
1060				 flow->l2_key.inner_vlan_tci)) {
1061		netdev_info(bp->dev, "Unsupported VLAN TCI\n");
1062		return false;
1063	}
1064	if (bits_set(&flow->l2_key.inner_vlan_tpid,
1065		     sizeof(flow->l2_key.inner_vlan_tpid)) &&
1066	    !is_exactmatch(&flow->l2_mask.inner_vlan_tpid,
1067			   sizeof(flow->l2_mask.inner_vlan_tpid))) {
1068		netdev_info(bp->dev, "Wildcard match unsupported for VLAN TPID\n");
1069		return false;
1070	}
1071
1072	/* Currently Ethertype must be set */
1073	if (!is_exactmatch(&flow->l2_mask.ether_type,
1074			   sizeof(flow->l2_mask.ether_type))) {
1075		netdev_info(bp->dev, "Wildcard match unsupported for Ethertype\n");
1076		return false;
1077	}
1078
1079	return true;
1080}
1081
1082/* Returns the final refcount of the node on success
1083 * or a -ve error code on failure
1084 */
1085static int bnxt_tc_put_tunnel_node(struct bnxt *bp,
1086				   struct rhashtable *tunnel_table,
1087				   struct rhashtable_params *ht_params,
1088				   struct bnxt_tc_tunnel_node *tunnel_node)
1089{
1090	int rc;
1091
1092	if (--tunnel_node->refcount == 0) {
1093		rc =  rhashtable_remove_fast(tunnel_table, &tunnel_node->node,
1094					     *ht_params);
1095		if (rc) {
1096			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1097			rc = -1;
1098		}
1099		kfree_rcu(tunnel_node, rcu);
1100		return rc;
1101	} else {
1102		return tunnel_node->refcount;
1103	}
1104}
1105
1106/* Get (or add) either encap or decap tunnel node from/to the supplied
1107 * hash table.
1108 */
1109static struct bnxt_tc_tunnel_node *
1110bnxt_tc_get_tunnel_node(struct bnxt *bp, struct rhashtable *tunnel_table,
1111			struct rhashtable_params *ht_params,
1112			struct ip_tunnel_key *tun_key)
1113{
1114	struct bnxt_tc_tunnel_node *tunnel_node;
1115	int rc;
1116
1117	tunnel_node = rhashtable_lookup_fast(tunnel_table, tun_key, *ht_params);
1118	if (!tunnel_node) {
1119		tunnel_node = kzalloc(sizeof(*tunnel_node), GFP_KERNEL);
1120		if (!tunnel_node) {
1121			rc = -ENOMEM;
1122			goto err;
1123		}
1124
1125		tunnel_node->key = *tun_key;
1126		tunnel_node->tunnel_handle = INVALID_TUNNEL_HANDLE;
1127		rc = rhashtable_insert_fast(tunnel_table, &tunnel_node->node,
1128					    *ht_params);
1129		if (rc) {
1130			kfree_rcu(tunnel_node, rcu);
1131			goto err;
1132		}
1133	}
1134	tunnel_node->refcount++;
1135	return tunnel_node;
1136err:
1137	netdev_info(bp->dev, "error rc=%d\n", rc);
1138	return NULL;
1139}
1140
1141static int bnxt_tc_get_ref_decap_handle(struct bnxt *bp,
1142					struct bnxt_tc_flow *flow,
1143					struct bnxt_tc_l2_key *l2_key,
1144					struct bnxt_tc_flow_node *flow_node,
1145					__le32 *ref_decap_handle)
1146{
1147	struct bnxt_tc_info *tc_info = bp->tc_info;
1148	struct bnxt_tc_flow_node *ref_flow_node;
1149	struct bnxt_tc_l2_node *decap_l2_node;
1150
1151	decap_l2_node = bnxt_tc_get_l2_node(bp, &tc_info->decap_l2_table,
1152					    tc_info->decap_l2_ht_params,
1153					    l2_key);
1154	if (!decap_l2_node)
1155		return -1;
1156
1157	/* If any other flow is using this decap_l2_node, use it's decap_handle
1158	 * as the ref_decap_handle
1159	 */
1160	if (decap_l2_node->refcount > 0) {
1161		ref_flow_node =
1162			list_first_entry(&decap_l2_node->common_l2_flows,
1163					 struct bnxt_tc_flow_node,
1164					 decap_l2_list_node);
1165		*ref_decap_handle = ref_flow_node->decap_node->tunnel_handle;
1166	} else {
1167		*ref_decap_handle = INVALID_TUNNEL_HANDLE;
1168	}
1169
1170	/* Insert the l2_node into the flow_node so that subsequent flows
1171	 * with a matching decap l2 key can use the decap_filter_handle of
1172	 * this flow as their ref_decap_handle
1173	 */
1174	flow_node->decap_l2_node = decap_l2_node;
1175	list_add(&flow_node->decap_l2_list_node,
1176		 &decap_l2_node->common_l2_flows);
1177	decap_l2_node->refcount++;
1178	return 0;
1179}
1180
1181static void bnxt_tc_put_decap_l2_node(struct bnxt *bp,
1182				      struct bnxt_tc_flow_node *flow_node)
1183{
1184	struct bnxt_tc_l2_node *decap_l2_node = flow_node->decap_l2_node;
1185	struct bnxt_tc_info *tc_info = bp->tc_info;
1186	int rc;
1187
1188	/* remove flow_node from the decap L2 sharing flow list */
1189	list_del(&flow_node->decap_l2_list_node);
1190	if (--decap_l2_node->refcount == 0) {
1191		rc =  rhashtable_remove_fast(&tc_info->decap_l2_table,
1192					     &decap_l2_node->node,
1193					     tc_info->decap_l2_ht_params);
1194		if (rc)
1195			netdev_err(bp->dev, "rhashtable_remove_fast rc=%d\n", rc);
1196		kfree_rcu(decap_l2_node, rcu);
1197	}
1198}
1199
1200static void bnxt_tc_put_decap_handle(struct bnxt *bp,
1201				     struct bnxt_tc_flow_node *flow_node)
1202{
1203	__le32 decap_handle = flow_node->decap_node->tunnel_handle;
1204	struct bnxt_tc_info *tc_info = bp->tc_info;
1205	int rc;
1206
1207	if (flow_node->decap_l2_node)
1208		bnxt_tc_put_decap_l2_node(bp, flow_node);
1209
1210	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1211				     &tc_info->decap_ht_params,
1212				     flow_node->decap_node);
1213	if (!rc && decap_handle != INVALID_TUNNEL_HANDLE)
1214		hwrm_cfa_decap_filter_free(bp, decap_handle);
1215}
1216
1217static int bnxt_tc_resolve_tunnel_hdrs(struct bnxt *bp,
1218				       struct ip_tunnel_key *tun_key,
1219				       struct bnxt_tc_l2_key *l2_info)
1220{
1221#ifdef CONFIG_INET
1222	struct net_device *real_dst_dev = bp->dev;
1223	struct flowi4 flow = { {0} };
1224	struct net_device *dst_dev;
1225	struct neighbour *nbr;
1226	struct rtable *rt;
1227	int rc;
1228
1229	flow.flowi4_proto = IPPROTO_UDP;
1230	flow.fl4_dport = tun_key->tp_dst;
1231	flow.daddr = tun_key->u.ipv4.dst;
1232
1233	rt = ip_route_output_key(dev_net(real_dst_dev), &flow);
1234	if (IS_ERR(rt)) {
1235		netdev_info(bp->dev, "no route to %pI4b\n", &flow.daddr);
1236		return -EOPNOTSUPP;
1237	}
1238
1239	/* The route must either point to the real_dst_dev or a dst_dev that
1240	 * uses the real_dst_dev.
1241	 */
1242	dst_dev = rt->dst.dev;
1243	if (is_vlan_dev(dst_dev)) {
1244#if IS_ENABLED(CONFIG_VLAN_8021Q)
1245		struct vlan_dev_priv *vlan = vlan_dev_priv(dst_dev);
1246
1247		if (vlan->real_dev != real_dst_dev) {
1248			netdev_info(bp->dev,
1249				    "dst_dev(%s) doesn't use PF-if(%s)\n",
1250				    netdev_name(dst_dev),
1251				    netdev_name(real_dst_dev));
1252			rc = -EOPNOTSUPP;
1253			goto put_rt;
1254		}
1255		l2_info->inner_vlan_tci = htons(vlan->vlan_id);
1256		l2_info->inner_vlan_tpid = vlan->vlan_proto;
1257		l2_info->num_vlans = 1;
1258#endif
1259	} else if (dst_dev != real_dst_dev) {
1260		netdev_info(bp->dev,
1261			    "dst_dev(%s) for %pI4b is not PF-if(%s)\n",
1262			    netdev_name(dst_dev), &flow.daddr,
1263			    netdev_name(real_dst_dev));
1264		rc = -EOPNOTSUPP;
1265		goto put_rt;
1266	}
1267
1268	nbr = dst_neigh_lookup(&rt->dst, &flow.daddr);
1269	if (!nbr) {
1270		netdev_info(bp->dev, "can't lookup neighbor for %pI4b\n",
1271			    &flow.daddr);
1272		rc = -EOPNOTSUPP;
1273		goto put_rt;
1274	}
1275
1276	tun_key->u.ipv4.src = flow.saddr;
1277	tun_key->ttl = ip4_dst_hoplimit(&rt->dst);
1278	neigh_ha_snapshot(l2_info->dmac, nbr, dst_dev);
1279	ether_addr_copy(l2_info->smac, dst_dev->dev_addr);
1280	neigh_release(nbr);
1281	ip_rt_put(rt);
1282
1283	return 0;
1284put_rt:
1285	ip_rt_put(rt);
1286	return rc;
1287#else
1288	return -EOPNOTSUPP;
1289#endif
1290}
1291
1292static int bnxt_tc_get_decap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1293				    struct bnxt_tc_flow_node *flow_node,
1294				    __le32 *decap_filter_handle)
1295{
1296	struct ip_tunnel_key *decap_key = &flow->tun_key;
1297	struct bnxt_tc_info *tc_info = bp->tc_info;
1298	struct bnxt_tc_l2_key l2_info = { {0} };
1299	struct bnxt_tc_tunnel_node *decap_node;
1300	struct ip_tunnel_key tun_key = { 0 };
1301	struct bnxt_tc_l2_key *decap_l2_info;
1302	__le32 ref_decap_handle;
1303	int rc;
1304
1305	/* Check if there's another flow using the same tunnel decap.
1306	 * If not, add this tunnel to the table and resolve the other
1307	 * tunnel header fileds. Ignore src_port in the tunnel_key,
1308	 * since it is not required for decap filters.
1309	 */
1310	decap_key->tp_src = 0;
1311	decap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->decap_table,
1312					     &tc_info->decap_ht_params,
1313					     decap_key);
1314	if (!decap_node)
1315		return -ENOMEM;
1316
1317	flow_node->decap_node = decap_node;
1318
1319	if (decap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1320		goto done;
1321
1322	/* Resolve the L2 fields for tunnel decap
1323	 * Resolve the route for remote vtep (saddr) of the decap key
1324	 * Find it's next-hop mac addrs
1325	 */
1326	tun_key.u.ipv4.dst = flow->tun_key.u.ipv4.src;
1327	tun_key.tp_dst = flow->tun_key.tp_dst;
1328	rc = bnxt_tc_resolve_tunnel_hdrs(bp, &tun_key, &l2_info);
1329	if (rc)
1330		goto put_decap;
1331
1332	decap_l2_info = &decap_node->l2_info;
1333	/* decap smac is wildcarded */
1334	ether_addr_copy(decap_l2_info->dmac, l2_info.smac);
1335	if (l2_info.num_vlans) {
1336		decap_l2_info->num_vlans = l2_info.num_vlans;
1337		decap_l2_info->inner_vlan_tpid = l2_info.inner_vlan_tpid;
1338		decap_l2_info->inner_vlan_tci = l2_info.inner_vlan_tci;
1339	}
1340	flow->flags |= BNXT_TC_FLOW_FLAGS_TUNL_ETH_ADDRS;
1341
1342	/* For getting a decap_filter_handle we first need to check if
1343	 * there are any other decap flows that share the same tunnel L2
1344	 * key and if so, pass that flow's decap_filter_handle as the
1345	 * ref_decap_handle for this flow.
1346	 */
1347	rc = bnxt_tc_get_ref_decap_handle(bp, flow, decap_l2_info, flow_node,
1348					  &ref_decap_handle);
1349	if (rc)
1350		goto put_decap;
1351
1352	/* Issue the hwrm cmd to allocate a decap filter handle */
1353	rc = hwrm_cfa_decap_filter_alloc(bp, flow, decap_l2_info,
1354					 ref_decap_handle,
1355					 &decap_node->tunnel_handle);
1356	if (rc)
1357		goto put_decap_l2;
1358
1359done:
1360	*decap_filter_handle = decap_node->tunnel_handle;
1361	return 0;
1362
1363put_decap_l2:
1364	bnxt_tc_put_decap_l2_node(bp, flow_node);
1365put_decap:
1366	bnxt_tc_put_tunnel_node(bp, &tc_info->decap_table,
1367				&tc_info->decap_ht_params,
1368				flow_node->decap_node);
1369	return rc;
1370}
1371
1372static void bnxt_tc_put_encap_handle(struct bnxt *bp,
1373				     struct bnxt_tc_tunnel_node *encap_node)
1374{
1375	__le32 encap_handle = encap_node->tunnel_handle;
1376	struct bnxt_tc_info *tc_info = bp->tc_info;
1377	int rc;
1378
1379	rc = bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1380				     &tc_info->encap_ht_params, encap_node);
1381	if (!rc && encap_handle != INVALID_TUNNEL_HANDLE)
1382		hwrm_cfa_encap_record_free(bp, encap_handle);
1383}
1384
1385/* Lookup the tunnel encap table and check if there's an encap_handle
1386 * alloc'd already.
1387 * If not, query L2 info via a route lookup and issue an encap_record_alloc
1388 * cmd to FW.
1389 */
1390static int bnxt_tc_get_encap_handle(struct bnxt *bp, struct bnxt_tc_flow *flow,
1391				    struct bnxt_tc_flow_node *flow_node,
1392				    __le32 *encap_handle)
1393{
1394	struct ip_tunnel_key *encap_key = &flow->actions.tun_encap_key;
1395	struct bnxt_tc_info *tc_info = bp->tc_info;
1396	struct bnxt_tc_tunnel_node *encap_node;
1397	int rc;
1398
1399	/* Check if there's another flow using the same tunnel encap.
1400	 * If not, add this tunnel to the table and resolve the other
1401	 * tunnel header fileds
1402	 */
1403	encap_node = bnxt_tc_get_tunnel_node(bp, &tc_info->encap_table,
1404					     &tc_info->encap_ht_params,
1405					     encap_key);
1406	if (!encap_node)
1407		return -ENOMEM;
1408
1409	flow_node->encap_node = encap_node;
1410
1411	if (encap_node->tunnel_handle != INVALID_TUNNEL_HANDLE)
1412		goto done;
1413
1414	rc = bnxt_tc_resolve_tunnel_hdrs(bp, encap_key, &encap_node->l2_info);
1415	if (rc)
1416		goto put_encap;
1417
1418	/* Allocate a new tunnel encap record */
1419	rc = hwrm_cfa_encap_record_alloc(bp, encap_key, &encap_node->l2_info,
1420					 &encap_node->tunnel_handle);
1421	if (rc)
1422		goto put_encap;
1423
1424done:
1425	*encap_handle = encap_node->tunnel_handle;
1426	return 0;
1427
1428put_encap:
1429	bnxt_tc_put_tunnel_node(bp, &tc_info->encap_table,
1430				&tc_info->encap_ht_params, encap_node);
1431	return rc;
1432}
1433
1434static void bnxt_tc_put_tunnel_handle(struct bnxt *bp,
1435				      struct bnxt_tc_flow *flow,
1436				      struct bnxt_tc_flow_node *flow_node)
1437{
1438	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1439		bnxt_tc_put_decap_handle(bp, flow_node);
1440	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1441		bnxt_tc_put_encap_handle(bp, flow_node->encap_node);
1442}
1443
1444static int bnxt_tc_get_tunnel_handle(struct bnxt *bp,
1445				     struct bnxt_tc_flow *flow,
1446				     struct bnxt_tc_flow_node *flow_node,
1447				     __le32 *tunnel_handle)
1448{
1449	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1450		return bnxt_tc_get_decap_handle(bp, flow, flow_node,
1451						tunnel_handle);
1452	else if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_ENCAP)
1453		return bnxt_tc_get_encap_handle(bp, flow, flow_node,
1454						tunnel_handle);
1455	else
1456		return 0;
1457}
1458static int __bnxt_tc_del_flow(struct bnxt *bp,
1459			      struct bnxt_tc_flow_node *flow_node)
1460{
1461	struct bnxt_tc_info *tc_info = bp->tc_info;
1462	int rc;
1463
1464	/* send HWRM cmd to free the flow-id */
1465	bnxt_hwrm_cfa_flow_free(bp, flow_node);
1466
1467	mutex_lock(&tc_info->lock);
1468
1469	/* release references to any tunnel encap/decap nodes */
1470	bnxt_tc_put_tunnel_handle(bp, &flow_node->flow, flow_node);
1471
1472	/* release reference to l2 node */
1473	bnxt_tc_put_l2_node(bp, flow_node);
1474
1475	mutex_unlock(&tc_info->lock);
1476
1477	rc = rhashtable_remove_fast(&tc_info->flow_table, &flow_node->node,
1478				    tc_info->flow_ht_params);
1479	if (rc)
1480		netdev_err(bp->dev, "Error: %s: rhashtable_remove_fast rc=%d\n",
1481			   __func__, rc);
1482
1483	kfree_rcu(flow_node, rcu);
1484	return 0;
1485}
1486
1487static void bnxt_tc_set_flow_dir(struct bnxt *bp, struct bnxt_tc_flow *flow,
1488				 u16 src_fid)
1489{
1490	flow->l2_key.dir = (bp->pf.fw_fid == src_fid) ? BNXT_DIR_RX : BNXT_DIR_TX;
1491}
1492
1493static void bnxt_tc_set_src_fid(struct bnxt *bp, struct bnxt_tc_flow *flow,
1494				u16 src_fid)
1495{
1496	if (flow->actions.flags & BNXT_TC_ACTION_FLAG_TUNNEL_DECAP)
1497		flow->src_fid = bp->pf.fw_fid;
1498	else
1499		flow->src_fid = src_fid;
1500}
1501
1502/* Add a new flow or replace an existing flow.
1503 * Notes on locking:
1504 * There are essentially two critical sections here.
1505 * 1. while adding a new flow
1506 *    a) lookup l2-key
1507 *    b) issue HWRM cmd and get flow_handle
1508 *    c) link l2-key with flow
1509 * 2. while deleting a flow
1510 *    a) unlinking l2-key from flow
1511 * A lock is needed to protect these two critical sections.
1512 *
1513 * The hash-tables are already protected by the rhashtable API.
1514 */
1515static int bnxt_tc_add_flow(struct bnxt *bp, u16 src_fid,
1516			    struct flow_cls_offload *tc_flow_cmd)
1517{
1518	struct bnxt_tc_flow_node *new_node, *old_node;
1519	struct bnxt_tc_info *tc_info = bp->tc_info;
1520	struct bnxt_tc_flow *flow;
1521	__le32 tunnel_handle = 0;
1522	__le16 ref_flow_handle;
1523	int rc;
1524
1525	/* allocate memory for the new flow and it's node */
1526	new_node = kzalloc(sizeof(*new_node), GFP_KERNEL);
1527	if (!new_node) {
1528		rc = -ENOMEM;
1529		goto done;
1530	}
1531	new_node->cookie = tc_flow_cmd->cookie;
1532	flow = &new_node->flow;
1533
1534	rc = bnxt_tc_parse_flow(bp, tc_flow_cmd, flow);
1535	if (rc)
1536		goto free_node;
1537
1538	bnxt_tc_set_src_fid(bp, flow, src_fid);
1539	bnxt_tc_set_flow_dir(bp, flow, flow->src_fid);
1540
1541	if (!bnxt_tc_can_offload(bp, flow)) {
1542		rc = -EOPNOTSUPP;
1543		kfree_rcu(new_node, rcu);
1544		return rc;
1545	}
1546
1547	/* If a flow exists with the same cookie, delete it */
1548	old_node = rhashtable_lookup_fast(&tc_info->flow_table,
1549					  &tc_flow_cmd->cookie,
1550					  tc_info->flow_ht_params);
1551	if (old_node)
1552		__bnxt_tc_del_flow(bp, old_node);
1553
1554	/* Check if the L2 part of the flow has been offloaded already.
1555	 * If so, bump up it's refcnt and get it's reference handle.
1556	 */
1557	mutex_lock(&tc_info->lock);
1558	rc = bnxt_tc_get_ref_flow_handle(bp, flow, new_node, &ref_flow_handle);
1559	if (rc)
1560		goto unlock;
1561
1562	/* If the flow involves tunnel encap/decap, get tunnel_handle */
1563	rc = bnxt_tc_get_tunnel_handle(bp, flow, new_node, &tunnel_handle);
1564	if (rc)
1565		goto put_l2;
1566
1567	/* send HWRM cmd to alloc the flow */
1568	rc = bnxt_hwrm_cfa_flow_alloc(bp, flow, ref_flow_handle,
1569				      tunnel_handle, new_node);
1570	if (rc)
1571		goto put_tunnel;
1572
1573	flow->lastused = jiffies;
1574	spin_lock_init(&flow->stats_lock);
1575	/* add new flow to flow-table */
1576	rc = rhashtable_insert_fast(&tc_info->flow_table, &new_node->node,
1577				    tc_info->flow_ht_params);
1578	if (rc)
1579		goto hwrm_flow_free;
1580
1581	mutex_unlock(&tc_info->lock);
1582	return 0;
1583
1584hwrm_flow_free:
1585	bnxt_hwrm_cfa_flow_free(bp, new_node);
1586put_tunnel:
1587	bnxt_tc_put_tunnel_handle(bp, flow, new_node);
1588put_l2:
1589	bnxt_tc_put_l2_node(bp, new_node);
1590unlock:
1591	mutex_unlock(&tc_info->lock);
1592free_node:
1593	kfree_rcu(new_node, rcu);
1594done:
1595	netdev_err(bp->dev, "Error: %s: cookie=0x%lx error=%d\n",
1596		   __func__, tc_flow_cmd->cookie, rc);
1597	return rc;
1598}
1599
1600static int bnxt_tc_del_flow(struct bnxt *bp,
1601			    struct flow_cls_offload *tc_flow_cmd)
1602{
1603	struct bnxt_tc_info *tc_info = bp->tc_info;
1604	struct bnxt_tc_flow_node *flow_node;
1605
1606	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1607					   &tc_flow_cmd->cookie,
1608					   tc_info->flow_ht_params);
1609	if (!flow_node)
1610		return -EINVAL;
1611
1612	return __bnxt_tc_del_flow(bp, flow_node);
1613}
1614
1615static int bnxt_tc_get_flow_stats(struct bnxt *bp,
1616				  struct flow_cls_offload *tc_flow_cmd)
1617{
1618	struct bnxt_tc_flow_stats stats, *curr_stats, *prev_stats;
1619	struct bnxt_tc_info *tc_info = bp->tc_info;
1620	struct bnxt_tc_flow_node *flow_node;
1621	struct bnxt_tc_flow *flow;
1622	unsigned long lastused;
1623
1624	flow_node = rhashtable_lookup_fast(&tc_info->flow_table,
1625					   &tc_flow_cmd->cookie,
1626					   tc_info->flow_ht_params);
1627	if (!flow_node)
1628		return -1;
1629
1630	flow = &flow_node->flow;
1631	curr_stats = &flow->stats;
1632	prev_stats = &flow->prev_stats;
1633
1634	spin_lock(&flow->stats_lock);
1635	stats.packets = curr_stats->packets - prev_stats->packets;
1636	stats.bytes = curr_stats->bytes - prev_stats->bytes;
1637	*prev_stats = *curr_stats;
1638	lastused = flow->lastused;
1639	spin_unlock(&flow->stats_lock);
1640
1641	flow_stats_update(&tc_flow_cmd->stats, stats.bytes, stats.packets, 0,
1642			  lastused, FLOW_ACTION_HW_STATS_DELAYED);
1643	return 0;
1644}
1645
1646static void bnxt_fill_cfa_stats_req(struct bnxt *bp,
1647				    struct bnxt_tc_flow_node *flow_node,
1648				    __le16 *flow_handle, __le32 *flow_id)
1649{
1650	u16 handle;
1651
1652	if (bp->fw_cap & BNXT_FW_CAP_OVS_64BIT_HANDLE) {
1653		*flow_id = flow_node->flow_id;
1654
1655		/* If flow_id is used to fetch flow stats then:
1656		 * 1. lower 12 bits of flow_handle must be set to all 1s.
1657		 * 2. 15th bit of flow_handle must specify the flow
1658		 *    direction (TX/RX).
1659		 */
1660		if (flow_node->flow.l2_key.dir == BNXT_DIR_RX)
1661			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_DIR_RX |
1662				 CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1663		else
1664			handle = CFA_FLOW_INFO_REQ_FLOW_HANDLE_MAX_MASK;
1665
1666		*flow_handle = cpu_to_le16(handle);
1667	} else {
1668		*flow_handle = flow_node->flow_handle;
1669	}
1670}
1671
1672static int
1673bnxt_hwrm_cfa_flow_stats_get(struct bnxt *bp, int num_flows,
1674			     struct bnxt_tc_stats_batch stats_batch[])
1675{
1676	struct hwrm_cfa_flow_stats_input req = { 0 };
1677	struct hwrm_cfa_flow_stats_output *resp;
1678	__le16 *req_flow_handles = &req.flow_handle_0;
1679	__le32 *req_flow_ids = &req.flow_id_0;
1680	int rc, i;
1681
1682	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_FLOW_STATS, -1, -1);
1683	req.num_flows = cpu_to_le16(num_flows);
1684	for (i = 0; i < num_flows; i++) {
1685		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1686
1687		bnxt_fill_cfa_stats_req(bp, flow_node,
1688					&req_flow_handles[i], &req_flow_ids[i]);
1689	}
1690
1691	mutex_lock(&bp->hwrm_cmd_lock);
1692	rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
1693	if (!rc) {
1694		__le64 *resp_packets;
1695		__le64 *resp_bytes;
1696
1697		resp = bnxt_get_hwrm_resp_addr(bp, &req);
1698		resp_packets = &resp->packet_0;
1699		resp_bytes = &resp->byte_0;
1700
1701		for (i = 0; i < num_flows; i++) {
1702			stats_batch[i].hw_stats.packets =
1703						le64_to_cpu(resp_packets[i]);
1704			stats_batch[i].hw_stats.bytes =
1705						le64_to_cpu(resp_bytes[i]);
1706		}
1707	} else {
1708		netdev_info(bp->dev, "error rc=%d\n", rc);
1709	}
1710	mutex_unlock(&bp->hwrm_cmd_lock);
1711
1712	return rc;
1713}
1714
/* Fold a fresh reading @val of a narrow counter into the 64-bit
 * accumulator @accum.
 *
 * Although @val is a u64, the counter is only as wide as @mask and wraps
 * beyond that width.  The low (masked) bits of @accum are replaced by
 * @val; if @val is smaller than the low bits it replaces, a wrap-around is
 * assumed and one full counter period (@mask + 1) is added.
 */
static void accumulate_val(u64 *accum, u64 val, u64 mask)
{
#define low_bits(x, mask)		((x) & (mask))
#define high_bits(x, mask)		((x) & ~(mask))
	u64 prev_low = low_bits(*accum, mask);
	u64 total = high_bits(*accum, mask) + val;

	/* the counter went backwards, so it must have wrapped */
	if (val < prev_low)
		total += (mask + 1);

	*accum = total;
}
1729
1730/* The HW counters' width is much less than 64bits.
1731 * Handle possible wrap-around while updating the stat counters
1732 */
1733static void bnxt_flow_stats_accum(struct bnxt_tc_info *tc_info,
1734				  struct bnxt_tc_flow_stats *acc_stats,
1735				  struct bnxt_tc_flow_stats *hw_stats)
1736{
1737	accumulate_val(&acc_stats->bytes, hw_stats->bytes, tc_info->bytes_mask);
1738	accumulate_val(&acc_stats->packets, hw_stats->packets,
1739		       tc_info->packets_mask);
1740}
1741
1742static int
1743bnxt_tc_flow_stats_batch_update(struct bnxt *bp, int num_flows,
1744				struct bnxt_tc_stats_batch stats_batch[])
1745{
1746	struct bnxt_tc_info *tc_info = bp->tc_info;
1747	int rc, i;
1748
1749	rc = bnxt_hwrm_cfa_flow_stats_get(bp, num_flows, stats_batch);
1750	if (rc)
1751		return rc;
1752
1753	for (i = 0; i < num_flows; i++) {
1754		struct bnxt_tc_flow_node *flow_node = stats_batch[i].flow_node;
1755		struct bnxt_tc_flow *flow = &flow_node->flow;
1756
1757		spin_lock(&flow->stats_lock);
1758		bnxt_flow_stats_accum(tc_info, &flow->stats,
1759				      &stats_batch[i].hw_stats);
1760		if (flow->stats.packets != flow->prev_stats.packets)
1761			flow->lastused = jiffies;
1762		spin_unlock(&flow->stats_lock);
1763	}
1764
1765	return 0;
1766}
1767
1768static int
1769bnxt_tc_flow_stats_batch_prep(struct bnxt *bp,
1770			      struct bnxt_tc_stats_batch stats_batch[],
1771			      int *num_flows)
1772{
1773	struct bnxt_tc_info *tc_info = bp->tc_info;
1774	struct rhashtable_iter *iter = &tc_info->iter;
1775	void *flow_node;
1776	int rc, i;
1777
1778	rhashtable_walk_start(iter);
1779
1780	rc = 0;
1781	for (i = 0; i < BNXT_FLOW_STATS_BATCH_MAX; i++) {
1782		flow_node = rhashtable_walk_next(iter);
1783		if (IS_ERR(flow_node)) {
1784			i = 0;
1785			if (PTR_ERR(flow_node) == -EAGAIN) {
1786				continue;
1787			} else {
1788				rc = PTR_ERR(flow_node);
1789				goto done;
1790			}
1791		}
1792
1793		/* No more flows */
1794		if (!flow_node)
1795			goto done;
1796
1797		stats_batch[i].flow_node = flow_node;
1798	}
1799done:
1800	rhashtable_walk_stop(iter);
1801	*num_flows = i;
1802	return rc;
1803}
1804
1805void bnxt_tc_flow_stats_work(struct bnxt *bp)
1806{
1807	struct bnxt_tc_info *tc_info = bp->tc_info;
1808	int num_flows, rc;
1809
1810	num_flows = atomic_read(&tc_info->flow_table.nelems);
1811	if (!num_flows)
1812		return;
1813
1814	rhashtable_walk_enter(&tc_info->flow_table, &tc_info->iter);
1815
1816	for (;;) {
1817		rc = bnxt_tc_flow_stats_batch_prep(bp, tc_info->stats_batch,
1818						   &num_flows);
1819		if (rc) {
1820			if (rc == -EAGAIN)
1821				continue;
1822			break;
1823		}
1824
1825		if (!num_flows)
1826			break;
1827
1828		bnxt_tc_flow_stats_batch_update(bp, num_flows,
1829						tc_info->stats_batch);
1830	}
1831
1832	rhashtable_walk_exit(&tc_info->iter);
1833}
1834
1835int bnxt_tc_setup_flower(struct bnxt *bp, u16 src_fid,
1836			 struct flow_cls_offload *cls_flower)
1837{
1838	switch (cls_flower->command) {
1839	case FLOW_CLS_REPLACE:
1840		return bnxt_tc_add_flow(bp, src_fid, cls_flower);
1841	case FLOW_CLS_DESTROY:
1842		return bnxt_tc_del_flow(bp, cls_flower);
1843	case FLOW_CLS_STATS:
1844		return bnxt_tc_get_flow_stats(bp, cls_flower);
1845	default:
1846		return -EOPNOTSUPP;
1847	}
1848}
1849
1850static int bnxt_tc_setup_indr_block_cb(enum tc_setup_type type,
1851				       void *type_data, void *cb_priv)
1852{
1853	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1854	struct flow_cls_offload *flower = type_data;
1855	struct bnxt *bp = priv->bp;
1856
1857	if (!tc_cls_can_offload_and_chain0(bp->dev, type_data))
1858		return -EOPNOTSUPP;
1859
1860	switch (type) {
1861	case TC_SETUP_CLSFLOWER:
1862		return bnxt_tc_setup_flower(bp, bp->pf.fw_fid, flower);
1863	default:
1864		return -EOPNOTSUPP;
1865	}
1866}
1867
1868static struct bnxt_flower_indr_block_cb_priv *
1869bnxt_tc_indr_block_cb_lookup(struct bnxt *bp, struct net_device *netdev)
1870{
1871	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1872
1873	list_for_each_entry(cb_priv, &bp->tc_indr_block_list, list)
1874		if (cb_priv->tunnel_netdev == netdev)
1875			return cb_priv;
1876
1877	return NULL;
1878}
1879
1880static void bnxt_tc_setup_indr_rel(void *cb_priv)
1881{
1882	struct bnxt_flower_indr_block_cb_priv *priv = cb_priv;
1883
1884	list_del(&priv->list);
1885	kfree(priv);
1886}
1887
1888static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct Qdisc *sch, struct bnxt *bp,
1889				    struct flow_block_offload *f, void *data,
1890				    void (*cleanup)(struct flow_block_cb *block_cb))
1891{
1892	struct bnxt_flower_indr_block_cb_priv *cb_priv;
1893	struct flow_block_cb *block_cb;
1894
1895	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
1896		return -EOPNOTSUPP;
1897
1898	switch (f->command) {
1899	case FLOW_BLOCK_BIND:
1900		cb_priv = kmalloc(sizeof(*cb_priv), GFP_KERNEL);
1901		if (!cb_priv)
1902			return -ENOMEM;
1903
1904		cb_priv->tunnel_netdev = netdev;
1905		cb_priv->bp = bp;
1906		list_add(&cb_priv->list, &bp->tc_indr_block_list);
1907
1908		block_cb = flow_indr_block_cb_alloc(bnxt_tc_setup_indr_block_cb,
1909						    cb_priv, cb_priv,
1910						    bnxt_tc_setup_indr_rel, f,
1911						    netdev, sch, data, bp, cleanup);
1912		if (IS_ERR(block_cb)) {
1913			list_del(&cb_priv->list);
1914			kfree(cb_priv);
1915			return PTR_ERR(block_cb);
1916		}
1917
1918		flow_block_cb_add(block_cb, f);
1919		list_add_tail(&block_cb->driver_list, &bnxt_block_cb_list);
1920		break;
1921	case FLOW_BLOCK_UNBIND:
1922		cb_priv = bnxt_tc_indr_block_cb_lookup(bp, netdev);
1923		if (!cb_priv)
1924			return -ENOENT;
1925
1926		block_cb = flow_block_cb_lookup(f->block,
1927						bnxt_tc_setup_indr_block_cb,
1928						cb_priv);
1929		if (!block_cb)
1930			return -ENOENT;
1931
1932		flow_indr_block_cb_remove(block_cb, f);
1933		list_del(&block_cb->driver_list);
1934		break;
1935	default:
1936		return -EOPNOTSUPP;
1937	}
1938	return 0;
1939}
1940
1941static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
1942{
1943	return netif_is_vxlan(netdev);
1944}
1945
1946static int bnxt_tc_setup_indr_cb(struct net_device *netdev, struct Qdisc *sch, void *cb_priv,
1947				 enum tc_setup_type type, void *type_data,
1948				 void *data,
1949				 void (*cleanup)(struct flow_block_cb *block_cb))
1950{
1951	if (!bnxt_is_netdev_indr_offload(netdev))
1952		return -EOPNOTSUPP;
1953
1954	switch (type) {
1955	case TC_SETUP_BLOCK:
1956		return bnxt_tc_setup_indr_block(netdev, sch, cb_priv, type_data, data, cleanup);
1957	default:
1958		break;
1959	}
1960
1961	return -EOPNOTSUPP;
1962}
1963
1964static const struct rhashtable_params bnxt_tc_flow_ht_params = {
1965	.head_offset = offsetof(struct bnxt_tc_flow_node, node),
1966	.key_offset = offsetof(struct bnxt_tc_flow_node, cookie),
1967	.key_len = sizeof(((struct bnxt_tc_flow_node *)0)->cookie),
1968	.automatic_shrinking = true
1969};
1970
1971static const struct rhashtable_params bnxt_tc_l2_ht_params = {
1972	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1973	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1974	.key_len = BNXT_TC_L2_KEY_LEN,
1975	.automatic_shrinking = true
1976};
1977
1978static const struct rhashtable_params bnxt_tc_decap_l2_ht_params = {
1979	.head_offset = offsetof(struct bnxt_tc_l2_node, node),
1980	.key_offset = offsetof(struct bnxt_tc_l2_node, key),
1981	.key_len = BNXT_TC_L2_KEY_LEN,
1982	.automatic_shrinking = true
1983};
1984
1985static const struct rhashtable_params bnxt_tc_tunnel_ht_params = {
1986	.head_offset = offsetof(struct bnxt_tc_tunnel_node, node),
1987	.key_offset = offsetof(struct bnxt_tc_tunnel_node, key),
1988	.key_len = sizeof(struct ip_tunnel_key),
1989	.automatic_shrinking = true
1990};
1991
/* Build a mask with the low @width bits set from a counter width in bits.
 * @width must be in 1..64.
 */
#define mask(width)		(~(u64)0 >> (64 - (width)))
1994
1995int bnxt_init_tc(struct bnxt *bp)
1996{
1997	struct bnxt_tc_info *tc_info;
1998	int rc;
1999
2000	if (bp->hwrm_spec_code < 0x10803)
2001		return 0;
2002
2003	tc_info = kzalloc(sizeof(*tc_info), GFP_KERNEL);
2004	if (!tc_info)
2005		return -ENOMEM;
2006	mutex_init(&tc_info->lock);
2007
2008	/* Counter widths are programmed by FW */
2009	tc_info->bytes_mask = mask(36);
2010	tc_info->packets_mask = mask(28);
2011
2012	tc_info->flow_ht_params = bnxt_tc_flow_ht_params;
2013	rc = rhashtable_init(&tc_info->flow_table, &tc_info->flow_ht_params);
2014	if (rc)
2015		goto free_tc_info;
2016
2017	tc_info->l2_ht_params = bnxt_tc_l2_ht_params;
2018	rc = rhashtable_init(&tc_info->l2_table, &tc_info->l2_ht_params);
2019	if (rc)
2020		goto destroy_flow_table;
2021
2022	tc_info->decap_l2_ht_params = bnxt_tc_decap_l2_ht_params;
2023	rc = rhashtable_init(&tc_info->decap_l2_table,
2024			     &tc_info->decap_l2_ht_params);
2025	if (rc)
2026		goto destroy_l2_table;
2027
2028	tc_info->decap_ht_params = bnxt_tc_tunnel_ht_params;
2029	rc = rhashtable_init(&tc_info->decap_table,
2030			     &tc_info->decap_ht_params);
2031	if (rc)
2032		goto destroy_decap_l2_table;
2033
2034	tc_info->encap_ht_params = bnxt_tc_tunnel_ht_params;
2035	rc = rhashtable_init(&tc_info->encap_table,
2036			     &tc_info->encap_ht_params);
2037	if (rc)
2038		goto destroy_decap_table;
2039
2040	tc_info->enabled = true;
2041	bp->dev->hw_features |= NETIF_F_HW_TC;
2042	bp->dev->features |= NETIF_F_HW_TC;
2043	bp->tc_info = tc_info;
2044
2045	/* init indirect block notifications */
2046	INIT_LIST_HEAD(&bp->tc_indr_block_list);
2047
2048	rc = flow_indr_dev_register(bnxt_tc_setup_indr_cb, bp);
2049	if (!rc)
2050		return 0;
2051
2052	rhashtable_destroy(&tc_info->encap_table);
2053
2054destroy_decap_table:
2055	rhashtable_destroy(&tc_info->decap_table);
2056destroy_decap_l2_table:
2057	rhashtable_destroy(&tc_info->decap_l2_table);
2058destroy_l2_table:
2059	rhashtable_destroy(&tc_info->l2_table);
2060destroy_flow_table:
2061	rhashtable_destroy(&tc_info->flow_table);
2062free_tc_info:
2063	kfree(tc_info);
2064	bp->tc_info = NULL;
2065	return rc;
2066}
2067
2068void bnxt_shutdown_tc(struct bnxt *bp)
2069{
2070	struct bnxt_tc_info *tc_info = bp->tc_info;
2071
2072	if (!bnxt_tc_flower_enabled(bp))
2073		return;
2074
2075	flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp,
2076				 bnxt_tc_setup_indr_rel);
2077	rhashtable_destroy(&tc_info->flow_table);
2078	rhashtable_destroy(&tc_info->l2_table);
2079	rhashtable_destroy(&tc_info->decap_l2_table);
2080	rhashtable_destroy(&tc_info->decap_table);
2081	rhashtable_destroy(&tc_info->encap_table);
2082	kfree(tc_info);
2083	bp->tc_info = NULL;
2084}
2085