1// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
2/* Copyright (c) 2016-2018 Mellanox Technologies. All rights reserved */
3
4#include <linux/kernel.h>
5#include <linux/types.h>
6#include <linux/rhashtable.h>
7#include <linux/bitops.h>
8#include <linux/in6.h>
9#include <linux/notifier.h>
10#include <linux/inetdevice.h>
11#include <linux/netdevice.h>
12#include <linux/if_bridge.h>
13#include <linux/socket.h>
14#include <linux/route.h>
15#include <linux/gcd.h>
16#include <linux/if_macvlan.h>
17#include <linux/refcount.h>
18#include <linux/jhash.h>
19#include <linux/net_namespace.h>
20#include <linux/mutex.h>
21#include <net/netevent.h>
22#include <net/neighbour.h>
23#include <net/arp.h>
24#include <net/ip_fib.h>
25#include <net/ip6_fib.h>
26#include <net/nexthop.h>
27#include <net/fib_rules.h>
28#include <net/ip_tunnels.h>
29#include <net/l3mdev.h>
30#include <net/addrconf.h>
31#include <net/ndisc.h>
32#include <net/ipv6.h>
33#include <net/fib_notifier.h>
34#include <net/switchdev.h>
35
36#include "spectrum.h"
37#include "core.h"
38#include "reg.h"
39#include "spectrum_cnt.h"
40#include "spectrum_dpipe.h"
41#include "spectrum_ipip.h"
42#include "spectrum_mr.h"
43#include "spectrum_mr_tcam.h"
44#include "spectrum_router.h"
45#include "spectrum_span.h"
46
47struct mlxsw_sp_fib;
48struct mlxsw_sp_vr;
49struct mlxsw_sp_lpm_tree;
50struct mlxsw_sp_rif_ops;
51
52struct mlxsw_sp_rif {
53	struct list_head nexthop_list;
54	struct list_head neigh_list;
55	struct net_device *dev; /* NULL for underlay RIF */
56	struct mlxsw_sp_fid *fid;
57	unsigned char addr[ETH_ALEN];
58	int mtu;
59	u16 rif_index;
60	u16 vr_id;
61	const struct mlxsw_sp_rif_ops *ops;
62	struct mlxsw_sp *mlxsw_sp;
63
64	unsigned int counter_ingress;
65	bool counter_ingress_valid;
66	unsigned int counter_egress;
67	bool counter_egress_valid;
68};
69
70struct mlxsw_sp_rif_params {
71	struct net_device *dev;
72	union {
73		u16 system_port;
74		u16 lag_id;
75	};
76	u16 vid;
77	bool lag;
78};
79
80struct mlxsw_sp_rif_subport {
81	struct mlxsw_sp_rif common;
82	refcount_t ref_count;
83	union {
84		u16 system_port;
85		u16 lag_id;
86	};
87	u16 vid;
88	bool lag;
89};
90
91struct mlxsw_sp_rif_ipip_lb {
92	struct mlxsw_sp_rif common;
93	struct mlxsw_sp_rif_ipip_lb_config lb_config;
94	u16 ul_vr_id; /* Reserved for Spectrum-2. */
95	u16 ul_rif_id; /* Reserved for Spectrum. */
96};
97
98struct mlxsw_sp_rif_params_ipip_lb {
99	struct mlxsw_sp_rif_params common;
100	struct mlxsw_sp_rif_ipip_lb_config lb_config;
101};
102
103struct mlxsw_sp_rif_ops {
104	enum mlxsw_sp_rif_type type;
105	size_t rif_size;
106
107	void (*setup)(struct mlxsw_sp_rif *rif,
108		      const struct mlxsw_sp_rif_params *params);
109	int (*configure)(struct mlxsw_sp_rif *rif);
110	void (*deconfigure)(struct mlxsw_sp_rif *rif);
111	struct mlxsw_sp_fid * (*fid_get)(struct mlxsw_sp_rif *rif,
112					 struct netlink_ext_ack *extack);
113	void (*fdb_del)(struct mlxsw_sp_rif *rif, const char *mac);
114};
115
116static struct mlxsw_sp_rif *
117mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
118			 const struct net_device *dev);
119static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif);
120static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree);
121static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
122				  struct mlxsw_sp_lpm_tree *lpm_tree);
123static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
124				     const struct mlxsw_sp_fib *fib,
125				     u8 tree_id);
126static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
127				       const struct mlxsw_sp_fib *fib);
128
129static unsigned int *
130mlxsw_sp_rif_p_counter_get(struct mlxsw_sp_rif *rif,
131			   enum mlxsw_sp_rif_counter_dir dir)
132{
133	switch (dir) {
134	case MLXSW_SP_RIF_COUNTER_EGRESS:
135		return &rif->counter_egress;
136	case MLXSW_SP_RIF_COUNTER_INGRESS:
137		return &rif->counter_ingress;
138	}
139	return NULL;
140}
141
142static bool
143mlxsw_sp_rif_counter_valid_get(struct mlxsw_sp_rif *rif,
144			       enum mlxsw_sp_rif_counter_dir dir)
145{
146	switch (dir) {
147	case MLXSW_SP_RIF_COUNTER_EGRESS:
148		return rif->counter_egress_valid;
149	case MLXSW_SP_RIF_COUNTER_INGRESS:
150		return rif->counter_ingress_valid;
151	}
152	return false;
153}
154
155static void
156mlxsw_sp_rif_counter_valid_set(struct mlxsw_sp_rif *rif,
157			       enum mlxsw_sp_rif_counter_dir dir,
158			       bool valid)
159{
160	switch (dir) {
161	case MLXSW_SP_RIF_COUNTER_EGRESS:
162		rif->counter_egress_valid = valid;
163		break;
164	case MLXSW_SP_RIF_COUNTER_INGRESS:
165		rif->counter_ingress_valid = valid;
166		break;
167	}
168}
169
170static int mlxsw_sp_rif_counter_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
171				     unsigned int counter_index, bool enable,
172				     enum mlxsw_sp_rif_counter_dir dir)
173{
174	char ritr_pl[MLXSW_REG_RITR_LEN];
175	bool is_egress = false;
176	int err;
177
178	if (dir == MLXSW_SP_RIF_COUNTER_EGRESS)
179		is_egress = true;
180	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
181	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
182	if (err)
183		return err;
184
185	mlxsw_reg_ritr_counter_pack(ritr_pl, counter_index, enable,
186				    is_egress);
187	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
188}
189
190int mlxsw_sp_rif_counter_value_get(struct mlxsw_sp *mlxsw_sp,
191				   struct mlxsw_sp_rif *rif,
192				   enum mlxsw_sp_rif_counter_dir dir, u64 *cnt)
193{
194	char ricnt_pl[MLXSW_REG_RICNT_LEN];
195	unsigned int *p_counter_index;
196	bool valid;
197	int err;
198
199	valid = mlxsw_sp_rif_counter_valid_get(rif, dir);
200	if (!valid)
201		return -EINVAL;
202
203	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
204	if (!p_counter_index)
205		return -EINVAL;
206	mlxsw_reg_ricnt_pack(ricnt_pl, *p_counter_index,
207			     MLXSW_REG_RICNT_OPCODE_NOP);
208	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
209	if (err)
210		return err;
211	*cnt = mlxsw_reg_ricnt_good_unicast_packets_get(ricnt_pl);
212	return 0;
213}
214
215static int mlxsw_sp_rif_counter_clear(struct mlxsw_sp *mlxsw_sp,
216				      unsigned int counter_index)
217{
218	char ricnt_pl[MLXSW_REG_RICNT_LEN];
219
220	mlxsw_reg_ricnt_pack(ricnt_pl, counter_index,
221			     MLXSW_REG_RICNT_OPCODE_CLEAR);
222	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ricnt), ricnt_pl);
223}
224
225int mlxsw_sp_rif_counter_alloc(struct mlxsw_sp *mlxsw_sp,
226			       struct mlxsw_sp_rif *rif,
227			       enum mlxsw_sp_rif_counter_dir dir)
228{
229	unsigned int *p_counter_index;
230	int err;
231
232	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
233	if (!p_counter_index)
234		return -EINVAL;
235	err = mlxsw_sp_counter_alloc(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
236				     p_counter_index);
237	if (err)
238		return err;
239
240	err = mlxsw_sp_rif_counter_clear(mlxsw_sp, *p_counter_index);
241	if (err)
242		goto err_counter_clear;
243
244	err = mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
245					*p_counter_index, true, dir);
246	if (err)
247		goto err_counter_edit;
248	mlxsw_sp_rif_counter_valid_set(rif, dir, true);
249	return 0;
250
251err_counter_edit:
252err_counter_clear:
253	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
254			      *p_counter_index);
255	return err;
256}
257
258void mlxsw_sp_rif_counter_free(struct mlxsw_sp *mlxsw_sp,
259			       struct mlxsw_sp_rif *rif,
260			       enum mlxsw_sp_rif_counter_dir dir)
261{
262	unsigned int *p_counter_index;
263
264	if (!mlxsw_sp_rif_counter_valid_get(rif, dir))
265		return;
266
267	p_counter_index = mlxsw_sp_rif_p_counter_get(rif, dir);
268	if (WARN_ON(!p_counter_index))
269		return;
270	mlxsw_sp_rif_counter_edit(mlxsw_sp, rif->rif_index,
271				  *p_counter_index, false, dir);
272	mlxsw_sp_counter_free(mlxsw_sp, MLXSW_SP_COUNTER_SUB_POOL_RIF,
273			      *p_counter_index);
274	mlxsw_sp_rif_counter_valid_set(rif, dir, false);
275}
276
277static void mlxsw_sp_rif_counters_alloc(struct mlxsw_sp_rif *rif)
278{
279	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
280	struct devlink *devlink;
281
282	devlink = priv_to_devlink(mlxsw_sp->core);
283	if (!devlink_dpipe_table_counter_enabled(devlink,
284						 MLXSW_SP_DPIPE_TABLE_NAME_ERIF))
285		return;
286	mlxsw_sp_rif_counter_alloc(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
287}
288
289static void mlxsw_sp_rif_counters_free(struct mlxsw_sp_rif *rif)
290{
291	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
292
293	mlxsw_sp_rif_counter_free(mlxsw_sp, rif, MLXSW_SP_RIF_COUNTER_EGRESS);
294}
295
296#define MLXSW_SP_PREFIX_COUNT (sizeof(struct in6_addr) * BITS_PER_BYTE + 1)
297
298struct mlxsw_sp_prefix_usage {
299	DECLARE_BITMAP(b, MLXSW_SP_PREFIX_COUNT);
300};
301
302#define mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) \
303	for_each_set_bit(prefix, (prefix_usage)->b, MLXSW_SP_PREFIX_COUNT)
304
305static bool
306mlxsw_sp_prefix_usage_eq(struct mlxsw_sp_prefix_usage *prefix_usage1,
307			 struct mlxsw_sp_prefix_usage *prefix_usage2)
308{
309	return !memcmp(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
310}
311
312static void
313mlxsw_sp_prefix_usage_cpy(struct mlxsw_sp_prefix_usage *prefix_usage1,
314			  struct mlxsw_sp_prefix_usage *prefix_usage2)
315{
316	memcpy(prefix_usage1, prefix_usage2, sizeof(*prefix_usage1));
317}
318
319static void
320mlxsw_sp_prefix_usage_set(struct mlxsw_sp_prefix_usage *prefix_usage,
321			  unsigned char prefix_len)
322{
323	set_bit(prefix_len, prefix_usage->b);
324}
325
326static void
327mlxsw_sp_prefix_usage_clear(struct mlxsw_sp_prefix_usage *prefix_usage,
328			    unsigned char prefix_len)
329{
330	clear_bit(prefix_len, prefix_usage->b);
331}
332
333struct mlxsw_sp_fib_key {
334	unsigned char addr[sizeof(struct in6_addr)];
335	unsigned char prefix_len;
336};
337
338enum mlxsw_sp_fib_entry_type {
339	MLXSW_SP_FIB_ENTRY_TYPE_REMOTE,
340	MLXSW_SP_FIB_ENTRY_TYPE_LOCAL,
341	MLXSW_SP_FIB_ENTRY_TYPE_TRAP,
342	MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE,
343	MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE,
344
345	/* This is a special case of local delivery, where a packet should be
346	 * decapsulated on reception. Note that there is no corresponding ENCAP,
347	 * because that's a type of next hop, not of FIB entry. (There can be
348	 * several next hops in a REMOTE entry, and some of them may be
349	 * encapsulating entries.)
350	 */
351	MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP,
352	MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP,
353};
354
355struct mlxsw_sp_nexthop_group;
356struct mlxsw_sp_fib_entry;
357
358struct mlxsw_sp_fib_node {
359	struct mlxsw_sp_fib_entry *fib_entry;
360	struct list_head list;
361	struct rhash_head ht_node;
362	struct mlxsw_sp_fib *fib;
363	struct mlxsw_sp_fib_key key;
364};
365
366struct mlxsw_sp_fib_entry_decap {
367	struct mlxsw_sp_ipip_entry *ipip_entry;
368	u32 tunnel_index;
369};
370
371struct mlxsw_sp_fib_entry {
372	struct mlxsw_sp_fib_node *fib_node;
373	enum mlxsw_sp_fib_entry_type type;
374	struct list_head nexthop_group_node;
375	struct mlxsw_sp_nexthop_group *nh_group;
376	struct mlxsw_sp_fib_entry_decap decap; /* Valid for decap entries. */
377};
378
379struct mlxsw_sp_fib4_entry {
380	struct mlxsw_sp_fib_entry common;
381	u32 tb_id;
382	u32 prio;
383	u8 tos;
384	u8 type;
385};
386
387struct mlxsw_sp_fib6_entry {
388	struct mlxsw_sp_fib_entry common;
389	struct list_head rt6_list;
390	unsigned int nrt6;
391};
392
393struct mlxsw_sp_rt6 {
394	struct list_head list;
395	struct fib6_info *rt;
396};
397
398struct mlxsw_sp_lpm_tree {
399	u8 id; /* tree ID */
400	unsigned int ref_count;
401	enum mlxsw_sp_l3proto proto;
402	unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT];
403	struct mlxsw_sp_prefix_usage prefix_usage;
404};
405
406struct mlxsw_sp_fib {
407	struct rhashtable ht;
408	struct list_head node_list;
409	struct mlxsw_sp_vr *vr;
410	struct mlxsw_sp_lpm_tree *lpm_tree;
411	enum mlxsw_sp_l3proto proto;
412};
413
414struct mlxsw_sp_vr {
415	u16 id; /* virtual router ID */
416	u32 tb_id; /* kernel fib table id */
417	unsigned int rif_count;
418	struct mlxsw_sp_fib *fib4;
419	struct mlxsw_sp_fib *fib6;
420	struct mlxsw_sp_mr_table *mr_table[MLXSW_SP_L3_PROTO_MAX];
421	struct mlxsw_sp_rif *ul_rif;
422	refcount_t ul_rif_refcnt;
423};
424
425static const struct rhashtable_params mlxsw_sp_fib_ht_params;
426
427static struct mlxsw_sp_fib *mlxsw_sp_fib_create(struct mlxsw_sp *mlxsw_sp,
428						struct mlxsw_sp_vr *vr,
429						enum mlxsw_sp_l3proto proto)
430{
431	struct mlxsw_sp_lpm_tree *lpm_tree;
432	struct mlxsw_sp_fib *fib;
433	int err;
434
435	lpm_tree = mlxsw_sp->router->lpm.proto_trees[proto];
436	fib = kzalloc(sizeof(*fib), GFP_KERNEL);
437	if (!fib)
438		return ERR_PTR(-ENOMEM);
439	err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params);
440	if (err)
441		goto err_rhashtable_init;
442	INIT_LIST_HEAD(&fib->node_list);
443	fib->proto = proto;
444	fib->vr = vr;
445	fib->lpm_tree = lpm_tree;
446	mlxsw_sp_lpm_tree_hold(lpm_tree);
447	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, lpm_tree->id);
448	if (err)
449		goto err_lpm_tree_bind;
450	return fib;
451
452err_lpm_tree_bind:
453	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
454err_rhashtable_init:
455	kfree(fib);
456	return ERR_PTR(err);
457}
458
459static void mlxsw_sp_fib_destroy(struct mlxsw_sp *mlxsw_sp,
460				 struct mlxsw_sp_fib *fib)
461{
462	mlxsw_sp_vr_lpm_tree_unbind(mlxsw_sp, fib);
463	mlxsw_sp_lpm_tree_put(mlxsw_sp, fib->lpm_tree);
464	WARN_ON(!list_empty(&fib->node_list));
465	rhashtable_destroy(&fib->ht);
466	kfree(fib);
467}
468
469static struct mlxsw_sp_lpm_tree *
470mlxsw_sp_lpm_tree_find_unused(struct mlxsw_sp *mlxsw_sp)
471{
472	static struct mlxsw_sp_lpm_tree *lpm_tree;
473	int i;
474
475	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
476		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
477		if (lpm_tree->ref_count == 0)
478			return lpm_tree;
479	}
480	return NULL;
481}
482
483static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp,
484				   struct mlxsw_sp_lpm_tree *lpm_tree)
485{
486	char ralta_pl[MLXSW_REG_RALTA_LEN];
487
488	mlxsw_reg_ralta_pack(ralta_pl, true,
489			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
490			     lpm_tree->id);
491	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
492}
493
494static void mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp,
495				   struct mlxsw_sp_lpm_tree *lpm_tree)
496{
497	char ralta_pl[MLXSW_REG_RALTA_LEN];
498
499	mlxsw_reg_ralta_pack(ralta_pl, false,
500			     (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto,
501			     lpm_tree->id);
502	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
503}
504
505static int
506mlxsw_sp_lpm_tree_left_struct_set(struct mlxsw_sp *mlxsw_sp,
507				  struct mlxsw_sp_prefix_usage *prefix_usage,
508				  struct mlxsw_sp_lpm_tree *lpm_tree)
509{
510	char ralst_pl[MLXSW_REG_RALST_LEN];
511	u8 root_bin = 0;
512	u8 prefix;
513	u8 last_prefix = MLXSW_REG_RALST_BIN_NO_CHILD;
514
515	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage)
516		root_bin = prefix;
517
518	mlxsw_reg_ralst_pack(ralst_pl, root_bin, lpm_tree->id);
519	mlxsw_sp_prefix_usage_for_each(prefix, prefix_usage) {
520		if (prefix == 0)
521			continue;
522		mlxsw_reg_ralst_bin_pack(ralst_pl, prefix, last_prefix,
523					 MLXSW_REG_RALST_BIN_NO_CHILD);
524		last_prefix = prefix;
525	}
526	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
527}
528
529static struct mlxsw_sp_lpm_tree *
530mlxsw_sp_lpm_tree_create(struct mlxsw_sp *mlxsw_sp,
531			 struct mlxsw_sp_prefix_usage *prefix_usage,
532			 enum mlxsw_sp_l3proto proto)
533{
534	struct mlxsw_sp_lpm_tree *lpm_tree;
535	int err;
536
537	lpm_tree = mlxsw_sp_lpm_tree_find_unused(mlxsw_sp);
538	if (!lpm_tree)
539		return ERR_PTR(-EBUSY);
540	lpm_tree->proto = proto;
541	err = mlxsw_sp_lpm_tree_alloc(mlxsw_sp, lpm_tree);
542	if (err)
543		return ERR_PTR(err);
544
545	err = mlxsw_sp_lpm_tree_left_struct_set(mlxsw_sp, prefix_usage,
546						lpm_tree);
547	if (err)
548		goto err_left_struct_set;
549	memcpy(&lpm_tree->prefix_usage, prefix_usage,
550	       sizeof(lpm_tree->prefix_usage));
551	memset(&lpm_tree->prefix_ref_count, 0,
552	       sizeof(lpm_tree->prefix_ref_count));
553	lpm_tree->ref_count = 1;
554	return lpm_tree;
555
556err_left_struct_set:
557	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
558	return ERR_PTR(err);
559}
560
561static void mlxsw_sp_lpm_tree_destroy(struct mlxsw_sp *mlxsw_sp,
562				      struct mlxsw_sp_lpm_tree *lpm_tree)
563{
564	mlxsw_sp_lpm_tree_free(mlxsw_sp, lpm_tree);
565}
566
567static struct mlxsw_sp_lpm_tree *
568mlxsw_sp_lpm_tree_get(struct mlxsw_sp *mlxsw_sp,
569		      struct mlxsw_sp_prefix_usage *prefix_usage,
570		      enum mlxsw_sp_l3proto proto)
571{
572	struct mlxsw_sp_lpm_tree *lpm_tree;
573	int i;
574
575	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
576		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
577		if (lpm_tree->ref_count != 0 &&
578		    lpm_tree->proto == proto &&
579		    mlxsw_sp_prefix_usage_eq(&lpm_tree->prefix_usage,
580					     prefix_usage)) {
581			mlxsw_sp_lpm_tree_hold(lpm_tree);
582			return lpm_tree;
583		}
584	}
585	return mlxsw_sp_lpm_tree_create(mlxsw_sp, prefix_usage, proto);
586}
587
588static void mlxsw_sp_lpm_tree_hold(struct mlxsw_sp_lpm_tree *lpm_tree)
589{
590	lpm_tree->ref_count++;
591}
592
593static void mlxsw_sp_lpm_tree_put(struct mlxsw_sp *mlxsw_sp,
594				  struct mlxsw_sp_lpm_tree *lpm_tree)
595{
596	if (--lpm_tree->ref_count == 0)
597		mlxsw_sp_lpm_tree_destroy(mlxsw_sp, lpm_tree);
598}
599
600#define MLXSW_SP_LPM_TREE_MIN 1 /* tree 0 is reserved */
601
602static int mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
603{
604	struct mlxsw_sp_prefix_usage req_prefix_usage = {{ 0 } };
605	struct mlxsw_sp_lpm_tree *lpm_tree;
606	u64 max_trees;
607	int err, i;
608
609	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LPM_TREES))
610		return -EIO;
611
612	max_trees = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LPM_TREES);
613	mlxsw_sp->router->lpm.tree_count = max_trees - MLXSW_SP_LPM_TREE_MIN;
614	mlxsw_sp->router->lpm.trees = kcalloc(mlxsw_sp->router->lpm.tree_count,
615					     sizeof(struct mlxsw_sp_lpm_tree),
616					     GFP_KERNEL);
617	if (!mlxsw_sp->router->lpm.trees)
618		return -ENOMEM;
619
620	for (i = 0; i < mlxsw_sp->router->lpm.tree_count; i++) {
621		lpm_tree = &mlxsw_sp->router->lpm.trees[i];
622		lpm_tree->id = i + MLXSW_SP_LPM_TREE_MIN;
623	}
624
625	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
626					 MLXSW_SP_L3_PROTO_IPV4);
627	if (IS_ERR(lpm_tree)) {
628		err = PTR_ERR(lpm_tree);
629		goto err_ipv4_tree_get;
630	}
631	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4] = lpm_tree;
632
633	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
634					 MLXSW_SP_L3_PROTO_IPV6);
635	if (IS_ERR(lpm_tree)) {
636		err = PTR_ERR(lpm_tree);
637		goto err_ipv6_tree_get;
638	}
639	mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6] = lpm_tree;
640
641	return 0;
642
643err_ipv6_tree_get:
644	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
645	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
646err_ipv4_tree_get:
647	kfree(mlxsw_sp->router->lpm.trees);
648	return err;
649}
650
651static void mlxsw_sp_lpm_fini(struct mlxsw_sp *mlxsw_sp)
652{
653	struct mlxsw_sp_lpm_tree *lpm_tree;
654
655	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV6];
656	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
657
658	lpm_tree = mlxsw_sp->router->lpm.proto_trees[MLXSW_SP_L3_PROTO_IPV4];
659	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
660
661	kfree(mlxsw_sp->router->lpm.trees);
662}
663
664static bool mlxsw_sp_vr_is_used(const struct mlxsw_sp_vr *vr)
665{
666	return !!vr->fib4 || !!vr->fib6 ||
667	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] ||
668	       !!vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
669}
670
671static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
672{
673	struct mlxsw_sp_vr *vr;
674	int i;
675
676	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
677		vr = &mlxsw_sp->router->vrs[i];
678		if (!mlxsw_sp_vr_is_used(vr))
679			return vr;
680	}
681	return NULL;
682}
683
684static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp,
685				     const struct mlxsw_sp_fib *fib, u8 tree_id)
686{
687	char raltb_pl[MLXSW_REG_RALTB_LEN];
688
689	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
690			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
691			     tree_id);
692	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
693}
694
695static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp,
696				       const struct mlxsw_sp_fib *fib)
697{
698	char raltb_pl[MLXSW_REG_RALTB_LEN];
699
700	/* Bind to tree 0 which is default */
701	mlxsw_reg_raltb_pack(raltb_pl, fib->vr->id,
702			     (enum mlxsw_reg_ralxx_protocol) fib->proto, 0);
703	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl);
704}
705
706static u32 mlxsw_sp_fix_tb_id(u32 tb_id)
707{
708	/* For our purpose, squash main, default and local tables into one */
709	if (tb_id == RT_TABLE_LOCAL || tb_id == RT_TABLE_DEFAULT)
710		tb_id = RT_TABLE_MAIN;
711	return tb_id;
712}
713
714static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
715					    u32 tb_id)
716{
717	struct mlxsw_sp_vr *vr;
718	int i;
719
720	tb_id = mlxsw_sp_fix_tb_id(tb_id);
721
722	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
723		vr = &mlxsw_sp->router->vrs[i];
724		if (mlxsw_sp_vr_is_used(vr) && vr->tb_id == tb_id)
725			return vr;
726	}
727	return NULL;
728}
729
730int mlxsw_sp_router_tb_id_vr_id(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
731				u16 *vr_id)
732{
733	struct mlxsw_sp_vr *vr;
734	int err = 0;
735
736	mutex_lock(&mlxsw_sp->router->lock);
737	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
738	if (!vr) {
739		err = -ESRCH;
740		goto out;
741	}
742	*vr_id = vr->id;
743out:
744	mutex_unlock(&mlxsw_sp->router->lock);
745	return err;
746}
747
748static struct mlxsw_sp_fib *mlxsw_sp_vr_fib(const struct mlxsw_sp_vr *vr,
749					    enum mlxsw_sp_l3proto proto)
750{
751	switch (proto) {
752	case MLXSW_SP_L3_PROTO_IPV4:
753		return vr->fib4;
754	case MLXSW_SP_L3_PROTO_IPV6:
755		return vr->fib6;
756	}
757	return NULL;
758}
759
760static struct mlxsw_sp_vr *mlxsw_sp_vr_create(struct mlxsw_sp *mlxsw_sp,
761					      u32 tb_id,
762					      struct netlink_ext_ack *extack)
763{
764	struct mlxsw_sp_mr_table *mr4_table, *mr6_table;
765	struct mlxsw_sp_fib *fib4;
766	struct mlxsw_sp_fib *fib6;
767	struct mlxsw_sp_vr *vr;
768	int err;
769
770	vr = mlxsw_sp_vr_find_unused(mlxsw_sp);
771	if (!vr) {
772		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported virtual routers");
773		return ERR_PTR(-EBUSY);
774	}
775	fib4 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
776	if (IS_ERR(fib4))
777		return ERR_CAST(fib4);
778	fib6 = mlxsw_sp_fib_create(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
779	if (IS_ERR(fib6)) {
780		err = PTR_ERR(fib6);
781		goto err_fib6_create;
782	}
783	mr4_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
784					     MLXSW_SP_L3_PROTO_IPV4);
785	if (IS_ERR(mr4_table)) {
786		err = PTR_ERR(mr4_table);
787		goto err_mr4_table_create;
788	}
789	mr6_table = mlxsw_sp_mr_table_create(mlxsw_sp, vr->id,
790					     MLXSW_SP_L3_PROTO_IPV6);
791	if (IS_ERR(mr6_table)) {
792		err = PTR_ERR(mr6_table);
793		goto err_mr6_table_create;
794	}
795
796	vr->fib4 = fib4;
797	vr->fib6 = fib6;
798	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = mr4_table;
799	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = mr6_table;
800	vr->tb_id = tb_id;
801	return vr;
802
803err_mr6_table_create:
804	mlxsw_sp_mr_table_destroy(mr4_table);
805err_mr4_table_create:
806	mlxsw_sp_fib_destroy(mlxsw_sp, fib6);
807err_fib6_create:
808	mlxsw_sp_fib_destroy(mlxsw_sp, fib4);
809	return ERR_PTR(err);
810}
811
812static void mlxsw_sp_vr_destroy(struct mlxsw_sp *mlxsw_sp,
813				struct mlxsw_sp_vr *vr)
814{
815	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]);
816	vr->mr_table[MLXSW_SP_L3_PROTO_IPV6] = NULL;
817	mlxsw_sp_mr_table_destroy(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]);
818	vr->mr_table[MLXSW_SP_L3_PROTO_IPV4] = NULL;
819	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib6);
820	vr->fib6 = NULL;
821	mlxsw_sp_fib_destroy(mlxsw_sp, vr->fib4);
822	vr->fib4 = NULL;
823}
824
825static struct mlxsw_sp_vr *mlxsw_sp_vr_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
826					   struct netlink_ext_ack *extack)
827{
828	struct mlxsw_sp_vr *vr;
829
830	tb_id = mlxsw_sp_fix_tb_id(tb_id);
831	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
832	if (!vr)
833		vr = mlxsw_sp_vr_create(mlxsw_sp, tb_id, extack);
834	return vr;
835}
836
837static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
838{
839	if (!vr->rif_count && list_empty(&vr->fib4->node_list) &&
840	    list_empty(&vr->fib6->node_list) &&
841	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV4]) &&
842	    mlxsw_sp_mr_table_empty(vr->mr_table[MLXSW_SP_L3_PROTO_IPV6]))
843		mlxsw_sp_vr_destroy(mlxsw_sp, vr);
844}
845
846static bool
847mlxsw_sp_vr_lpm_tree_should_replace(struct mlxsw_sp_vr *vr,
848				    enum mlxsw_sp_l3proto proto, u8 tree_id)
849{
850	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
851
852	if (!mlxsw_sp_vr_is_used(vr))
853		return false;
854	if (fib->lpm_tree->id == tree_id)
855		return true;
856	return false;
857}
858
859static int mlxsw_sp_vr_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
860					struct mlxsw_sp_fib *fib,
861					struct mlxsw_sp_lpm_tree *new_tree)
862{
863	struct mlxsw_sp_lpm_tree *old_tree = fib->lpm_tree;
864	int err;
865
866	fib->lpm_tree = new_tree;
867	mlxsw_sp_lpm_tree_hold(new_tree);
868	err = mlxsw_sp_vr_lpm_tree_bind(mlxsw_sp, fib, new_tree->id);
869	if (err)
870		goto err_tree_bind;
871	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
872	return 0;
873
874err_tree_bind:
875	mlxsw_sp_lpm_tree_put(mlxsw_sp, new_tree);
876	fib->lpm_tree = old_tree;
877	return err;
878}
879
880static int mlxsw_sp_vrs_lpm_tree_replace(struct mlxsw_sp *mlxsw_sp,
881					 struct mlxsw_sp_fib *fib,
882					 struct mlxsw_sp_lpm_tree *new_tree)
883{
884	enum mlxsw_sp_l3proto proto = fib->proto;
885	struct mlxsw_sp_lpm_tree *old_tree;
886	u8 old_id, new_id = new_tree->id;
887	struct mlxsw_sp_vr *vr;
888	int i, err;
889
890	old_tree = mlxsw_sp->router->lpm.proto_trees[proto];
891	old_id = old_tree->id;
892
893	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
894		vr = &mlxsw_sp->router->vrs[i];
895		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, old_id))
896			continue;
897		err = mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
898						   mlxsw_sp_vr_fib(vr, proto),
899						   new_tree);
900		if (err)
901			goto err_tree_replace;
902	}
903
904	memcpy(new_tree->prefix_ref_count, old_tree->prefix_ref_count,
905	       sizeof(new_tree->prefix_ref_count));
906	mlxsw_sp->router->lpm.proto_trees[proto] = new_tree;
907	mlxsw_sp_lpm_tree_put(mlxsw_sp, old_tree);
908
909	return 0;
910
911err_tree_replace:
912	for (i--; i >= 0; i--) {
913		if (!mlxsw_sp_vr_lpm_tree_should_replace(vr, proto, new_id))
914			continue;
915		mlxsw_sp_vr_lpm_tree_replace(mlxsw_sp,
916					     mlxsw_sp_vr_fib(vr, proto),
917					     old_tree);
918	}
919	return err;
920}
921
922static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
923{
924	struct mlxsw_sp_vr *vr;
925	u64 max_vrs;
926	int i;
927
928	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
929		return -EIO;
930
931	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
932	mlxsw_sp->router->vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
933					GFP_KERNEL);
934	if (!mlxsw_sp->router->vrs)
935		return -ENOMEM;
936
937	for (i = 0; i < max_vrs; i++) {
938		vr = &mlxsw_sp->router->vrs[i];
939		vr->id = i;
940	}
941
942	return 0;
943}
944
945static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
946
947static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
948{
949	/* At this stage we're guaranteed not to have new incoming
950	 * FIB notifications and the work queue is free from FIBs
951	 * sitting on top of mlxsw netdevs. However, we can still
952	 * have other FIBs queued. Flush the queue before flushing
953	 * the device's tables. No need for locks, as we're the only
954	 * writer.
955	 */
956	mlxsw_core_flush_owq();
957	mlxsw_sp_router_fib_flush(mlxsw_sp);
958	kfree(mlxsw_sp->router->vrs);
959}
960
961static struct net_device *
962__mlxsw_sp_ipip_netdev_ul_dev_get(const struct net_device *ol_dev)
963{
964	struct ip_tunnel *tun = netdev_priv(ol_dev);
965	struct net *net = dev_net(ol_dev);
966
967	return dev_get_by_index_rcu(net, tun->parms.link);
968}
969
970u32 mlxsw_sp_ipip_dev_ul_tb_id(const struct net_device *ol_dev)
971{
972	struct net_device *d;
973	u32 tb_id;
974
975	rcu_read_lock();
976	d = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
977	if (d)
978		tb_id = l3mdev_fib_table(d) ? : RT_TABLE_MAIN;
979	else
980		tb_id = RT_TABLE_MAIN;
981	rcu_read_unlock();
982
983	return tb_id;
984}
985
986static struct mlxsw_sp_rif *
987mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
988		    const struct mlxsw_sp_rif_params *params,
989		    struct netlink_ext_ack *extack);
990
991static struct mlxsw_sp_rif_ipip_lb *
992mlxsw_sp_ipip_ol_ipip_lb_create(struct mlxsw_sp *mlxsw_sp,
993				enum mlxsw_sp_ipip_type ipipt,
994				struct net_device *ol_dev,
995				struct netlink_ext_ack *extack)
996{
997	struct mlxsw_sp_rif_params_ipip_lb lb_params;
998	const struct mlxsw_sp_ipip_ops *ipip_ops;
999	struct mlxsw_sp_rif *rif;
1000
1001	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1002	lb_params = (struct mlxsw_sp_rif_params_ipip_lb) {
1003		.common.dev = ol_dev,
1004		.common.lag = false,
1005		.lb_config = ipip_ops->ol_loopback_config(mlxsw_sp, ol_dev),
1006	};
1007
1008	rif = mlxsw_sp_rif_create(mlxsw_sp, &lb_params.common, extack);
1009	if (IS_ERR(rif))
1010		return ERR_CAST(rif);
1011	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
1012}
1013
1014static struct mlxsw_sp_ipip_entry *
1015mlxsw_sp_ipip_entry_alloc(struct mlxsw_sp *mlxsw_sp,
1016			  enum mlxsw_sp_ipip_type ipipt,
1017			  struct net_device *ol_dev)
1018{
1019	const struct mlxsw_sp_ipip_ops *ipip_ops;
1020	struct mlxsw_sp_ipip_entry *ipip_entry;
1021	struct mlxsw_sp_ipip_entry *ret = NULL;
1022
1023	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipipt];
1024	ipip_entry = kzalloc(sizeof(*ipip_entry), GFP_KERNEL);
1025	if (!ipip_entry)
1026		return ERR_PTR(-ENOMEM);
1027
1028	ipip_entry->ol_lb = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp, ipipt,
1029							    ol_dev, NULL);
1030	if (IS_ERR(ipip_entry->ol_lb)) {
1031		ret = ERR_CAST(ipip_entry->ol_lb);
1032		goto err_ol_ipip_lb_create;
1033	}
1034
1035	ipip_entry->ipipt = ipipt;
1036	ipip_entry->ol_dev = ol_dev;
1037
1038	switch (ipip_ops->ul_proto) {
1039	case MLXSW_SP_L3_PROTO_IPV4:
1040		ipip_entry->parms4 = mlxsw_sp_ipip_netdev_parms4(ol_dev);
1041		break;
1042	case MLXSW_SP_L3_PROTO_IPV6:
1043		WARN_ON(1);
1044		break;
1045	}
1046
1047	return ipip_entry;
1048
1049err_ol_ipip_lb_create:
1050	kfree(ipip_entry);
1051	return ret;
1052}
1053
1054static void
1055mlxsw_sp_ipip_entry_dealloc(struct mlxsw_sp_ipip_entry *ipip_entry)
1056{
1057	mlxsw_sp_rif_destroy(&ipip_entry->ol_lb->common);
1058	kfree(ipip_entry);
1059}
1060
1061static bool
1062mlxsw_sp_ipip_entry_saddr_matches(struct mlxsw_sp *mlxsw_sp,
1063				  const enum mlxsw_sp_l3proto ul_proto,
1064				  union mlxsw_sp_l3addr saddr,
1065				  u32 ul_tb_id,
1066				  struct mlxsw_sp_ipip_entry *ipip_entry)
1067{
1068	u32 tun_ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1069	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1070	union mlxsw_sp_l3addr tun_saddr;
1071
1072	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1073		return false;
1074
1075	tun_saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1076	return tun_ul_tb_id == ul_tb_id &&
1077	       mlxsw_sp_l3addr_eq(&tun_saddr, &saddr);
1078}
1079
1080static int
1081mlxsw_sp_fib_entry_decap_init(struct mlxsw_sp *mlxsw_sp,
1082			      struct mlxsw_sp_fib_entry *fib_entry,
1083			      struct mlxsw_sp_ipip_entry *ipip_entry)
1084{
1085	u32 tunnel_index;
1086	int err;
1087
1088	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1089				  1, &tunnel_index);
1090	if (err)
1091		return err;
1092
1093	ipip_entry->decap_fib_entry = fib_entry;
1094	fib_entry->decap.ipip_entry = ipip_entry;
1095	fib_entry->decap.tunnel_index = tunnel_index;
1096	return 0;
1097}
1098
1099static void mlxsw_sp_fib_entry_decap_fini(struct mlxsw_sp *mlxsw_sp,
1100					  struct mlxsw_sp_fib_entry *fib_entry)
1101{
1102	/* Unlink this node from the IPIP entry that it's the decap entry of. */
1103	fib_entry->decap.ipip_entry->decap_fib_entry = NULL;
1104	fib_entry->decap.ipip_entry = NULL;
1105	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
1106			   1, fib_entry->decap.tunnel_index);
1107}
1108
1109static struct mlxsw_sp_fib_node *
1110mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
1111			 size_t addr_len, unsigned char prefix_len);
1112static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
1113				     struct mlxsw_sp_fib_entry *fib_entry);
1114
1115static void
1116mlxsw_sp_ipip_entry_demote_decap(struct mlxsw_sp *mlxsw_sp,
1117				 struct mlxsw_sp_ipip_entry *ipip_entry)
1118{
1119	struct mlxsw_sp_fib_entry *fib_entry = ipip_entry->decap_fib_entry;
1120
1121	mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
1122	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1123
1124	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1125}
1126
1127static void
1128mlxsw_sp_ipip_entry_promote_decap(struct mlxsw_sp *mlxsw_sp,
1129				  struct mlxsw_sp_ipip_entry *ipip_entry,
1130				  struct mlxsw_sp_fib_entry *decap_fib_entry)
1131{
1132	if (mlxsw_sp_fib_entry_decap_init(mlxsw_sp, decap_fib_entry,
1133					  ipip_entry))
1134		return;
1135	decap_fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
1136
1137	if (mlxsw_sp_fib_entry_update(mlxsw_sp, decap_fib_entry))
1138		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1139}
1140
1141static struct mlxsw_sp_fib_entry *
1142mlxsw_sp_router_ip2me_fib_entry_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
1143				     enum mlxsw_sp_l3proto proto,
1144				     const union mlxsw_sp_l3addr *addr,
1145				     enum mlxsw_sp_fib_entry_type type)
1146{
1147	struct mlxsw_sp_fib_node *fib_node;
1148	unsigned char addr_prefix_len;
1149	struct mlxsw_sp_fib *fib;
1150	struct mlxsw_sp_vr *vr;
1151	const void *addrp;
1152	size_t addr_len;
1153	u32 addr4;
1154
1155	vr = mlxsw_sp_vr_find(mlxsw_sp, tb_id);
1156	if (!vr)
1157		return NULL;
1158	fib = mlxsw_sp_vr_fib(vr, proto);
1159
1160	switch (proto) {
1161	case MLXSW_SP_L3_PROTO_IPV4:
1162		addr4 = be32_to_cpu(addr->addr4);
1163		addrp = &addr4;
1164		addr_len = 4;
1165		addr_prefix_len = 32;
1166		break;
1167	case MLXSW_SP_L3_PROTO_IPV6:
1168	default:
1169		WARN_ON(1);
1170		return NULL;
1171	}
1172
1173	fib_node = mlxsw_sp_fib_node_lookup(fib, addrp, addr_len,
1174					    addr_prefix_len);
1175	if (!fib_node || fib_node->fib_entry->type != type)
1176		return NULL;
1177
1178	return fib_node->fib_entry;
1179}
1180
1181/* Given an IPIP entry, find the corresponding decap route. */
1182static struct mlxsw_sp_fib_entry *
1183mlxsw_sp_ipip_entry_find_decap(struct mlxsw_sp *mlxsw_sp,
1184			       struct mlxsw_sp_ipip_entry *ipip_entry)
1185{
1186	static struct mlxsw_sp_fib_node *fib_node;
1187	const struct mlxsw_sp_ipip_ops *ipip_ops;
1188	unsigned char saddr_prefix_len;
1189	union mlxsw_sp_l3addr saddr;
1190	struct mlxsw_sp_fib *ul_fib;
1191	struct mlxsw_sp_vr *ul_vr;
1192	const void *saddrp;
1193	size_t saddr_len;
1194	u32 ul_tb_id;
1195	u32 saddr4;
1196
1197	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1198
1199	ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ipip_entry->ol_dev);
1200	ul_vr = mlxsw_sp_vr_find(mlxsw_sp, ul_tb_id);
1201	if (!ul_vr)
1202		return NULL;
1203
1204	ul_fib = mlxsw_sp_vr_fib(ul_vr, ipip_ops->ul_proto);
1205	saddr = mlxsw_sp_ipip_netdev_saddr(ipip_ops->ul_proto,
1206					   ipip_entry->ol_dev);
1207
1208	switch (ipip_ops->ul_proto) {
1209	case MLXSW_SP_L3_PROTO_IPV4:
1210		saddr4 = be32_to_cpu(saddr.addr4);
1211		saddrp = &saddr4;
1212		saddr_len = 4;
1213		saddr_prefix_len = 32;
1214		break;
1215	default:
1216		WARN_ON(1);
1217		return NULL;
1218	}
1219
1220	fib_node = mlxsw_sp_fib_node_lookup(ul_fib, saddrp, saddr_len,
1221					    saddr_prefix_len);
1222	if (!fib_node ||
1223	    fib_node->fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP)
1224		return NULL;
1225
1226	return fib_node->fib_entry;
1227}
1228
1229static struct mlxsw_sp_ipip_entry *
1230mlxsw_sp_ipip_entry_create(struct mlxsw_sp *mlxsw_sp,
1231			   enum mlxsw_sp_ipip_type ipipt,
1232			   struct net_device *ol_dev)
1233{
1234	struct mlxsw_sp_ipip_entry *ipip_entry;
1235
1236	ipip_entry = mlxsw_sp_ipip_entry_alloc(mlxsw_sp, ipipt, ol_dev);
1237	if (IS_ERR(ipip_entry))
1238		return ipip_entry;
1239
1240	list_add_tail(&ipip_entry->ipip_list_node,
1241		      &mlxsw_sp->router->ipip_list);
1242
1243	return ipip_entry;
1244}
1245
1246static void
1247mlxsw_sp_ipip_entry_destroy(struct mlxsw_sp *mlxsw_sp,
1248			    struct mlxsw_sp_ipip_entry *ipip_entry)
1249{
1250	list_del(&ipip_entry->ipip_list_node);
1251	mlxsw_sp_ipip_entry_dealloc(ipip_entry);
1252}
1253
1254static bool
1255mlxsw_sp_ipip_entry_matches_decap(struct mlxsw_sp *mlxsw_sp,
1256				  const struct net_device *ul_dev,
1257				  enum mlxsw_sp_l3proto ul_proto,
1258				  union mlxsw_sp_l3addr ul_dip,
1259				  struct mlxsw_sp_ipip_entry *ipip_entry)
1260{
1261	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1262	enum mlxsw_sp_ipip_type ipipt = ipip_entry->ipipt;
1263
1264	if (mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto != ul_proto)
1265		return false;
1266
1267	return mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, ul_dip,
1268						 ul_tb_id, ipip_entry);
1269}
1270
1271/* Given decap parameters, find the corresponding IPIP entry. */
1272static struct mlxsw_sp_ipip_entry *
1273mlxsw_sp_ipip_entry_find_by_decap(struct mlxsw_sp *mlxsw_sp,
1274				  const struct net_device *ul_dev,
1275				  enum mlxsw_sp_l3proto ul_proto,
1276				  union mlxsw_sp_l3addr ul_dip)
1277{
1278	struct mlxsw_sp_ipip_entry *ipip_entry;
1279
1280	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1281			    ipip_list_node)
1282		if (mlxsw_sp_ipip_entry_matches_decap(mlxsw_sp, ul_dev,
1283						      ul_proto, ul_dip,
1284						      ipip_entry))
1285			return ipip_entry;
1286
1287	return NULL;
1288}
1289
1290static bool mlxsw_sp_netdev_ipip_type(const struct mlxsw_sp *mlxsw_sp,
1291				      const struct net_device *dev,
1292				      enum mlxsw_sp_ipip_type *p_type)
1293{
1294	struct mlxsw_sp_router *router = mlxsw_sp->router;
1295	const struct mlxsw_sp_ipip_ops *ipip_ops;
1296	enum mlxsw_sp_ipip_type ipipt;
1297
1298	for (ipipt = 0; ipipt < MLXSW_SP_IPIP_TYPE_MAX; ++ipipt) {
1299		ipip_ops = router->ipip_ops_arr[ipipt];
1300		if (dev->type == ipip_ops->dev_type) {
1301			if (p_type)
1302				*p_type = ipipt;
1303			return true;
1304		}
1305	}
1306	return false;
1307}
1308
1309bool mlxsw_sp_netdev_is_ipip_ol(const struct mlxsw_sp *mlxsw_sp,
1310				const struct net_device *dev)
1311{
1312	return mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL);
1313}
1314
1315static struct mlxsw_sp_ipip_entry *
1316mlxsw_sp_ipip_entry_find_by_ol_dev(struct mlxsw_sp *mlxsw_sp,
1317				   const struct net_device *ol_dev)
1318{
1319	struct mlxsw_sp_ipip_entry *ipip_entry;
1320
1321	list_for_each_entry(ipip_entry, &mlxsw_sp->router->ipip_list,
1322			    ipip_list_node)
1323		if (ipip_entry->ol_dev == ol_dev)
1324			return ipip_entry;
1325
1326	return NULL;
1327}
1328
1329static struct mlxsw_sp_ipip_entry *
1330mlxsw_sp_ipip_entry_find_by_ul_dev(const struct mlxsw_sp *mlxsw_sp,
1331				   const struct net_device *ul_dev,
1332				   struct mlxsw_sp_ipip_entry *start)
1333{
1334	struct mlxsw_sp_ipip_entry *ipip_entry;
1335
1336	ipip_entry = list_prepare_entry(start, &mlxsw_sp->router->ipip_list,
1337					ipip_list_node);
1338	list_for_each_entry_continue(ipip_entry, &mlxsw_sp->router->ipip_list,
1339				     ipip_list_node) {
1340		struct net_device *ol_dev = ipip_entry->ol_dev;
1341		struct net_device *ipip_ul_dev;
1342
1343		rcu_read_lock();
1344		ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1345		rcu_read_unlock();
1346
1347		if (ipip_ul_dev == ul_dev)
1348			return ipip_entry;
1349	}
1350
1351	return NULL;
1352}
1353
1354bool mlxsw_sp_netdev_is_ipip_ul(struct mlxsw_sp *mlxsw_sp,
1355				const struct net_device *dev)
1356{
1357	bool is_ipip_ul;
1358
1359	mutex_lock(&mlxsw_sp->router->lock);
1360	is_ipip_ul = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp, dev, NULL);
1361	mutex_unlock(&mlxsw_sp->router->lock);
1362
1363	return is_ipip_ul;
1364}
1365
1366static bool mlxsw_sp_netdevice_ipip_can_offload(struct mlxsw_sp *mlxsw_sp,
1367						const struct net_device *ol_dev,
1368						enum mlxsw_sp_ipip_type ipipt)
1369{
1370	const struct mlxsw_sp_ipip_ops *ops
1371		= mlxsw_sp->router->ipip_ops_arr[ipipt];
1372
1373	/* For deciding whether decap should be offloaded, we don't care about
1374	 * overlay protocol, so ask whether either one is supported.
1375	 */
1376	return ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV4) ||
1377	       ops->can_offload(mlxsw_sp, ol_dev, MLXSW_SP_L3_PROTO_IPV6);
1378}
1379
1380static int mlxsw_sp_netdevice_ipip_ol_reg_event(struct mlxsw_sp *mlxsw_sp,
1381						struct net_device *ol_dev)
1382{
1383	enum mlxsw_sp_ipip_type ipipt = MLXSW_SP_IPIP_TYPE_MAX;
1384	struct mlxsw_sp_ipip_entry *ipip_entry;
1385	enum mlxsw_sp_l3proto ul_proto;
1386	union mlxsw_sp_l3addr saddr;
1387	u32 ul_tb_id;
1388
1389	mlxsw_sp_netdev_ipip_type(mlxsw_sp, ol_dev, &ipipt);
1390	if (mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev, ipipt)) {
1391		ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(ol_dev);
1392		ul_proto = mlxsw_sp->router->ipip_ops_arr[ipipt]->ul_proto;
1393		saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ol_dev);
1394		if (!mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1395							  saddr, ul_tb_id,
1396							  NULL)) {
1397			ipip_entry = mlxsw_sp_ipip_entry_create(mlxsw_sp, ipipt,
1398								ol_dev);
1399			if (IS_ERR(ipip_entry))
1400				return PTR_ERR(ipip_entry);
1401		}
1402	}
1403
1404	return 0;
1405}
1406
1407static void mlxsw_sp_netdevice_ipip_ol_unreg_event(struct mlxsw_sp *mlxsw_sp,
1408						   struct net_device *ol_dev)
1409{
1410	struct mlxsw_sp_ipip_entry *ipip_entry;
1411
1412	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1413	if (ipip_entry)
1414		mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1415}
1416
1417static void
1418mlxsw_sp_ipip_entry_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1419				struct mlxsw_sp_ipip_entry *ipip_entry)
1420{
1421	struct mlxsw_sp_fib_entry *decap_fib_entry;
1422
1423	decap_fib_entry = mlxsw_sp_ipip_entry_find_decap(mlxsw_sp, ipip_entry);
1424	if (decap_fib_entry)
1425		mlxsw_sp_ipip_entry_promote_decap(mlxsw_sp, ipip_entry,
1426						  decap_fib_entry);
1427}
1428
1429static int
1430mlxsw_sp_rif_ipip_lb_op(struct mlxsw_sp_rif_ipip_lb *lb_rif, u16 ul_vr_id,
1431			u16 ul_rif_id, bool enable)
1432{
1433	struct mlxsw_sp_rif_ipip_lb_config lb_cf = lb_rif->lb_config;
1434	struct mlxsw_sp_rif *rif = &lb_rif->common;
1435	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
1436	char ritr_pl[MLXSW_REG_RITR_LEN];
1437	u32 saddr4;
1438
1439	switch (lb_cf.ul_protocol) {
1440	case MLXSW_SP_L3_PROTO_IPV4:
1441		saddr4 = be32_to_cpu(lb_cf.saddr.addr4);
1442		mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
1443				    rif->rif_index, rif->vr_id, rif->dev->mtu);
1444		mlxsw_reg_ritr_loopback_ipip4_pack(ritr_pl, lb_cf.lb_ipipt,
1445			    MLXSW_REG_RITR_LOOPBACK_IPIP_OPTIONS_GRE_KEY_PRESET,
1446			    ul_vr_id, ul_rif_id, saddr4, lb_cf.okey);
1447		break;
1448
1449	case MLXSW_SP_L3_PROTO_IPV6:
1450		return -EAFNOSUPPORT;
1451	}
1452
1453	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
1454}
1455
1456static int mlxsw_sp_netdevice_ipip_ol_update_mtu(struct mlxsw_sp *mlxsw_sp,
1457						 struct net_device *ol_dev)
1458{
1459	struct mlxsw_sp_ipip_entry *ipip_entry;
1460	struct mlxsw_sp_rif_ipip_lb *lb_rif;
1461	int err = 0;
1462
1463	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1464	if (ipip_entry) {
1465		lb_rif = ipip_entry->ol_lb;
1466		err = mlxsw_sp_rif_ipip_lb_op(lb_rif, lb_rif->ul_vr_id,
1467					      lb_rif->ul_rif_id, true);
1468		if (err)
1469			goto out;
1470		lb_rif->common.mtu = ol_dev->mtu;
1471	}
1472
1473out:
1474	return err;
1475}
1476
1477static void mlxsw_sp_netdevice_ipip_ol_up_event(struct mlxsw_sp *mlxsw_sp,
1478						struct net_device *ol_dev)
1479{
1480	struct mlxsw_sp_ipip_entry *ipip_entry;
1481
1482	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1483	if (ipip_entry)
1484		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1485}
1486
1487static void
1488mlxsw_sp_ipip_entry_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1489				  struct mlxsw_sp_ipip_entry *ipip_entry)
1490{
1491	if (ipip_entry->decap_fib_entry)
1492		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1493}
1494
1495static void mlxsw_sp_netdevice_ipip_ol_down_event(struct mlxsw_sp *mlxsw_sp,
1496						  struct net_device *ol_dev)
1497{
1498	struct mlxsw_sp_ipip_entry *ipip_entry;
1499
1500	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1501	if (ipip_entry)
1502		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1503}
1504
1505static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
1506					 struct mlxsw_sp_rif *old_rif,
1507					 struct mlxsw_sp_rif *new_rif);
1508static int
1509mlxsw_sp_ipip_entry_ol_lb_update(struct mlxsw_sp *mlxsw_sp,
1510				 struct mlxsw_sp_ipip_entry *ipip_entry,
1511				 bool keep_encap,
1512				 struct netlink_ext_ack *extack)
1513{
1514	struct mlxsw_sp_rif_ipip_lb *old_lb_rif = ipip_entry->ol_lb;
1515	struct mlxsw_sp_rif_ipip_lb *new_lb_rif;
1516
1517	new_lb_rif = mlxsw_sp_ipip_ol_ipip_lb_create(mlxsw_sp,
1518						     ipip_entry->ipipt,
1519						     ipip_entry->ol_dev,
1520						     extack);
1521	if (IS_ERR(new_lb_rif))
1522		return PTR_ERR(new_lb_rif);
1523	ipip_entry->ol_lb = new_lb_rif;
1524
1525	if (keep_encap)
1526		mlxsw_sp_nexthop_rif_migrate(mlxsw_sp, &old_lb_rif->common,
1527					     &new_lb_rif->common);
1528
1529	mlxsw_sp_rif_destroy(&old_lb_rif->common);
1530
1531	return 0;
1532}
1533
1534static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
1535					struct mlxsw_sp_rif *rif);
1536
1537/**
1538 * __mlxsw_sp_ipip_entry_update_tunnel - Update offload related to IPIP entry.
1539 * @mlxsw_sp: mlxsw_sp.
1540 * @ipip_entry: IPIP entry.
1541 * @recreate_loopback: Recreates the associated loopback RIF.
1542 * @keep_encap: Updates next hops that use the tunnel netdevice. This is only
1543 *              relevant when recreate_loopback is true.
1544 * @update_nexthops: Updates next hops, keeping the current loopback RIF. This
1545 *                   is only relevant when recreate_loopback is false.
1546 * @extack: extack.
1547 *
1548 * Return: Non-zero value on failure.
1549 */
1550int __mlxsw_sp_ipip_entry_update_tunnel(struct mlxsw_sp *mlxsw_sp,
1551					struct mlxsw_sp_ipip_entry *ipip_entry,
1552					bool recreate_loopback,
1553					bool keep_encap,
1554					bool update_nexthops,
1555					struct netlink_ext_ack *extack)
1556{
1557	int err;
1558
1559	/* RIFs can't be edited, so to update loopback, we need to destroy and
1560	 * recreate it. That creates a window of opportunity where RALUE and
1561	 * RATR registers end up referencing a RIF that's already gone. RATRs
1562	 * are handled in mlxsw_sp_ipip_entry_ol_lb_update(), and to take care
1563	 * of RALUE, demote the decap route back.
1564	 */
1565	if (ipip_entry->decap_fib_entry)
1566		mlxsw_sp_ipip_entry_demote_decap(mlxsw_sp, ipip_entry);
1567
1568	if (recreate_loopback) {
1569		err = mlxsw_sp_ipip_entry_ol_lb_update(mlxsw_sp, ipip_entry,
1570						       keep_encap, extack);
1571		if (err)
1572			return err;
1573	} else if (update_nexthops) {
1574		mlxsw_sp_nexthop_rif_update(mlxsw_sp,
1575					    &ipip_entry->ol_lb->common);
1576	}
1577
1578	if (ipip_entry->ol_dev->flags & IFF_UP)
1579		mlxsw_sp_ipip_entry_ol_up_event(mlxsw_sp, ipip_entry);
1580
1581	return 0;
1582}
1583
1584static int mlxsw_sp_netdevice_ipip_ol_vrf_event(struct mlxsw_sp *mlxsw_sp,
1585						struct net_device *ol_dev,
1586						struct netlink_ext_ack *extack)
1587{
1588	struct mlxsw_sp_ipip_entry *ipip_entry =
1589		mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1590
1591	if (!ipip_entry)
1592		return 0;
1593
1594	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1595						   true, false, false, extack);
1596}
1597
1598static int
1599mlxsw_sp_netdevice_ipip_ul_vrf_event(struct mlxsw_sp *mlxsw_sp,
1600				     struct mlxsw_sp_ipip_entry *ipip_entry,
1601				     struct net_device *ul_dev,
1602				     bool *demote_this,
1603				     struct netlink_ext_ack *extack)
1604{
1605	u32 ul_tb_id = l3mdev_fib_table(ul_dev) ? : RT_TABLE_MAIN;
1606	enum mlxsw_sp_l3proto ul_proto;
1607	union mlxsw_sp_l3addr saddr;
1608
1609	/* Moving underlay to a different VRF might cause local address
1610	 * conflict, and the conflicting tunnels need to be demoted.
1611	 */
1612	ul_proto = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt]->ul_proto;
1613	saddr = mlxsw_sp_ipip_netdev_saddr(ul_proto, ipip_entry->ol_dev);
1614	if (mlxsw_sp_ipip_demote_tunnel_by_saddr(mlxsw_sp, ul_proto,
1615						 saddr, ul_tb_id,
1616						 ipip_entry)) {
1617		*demote_this = true;
1618		return 0;
1619	}
1620
1621	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1622						   true, true, false, extack);
1623}
1624
1625static int
1626mlxsw_sp_netdevice_ipip_ul_up_event(struct mlxsw_sp *mlxsw_sp,
1627				    struct mlxsw_sp_ipip_entry *ipip_entry,
1628				    struct net_device *ul_dev)
1629{
1630	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1631						   false, false, true, NULL);
1632}
1633
1634static int
1635mlxsw_sp_netdevice_ipip_ul_down_event(struct mlxsw_sp *mlxsw_sp,
1636				      struct mlxsw_sp_ipip_entry *ipip_entry,
1637				      struct net_device *ul_dev)
1638{
1639	/* A down underlay device causes encapsulated packets to not be
1640	 * forwarded, but decap still works. So refresh next hops without
1641	 * touching anything else.
1642	 */
1643	return __mlxsw_sp_ipip_entry_update_tunnel(mlxsw_sp, ipip_entry,
1644						   false, false, true, NULL);
1645}
1646
1647static int
1648mlxsw_sp_netdevice_ipip_ol_change_event(struct mlxsw_sp *mlxsw_sp,
1649					struct net_device *ol_dev,
1650					struct netlink_ext_ack *extack)
1651{
1652	const struct mlxsw_sp_ipip_ops *ipip_ops;
1653	struct mlxsw_sp_ipip_entry *ipip_entry;
1654	int err;
1655
1656	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, ol_dev);
1657	if (!ipip_entry)
1658		/* A change might make a tunnel eligible for offloading, but
1659		 * that is currently not implemented. What falls to slow path
1660		 * stays there.
1661		 */
1662		return 0;
1663
1664	/* A change might make a tunnel not eligible for offloading. */
1665	if (!mlxsw_sp_netdevice_ipip_can_offload(mlxsw_sp, ol_dev,
1666						 ipip_entry->ipipt)) {
1667		mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1668		return 0;
1669	}
1670
1671	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
1672	err = ipip_ops->ol_netdev_change(mlxsw_sp, ipip_entry, extack);
1673	return err;
1674}
1675
1676void mlxsw_sp_ipip_entry_demote_tunnel(struct mlxsw_sp *mlxsw_sp,
1677				       struct mlxsw_sp_ipip_entry *ipip_entry)
1678{
1679	struct net_device *ol_dev = ipip_entry->ol_dev;
1680
1681	if (ol_dev->flags & IFF_UP)
1682		mlxsw_sp_ipip_entry_ol_down_event(mlxsw_sp, ipip_entry);
1683	mlxsw_sp_ipip_entry_destroy(mlxsw_sp, ipip_entry);
1684}
1685
1686/* The configuration where several tunnels have the same local address in the
1687 * same underlay table needs special treatment in the HW. That is currently not
1688 * implemented in the driver. This function finds and demotes the first tunnel
1689 * with a given source address, except the one passed in in the argument
1690 * `except'.
1691 */
1692bool
1693mlxsw_sp_ipip_demote_tunnel_by_saddr(struct mlxsw_sp *mlxsw_sp,
1694				     enum mlxsw_sp_l3proto ul_proto,
1695				     union mlxsw_sp_l3addr saddr,
1696				     u32 ul_tb_id,
1697				     const struct mlxsw_sp_ipip_entry *except)
1698{
1699	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1700
1701	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1702				 ipip_list_node) {
1703		if (ipip_entry != except &&
1704		    mlxsw_sp_ipip_entry_saddr_matches(mlxsw_sp, ul_proto, saddr,
1705						      ul_tb_id, ipip_entry)) {
1706			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1707			return true;
1708		}
1709	}
1710
1711	return false;
1712}
1713
1714static void mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(struct mlxsw_sp *mlxsw_sp,
1715						     struct net_device *ul_dev)
1716{
1717	struct mlxsw_sp_ipip_entry *ipip_entry, *tmp;
1718
1719	list_for_each_entry_safe(ipip_entry, tmp, &mlxsw_sp->router->ipip_list,
1720				 ipip_list_node) {
1721		struct net_device *ol_dev = ipip_entry->ol_dev;
1722		struct net_device *ipip_ul_dev;
1723
1724		rcu_read_lock();
1725		ipip_ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
1726		rcu_read_unlock();
1727		if (ipip_ul_dev == ul_dev)
1728			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1729	}
1730}
1731
1732int mlxsw_sp_netdevice_ipip_ol_event(struct mlxsw_sp *mlxsw_sp,
1733				     struct net_device *ol_dev,
1734				     unsigned long event,
1735				     struct netdev_notifier_info *info)
1736{
1737	struct netdev_notifier_changeupper_info *chup;
1738	struct netlink_ext_ack *extack;
1739	int err = 0;
1740
1741	mutex_lock(&mlxsw_sp->router->lock);
1742	switch (event) {
1743	case NETDEV_REGISTER:
1744		err = mlxsw_sp_netdevice_ipip_ol_reg_event(mlxsw_sp, ol_dev);
1745		break;
1746	case NETDEV_UNREGISTER:
1747		mlxsw_sp_netdevice_ipip_ol_unreg_event(mlxsw_sp, ol_dev);
1748		break;
1749	case NETDEV_UP:
1750		mlxsw_sp_netdevice_ipip_ol_up_event(mlxsw_sp, ol_dev);
1751		break;
1752	case NETDEV_DOWN:
1753		mlxsw_sp_netdevice_ipip_ol_down_event(mlxsw_sp, ol_dev);
1754		break;
1755	case NETDEV_CHANGEUPPER:
1756		chup = container_of(info, typeof(*chup), info);
1757		extack = info->extack;
1758		if (netif_is_l3_master(chup->upper_dev))
1759			err = mlxsw_sp_netdevice_ipip_ol_vrf_event(mlxsw_sp,
1760								   ol_dev,
1761								   extack);
1762		break;
1763	case NETDEV_CHANGE:
1764		extack = info->extack;
1765		err = mlxsw_sp_netdevice_ipip_ol_change_event(mlxsw_sp,
1766							      ol_dev, extack);
1767		break;
1768	case NETDEV_CHANGEMTU:
1769		err = mlxsw_sp_netdevice_ipip_ol_update_mtu(mlxsw_sp, ol_dev);
1770		break;
1771	}
1772	mutex_unlock(&mlxsw_sp->router->lock);
1773	return err;
1774}
1775
1776static int
1777__mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1778				   struct mlxsw_sp_ipip_entry *ipip_entry,
1779				   struct net_device *ul_dev,
1780				   bool *demote_this,
1781				   unsigned long event,
1782				   struct netdev_notifier_info *info)
1783{
1784	struct netdev_notifier_changeupper_info *chup;
1785	struct netlink_ext_ack *extack;
1786
1787	switch (event) {
1788	case NETDEV_CHANGEUPPER:
1789		chup = container_of(info, typeof(*chup), info);
1790		extack = info->extack;
1791		if (netif_is_l3_master(chup->upper_dev))
1792			return mlxsw_sp_netdevice_ipip_ul_vrf_event(mlxsw_sp,
1793								    ipip_entry,
1794								    ul_dev,
1795								    demote_this,
1796								    extack);
1797		break;
1798
1799	case NETDEV_UP:
1800		return mlxsw_sp_netdevice_ipip_ul_up_event(mlxsw_sp, ipip_entry,
1801							   ul_dev);
1802	case NETDEV_DOWN:
1803		return mlxsw_sp_netdevice_ipip_ul_down_event(mlxsw_sp,
1804							     ipip_entry,
1805							     ul_dev);
1806	}
1807	return 0;
1808}
1809
1810int
1811mlxsw_sp_netdevice_ipip_ul_event(struct mlxsw_sp *mlxsw_sp,
1812				 struct net_device *ul_dev,
1813				 unsigned long event,
1814				 struct netdev_notifier_info *info)
1815{
1816	struct mlxsw_sp_ipip_entry *ipip_entry = NULL;
1817	int err = 0;
1818
1819	mutex_lock(&mlxsw_sp->router->lock);
1820	while ((ipip_entry = mlxsw_sp_ipip_entry_find_by_ul_dev(mlxsw_sp,
1821								ul_dev,
1822								ipip_entry))) {
1823		struct mlxsw_sp_ipip_entry *prev;
1824		bool demote_this = false;
1825
1826		err = __mlxsw_sp_netdevice_ipip_ul_event(mlxsw_sp, ipip_entry,
1827							 ul_dev, &demote_this,
1828							 event, info);
1829		if (err) {
1830			mlxsw_sp_ipip_demote_tunnel_by_ul_netdev(mlxsw_sp,
1831								 ul_dev);
1832			break;
1833		}
1834
1835		if (demote_this) {
1836			if (list_is_first(&ipip_entry->ipip_list_node,
1837					  &mlxsw_sp->router->ipip_list))
1838				prev = NULL;
1839			else
1840				/* This can't be cached from previous iteration,
1841				 * because that entry could be gone now.
1842				 */
1843				prev = list_prev_entry(ipip_entry,
1844						       ipip_list_node);
1845			mlxsw_sp_ipip_entry_demote_tunnel(mlxsw_sp, ipip_entry);
1846			ipip_entry = prev;
1847		}
1848	}
1849	mutex_unlock(&mlxsw_sp->router->lock);
1850
1851	return err;
1852}
1853
1854int mlxsw_sp_router_nve_promote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1855				      enum mlxsw_sp_l3proto ul_proto,
1856				      const union mlxsw_sp_l3addr *ul_sip,
1857				      u32 tunnel_index)
1858{
1859	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1860	struct mlxsw_sp_router *router = mlxsw_sp->router;
1861	struct mlxsw_sp_fib_entry *fib_entry;
1862	int err = 0;
1863
1864	mutex_lock(&mlxsw_sp->router->lock);
1865
1866	if (WARN_ON_ONCE(router->nve_decap_config.valid)) {
1867		err = -EINVAL;
1868		goto out;
1869	}
1870
1871	router->nve_decap_config.ul_tb_id = ul_tb_id;
1872	router->nve_decap_config.tunnel_index = tunnel_index;
1873	router->nve_decap_config.ul_proto = ul_proto;
1874	router->nve_decap_config.ul_sip = *ul_sip;
1875	router->nve_decap_config.valid = true;
1876
1877	/* It is valid to create a tunnel with a local IP and only later
1878	 * assign this IP address to a local interface
1879	 */
1880	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1881							 ul_proto, ul_sip,
1882							 type);
1883	if (!fib_entry)
1884		goto out;
1885
1886	fib_entry->decap.tunnel_index = tunnel_index;
1887	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1888
1889	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1890	if (err)
1891		goto err_fib_entry_update;
1892
1893	goto out;
1894
1895err_fib_entry_update:
1896	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1897	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1898out:
1899	mutex_unlock(&mlxsw_sp->router->lock);
1900	return err;
1901}
1902
1903void mlxsw_sp_router_nve_demote_decap(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
1904				      enum mlxsw_sp_l3proto ul_proto,
1905				      const union mlxsw_sp_l3addr *ul_sip)
1906{
1907	enum mlxsw_sp_fib_entry_type type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
1908	struct mlxsw_sp_router *router = mlxsw_sp->router;
1909	struct mlxsw_sp_fib_entry *fib_entry;
1910
1911	mutex_lock(&mlxsw_sp->router->lock);
1912
1913	if (WARN_ON_ONCE(!router->nve_decap_config.valid))
1914		goto out;
1915
1916	router->nve_decap_config.valid = false;
1917
1918	fib_entry = mlxsw_sp_router_ip2me_fib_entry_find(mlxsw_sp, ul_tb_id,
1919							 ul_proto, ul_sip,
1920							 type);
1921	if (!fib_entry)
1922		goto out;
1923
1924	fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
1925	mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
1926out:
1927	mutex_unlock(&mlxsw_sp->router->lock);
1928}
1929
1930static bool mlxsw_sp_router_nve_is_decap(struct mlxsw_sp *mlxsw_sp,
1931					 u32 ul_tb_id,
1932					 enum mlxsw_sp_l3proto ul_proto,
1933					 const union mlxsw_sp_l3addr *ul_sip)
1934{
1935	struct mlxsw_sp_router *router = mlxsw_sp->router;
1936
1937	return router->nve_decap_config.valid &&
1938	       router->nve_decap_config.ul_tb_id == ul_tb_id &&
1939	       router->nve_decap_config.ul_proto == ul_proto &&
1940	       !memcmp(&router->nve_decap_config.ul_sip, ul_sip,
1941		       sizeof(*ul_sip));
1942}
1943
1944struct mlxsw_sp_neigh_key {
1945	struct neighbour *n;
1946};
1947
1948struct mlxsw_sp_neigh_entry {
1949	struct list_head rif_list_node;
1950	struct rhash_head ht_node;
1951	struct mlxsw_sp_neigh_key key;
1952	u16 rif;
1953	bool connected;
1954	unsigned char ha[ETH_ALEN];
1955	struct list_head nexthop_list; /* list of nexthops using
1956					* this neigh entry
1957					*/
1958	struct list_head nexthop_neighs_list_node;
1959	unsigned int counter_index;
1960	bool counter_valid;
1961};
1962
1963static const struct rhashtable_params mlxsw_sp_neigh_ht_params = {
1964	.key_offset = offsetof(struct mlxsw_sp_neigh_entry, key),
1965	.head_offset = offsetof(struct mlxsw_sp_neigh_entry, ht_node),
1966	.key_len = sizeof(struct mlxsw_sp_neigh_key),
1967};
1968
1969struct mlxsw_sp_neigh_entry *
1970mlxsw_sp_rif_neigh_next(struct mlxsw_sp_rif *rif,
1971			struct mlxsw_sp_neigh_entry *neigh_entry)
1972{
1973	if (!neigh_entry) {
1974		if (list_empty(&rif->neigh_list))
1975			return NULL;
1976		else
1977			return list_first_entry(&rif->neigh_list,
1978						typeof(*neigh_entry),
1979						rif_list_node);
1980	}
1981	if (list_is_last(&neigh_entry->rif_list_node, &rif->neigh_list))
1982		return NULL;
1983	return list_next_entry(neigh_entry, rif_list_node);
1984}
1985
1986int mlxsw_sp_neigh_entry_type(struct mlxsw_sp_neigh_entry *neigh_entry)
1987{
1988	return neigh_entry->key.n->tbl->family;
1989}
1990
1991unsigned char *
1992mlxsw_sp_neigh_entry_ha(struct mlxsw_sp_neigh_entry *neigh_entry)
1993{
1994	return neigh_entry->ha;
1995}
1996
1997u32 mlxsw_sp_neigh4_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
1998{
1999	struct neighbour *n;
2000
2001	n = neigh_entry->key.n;
2002	return ntohl(*((__be32 *) n->primary_key));
2003}
2004
2005struct in6_addr *
2006mlxsw_sp_neigh6_entry_dip(struct mlxsw_sp_neigh_entry *neigh_entry)
2007{
2008	struct neighbour *n;
2009
2010	n = neigh_entry->key.n;
2011	return (struct in6_addr *) &n->primary_key;
2012}
2013
2014int mlxsw_sp_neigh_counter_get(struct mlxsw_sp *mlxsw_sp,
2015			       struct mlxsw_sp_neigh_entry *neigh_entry,
2016			       u64 *p_counter)
2017{
2018	if (!neigh_entry->counter_valid)
2019		return -EINVAL;
2020
2021	return mlxsw_sp_flow_counter_get(mlxsw_sp, neigh_entry->counter_index,
2022					 p_counter, NULL);
2023}
2024
2025static struct mlxsw_sp_neigh_entry *
2026mlxsw_sp_neigh_entry_alloc(struct mlxsw_sp *mlxsw_sp, struct neighbour *n,
2027			   u16 rif)
2028{
2029	struct mlxsw_sp_neigh_entry *neigh_entry;
2030
2031	neigh_entry = kzalloc(sizeof(*neigh_entry), GFP_KERNEL);
2032	if (!neigh_entry)
2033		return NULL;
2034
2035	neigh_entry->key.n = n;
2036	neigh_entry->rif = rif;
2037	INIT_LIST_HEAD(&neigh_entry->nexthop_list);
2038
2039	return neigh_entry;
2040}
2041
2042static void mlxsw_sp_neigh_entry_free(struct mlxsw_sp_neigh_entry *neigh_entry)
2043{
2044	kfree(neigh_entry);
2045}
2046
2047static int
2048mlxsw_sp_neigh_entry_insert(struct mlxsw_sp *mlxsw_sp,
2049			    struct mlxsw_sp_neigh_entry *neigh_entry)
2050{
2051	return rhashtable_insert_fast(&mlxsw_sp->router->neigh_ht,
2052				      &neigh_entry->ht_node,
2053				      mlxsw_sp_neigh_ht_params);
2054}
2055
2056static void
2057mlxsw_sp_neigh_entry_remove(struct mlxsw_sp *mlxsw_sp,
2058			    struct mlxsw_sp_neigh_entry *neigh_entry)
2059{
2060	rhashtable_remove_fast(&mlxsw_sp->router->neigh_ht,
2061			       &neigh_entry->ht_node,
2062			       mlxsw_sp_neigh_ht_params);
2063}
2064
2065static bool
2066mlxsw_sp_neigh_counter_should_alloc(struct mlxsw_sp *mlxsw_sp,
2067				    struct mlxsw_sp_neigh_entry *neigh_entry)
2068{
2069	struct devlink *devlink;
2070	const char *table_name;
2071
2072	switch (mlxsw_sp_neigh_entry_type(neigh_entry)) {
2073	case AF_INET:
2074		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST4;
2075		break;
2076	case AF_INET6:
2077		table_name = MLXSW_SP_DPIPE_TABLE_NAME_HOST6;
2078		break;
2079	default:
2080		WARN_ON(1);
2081		return false;
2082	}
2083
2084	devlink = priv_to_devlink(mlxsw_sp->core);
2085	return devlink_dpipe_table_counter_enabled(devlink, table_name);
2086}
2087
2088static void
2089mlxsw_sp_neigh_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2090			     struct mlxsw_sp_neigh_entry *neigh_entry)
2091{
2092	if (!mlxsw_sp_neigh_counter_should_alloc(mlxsw_sp, neigh_entry))
2093		return;
2094
2095	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &neigh_entry->counter_index))
2096		return;
2097
2098	neigh_entry->counter_valid = true;
2099}
2100
2101static void
2102mlxsw_sp_neigh_counter_free(struct mlxsw_sp *mlxsw_sp,
2103			    struct mlxsw_sp_neigh_entry *neigh_entry)
2104{
2105	if (!neigh_entry->counter_valid)
2106		return;
2107	mlxsw_sp_flow_counter_free(mlxsw_sp,
2108				   neigh_entry->counter_index);
2109	neigh_entry->counter_valid = false;
2110}
2111
2112static struct mlxsw_sp_neigh_entry *
2113mlxsw_sp_neigh_entry_create(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2114{
2115	struct mlxsw_sp_neigh_entry *neigh_entry;
2116	struct mlxsw_sp_rif *rif;
2117	int err;
2118
2119	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, n->dev);
2120	if (!rif)
2121		return ERR_PTR(-EINVAL);
2122
2123	neigh_entry = mlxsw_sp_neigh_entry_alloc(mlxsw_sp, n, rif->rif_index);
2124	if (!neigh_entry)
2125		return ERR_PTR(-ENOMEM);
2126
2127	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
2128	if (err)
2129		goto err_neigh_entry_insert;
2130
2131	mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2132	list_add(&neigh_entry->rif_list_node, &rif->neigh_list);
2133
2134	return neigh_entry;
2135
2136err_neigh_entry_insert:
2137	mlxsw_sp_neigh_entry_free(neigh_entry);
2138	return ERR_PTR(err);
2139}
2140
2141static void
2142mlxsw_sp_neigh_entry_destroy(struct mlxsw_sp *mlxsw_sp,
2143			     struct mlxsw_sp_neigh_entry *neigh_entry)
2144{
2145	list_del(&neigh_entry->rif_list_node);
2146	mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2147	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
2148	mlxsw_sp_neigh_entry_free(neigh_entry);
2149}
2150
2151static struct mlxsw_sp_neigh_entry *
2152mlxsw_sp_neigh_entry_lookup(struct mlxsw_sp *mlxsw_sp, struct neighbour *n)
2153{
2154	struct mlxsw_sp_neigh_key key;
2155
2156	key.n = n;
2157	return rhashtable_lookup_fast(&mlxsw_sp->router->neigh_ht,
2158				      &key, mlxsw_sp_neigh_ht_params);
2159}
2160
2161static void
2162mlxsw_sp_router_neighs_update_interval_init(struct mlxsw_sp *mlxsw_sp)
2163{
2164	unsigned long interval;
2165
2166#if IS_ENABLED(CONFIG_IPV6)
2167	interval = min_t(unsigned long,
2168			 NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME),
2169			 NEIGH_VAR(&nd_tbl.parms, DELAY_PROBE_TIME));
2170#else
2171	interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
2172#endif
2173	mlxsw_sp->router->neighs_update.interval = jiffies_to_msecs(interval);
2174}
2175
2176static void mlxsw_sp_router_neigh_ent_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2177						   char *rauhtd_pl,
2178						   int ent_index)
2179{
2180	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
2181	struct net_device *dev;
2182	struct neighbour *n;
2183	__be32 dipn;
2184	u32 dip;
2185	u16 rif;
2186
2187	mlxsw_reg_rauhtd_ent_ipv4_unpack(rauhtd_pl, ent_index, &rif, &dip);
2188
2189	if (WARN_ON_ONCE(rif >= max_rifs))
2190		return;
2191	if (!mlxsw_sp->router->rifs[rif]) {
2192		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2193		return;
2194	}
2195
2196	dipn = htonl(dip);
2197	dev = mlxsw_sp->router->rifs[rif]->dev;
2198	n = neigh_lookup(&arp_tbl, &dipn, dev);
2199	if (!n)
2200		return;
2201
2202	netdev_dbg(dev, "Updating neighbour with IP=%pI4h\n", &dip);
2203	neigh_event_send(n, NULL);
2204	neigh_release(n);
2205}
2206
2207#if IS_ENABLED(CONFIG_IPV6)
2208static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2209						   char *rauhtd_pl,
2210						   int rec_index)
2211{
2212	struct net_device *dev;
2213	struct neighbour *n;
2214	struct in6_addr dip;
2215	u16 rif;
2216
2217	mlxsw_reg_rauhtd_ent_ipv6_unpack(rauhtd_pl, rec_index, &rif,
2218					 (char *) &dip);
2219
2220	if (!mlxsw_sp->router->rifs[rif]) {
2221		dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Incorrect RIF in neighbour entry\n");
2222		return;
2223	}
2224
2225	dev = mlxsw_sp->router->rifs[rif]->dev;
2226	n = neigh_lookup(&nd_tbl, &dip, dev);
2227	if (!n)
2228		return;
2229
2230	netdev_dbg(dev, "Updating neighbour with IP=%pI6c\n", &dip);
2231	neigh_event_send(n, NULL);
2232	neigh_release(n);
2233}
2234#else
2235static void mlxsw_sp_router_neigh_ent_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2236						   char *rauhtd_pl,
2237						   int rec_index)
2238{
2239}
2240#endif
2241
2242static void mlxsw_sp_router_neigh_rec_ipv4_process(struct mlxsw_sp *mlxsw_sp,
2243						   char *rauhtd_pl,
2244						   int rec_index)
2245{
2246	u8 num_entries;
2247	int i;
2248
2249	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2250								rec_index);
2251	/* Hardware starts counting at 0, so add 1. */
2252	num_entries++;
2253
2254	/* Each record consists of several neighbour entries. */
2255	for (i = 0; i < num_entries; i++) {
2256		int ent_index;
2257
2258		ent_index = rec_index * MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC + i;
2259		mlxsw_sp_router_neigh_ent_ipv4_process(mlxsw_sp, rauhtd_pl,
2260						       ent_index);
2261	}
2262
2263}
2264
2265static void mlxsw_sp_router_neigh_rec_ipv6_process(struct mlxsw_sp *mlxsw_sp,
2266						   char *rauhtd_pl,
2267						   int rec_index)
2268{
2269	/* One record contains one entry. */
2270	mlxsw_sp_router_neigh_ent_ipv6_process(mlxsw_sp, rauhtd_pl,
2271					       rec_index);
2272}
2273
2274static void mlxsw_sp_router_neigh_rec_process(struct mlxsw_sp *mlxsw_sp,
2275					      char *rauhtd_pl, int rec_index)
2276{
2277	switch (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, rec_index)) {
2278	case MLXSW_REG_RAUHTD_TYPE_IPV4:
2279		mlxsw_sp_router_neigh_rec_ipv4_process(mlxsw_sp, rauhtd_pl,
2280						       rec_index);
2281		break;
2282	case MLXSW_REG_RAUHTD_TYPE_IPV6:
2283		mlxsw_sp_router_neigh_rec_ipv6_process(mlxsw_sp, rauhtd_pl,
2284						       rec_index);
2285		break;
2286	}
2287}
2288
2289static bool mlxsw_sp_router_rauhtd_is_full(char *rauhtd_pl)
2290{
2291	u8 num_rec, last_rec_index, num_entries;
2292
2293	num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2294	last_rec_index = num_rec - 1;
2295
2296	if (num_rec < MLXSW_REG_RAUHTD_REC_MAX_NUM)
2297		return false;
2298	if (mlxsw_reg_rauhtd_rec_type_get(rauhtd_pl, last_rec_index) ==
2299	    MLXSW_REG_RAUHTD_TYPE_IPV6)
2300		return true;
2301
2302	num_entries = mlxsw_reg_rauhtd_ipv4_rec_num_entries_get(rauhtd_pl,
2303								last_rec_index);
2304	if (++num_entries == MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC)
2305		return true;
2306	return false;
2307}
2308
2309static int
2310__mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp,
2311				       char *rauhtd_pl,
2312				       enum mlxsw_reg_rauhtd_type type)
2313{
2314	int i, num_rec;
2315	int err;
2316
2317	/* Ensure the RIF we read from the device does not change mid-dump. */
2318	mutex_lock(&mlxsw_sp->router->lock);
2319	do {
2320		mlxsw_reg_rauhtd_pack(rauhtd_pl, type);
2321		err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(rauhtd),
2322				      rauhtd_pl);
2323		if (err) {
2324			dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to dump neighbour table\n");
2325			break;
2326		}
2327		num_rec = mlxsw_reg_rauhtd_num_rec_get(rauhtd_pl);
2328		for (i = 0; i < num_rec; i++)
2329			mlxsw_sp_router_neigh_rec_process(mlxsw_sp, rauhtd_pl,
2330							  i);
2331	} while (mlxsw_sp_router_rauhtd_is_full(rauhtd_pl));
2332	mutex_unlock(&mlxsw_sp->router->lock);
2333
2334	return err;
2335}
2336
2337static int mlxsw_sp_router_neighs_update_rauhtd(struct mlxsw_sp *mlxsw_sp)
2338{
2339	enum mlxsw_reg_rauhtd_type type;
2340	char *rauhtd_pl;
2341	int err;
2342
2343	rauhtd_pl = kmalloc(MLXSW_REG_RAUHTD_LEN, GFP_KERNEL);
2344	if (!rauhtd_pl)
2345		return -ENOMEM;
2346
2347	type = MLXSW_REG_RAUHTD_TYPE_IPV4;
2348	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2349	if (err)
2350		goto out;
2351
2352	type = MLXSW_REG_RAUHTD_TYPE_IPV6;
2353	err = __mlxsw_sp_router_neighs_update_rauhtd(mlxsw_sp, rauhtd_pl, type);
2354out:
2355	kfree(rauhtd_pl);
2356	return err;
2357}
2358
2359static void mlxsw_sp_router_neighs_update_nh(struct mlxsw_sp *mlxsw_sp)
2360{
2361	struct mlxsw_sp_neigh_entry *neigh_entry;
2362
2363	mutex_lock(&mlxsw_sp->router->lock);
2364	list_for_each_entry(neigh_entry, &mlxsw_sp->router->nexthop_neighs_list,
2365			    nexthop_neighs_list_node)
2366		/* If this neigh have nexthops, make the kernel think this neigh
2367		 * is active regardless of the traffic.
2368		 */
2369		neigh_event_send(neigh_entry->key.n, NULL);
2370	mutex_unlock(&mlxsw_sp->router->lock);
2371}
2372
2373static void
2374mlxsw_sp_router_neighs_update_work_schedule(struct mlxsw_sp *mlxsw_sp)
2375{
2376	unsigned long interval = mlxsw_sp->router->neighs_update.interval;
2377
2378	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw,
2379			       msecs_to_jiffies(interval));
2380}
2381
2382static void mlxsw_sp_router_neighs_update_work(struct work_struct *work)
2383{
2384	struct mlxsw_sp_router *router;
2385	int err;
2386
2387	router = container_of(work, struct mlxsw_sp_router,
2388			      neighs_update.dw.work);
2389	err = mlxsw_sp_router_neighs_update_rauhtd(router->mlxsw_sp);
2390	if (err)
2391		dev_err(router->mlxsw_sp->bus_info->dev, "Could not update kernel for neigh activity");
2392
2393	mlxsw_sp_router_neighs_update_nh(router->mlxsw_sp);
2394
2395	mlxsw_sp_router_neighs_update_work_schedule(router->mlxsw_sp);
2396}
2397
2398static void mlxsw_sp_router_probe_unresolved_nexthops(struct work_struct *work)
2399{
2400	struct mlxsw_sp_neigh_entry *neigh_entry;
2401	struct mlxsw_sp_router *router;
2402
2403	router = container_of(work, struct mlxsw_sp_router,
2404			      nexthop_probe_dw.work);
2405	/* Iterate over nexthop neighbours, find those who are unresolved and
2406	 * send arp on them. This solves the chicken-egg problem when
2407	 * the nexthop wouldn't get offloaded until the neighbor is resolved
2408	 * but it wouldn't get resolved ever in case traffic is flowing in HW
2409	 * using different nexthop.
2410	 */
2411	mutex_lock(&router->lock);
2412	list_for_each_entry(neigh_entry, &router->nexthop_neighs_list,
2413			    nexthop_neighs_list_node)
2414		if (!neigh_entry->connected)
2415			neigh_event_send(neigh_entry->key.n, NULL);
2416	mutex_unlock(&router->lock);
2417
2418	mlxsw_core_schedule_dw(&router->nexthop_probe_dw,
2419			       MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL);
2420}
2421
2422static void
2423mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
2424			      struct mlxsw_sp_neigh_entry *neigh_entry,
2425			      bool removing, bool dead);
2426
2427static enum mlxsw_reg_rauht_op mlxsw_sp_rauht_op(bool adding)
2428{
2429	return adding ? MLXSW_REG_RAUHT_OP_WRITE_ADD :
2430			MLXSW_REG_RAUHT_OP_WRITE_DELETE;
2431}
2432
2433static int
2434mlxsw_sp_router_neigh_entry_op4(struct mlxsw_sp *mlxsw_sp,
2435				struct mlxsw_sp_neigh_entry *neigh_entry,
2436				enum mlxsw_reg_rauht_op op)
2437{
2438	struct neighbour *n = neigh_entry->key.n;
2439	u32 dip = ntohl(*((__be32 *) n->primary_key));
2440	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2441
2442	mlxsw_reg_rauht_pack4(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2443			      dip);
2444	if (neigh_entry->counter_valid)
2445		mlxsw_reg_rauht_pack_counter(rauht_pl,
2446					     neigh_entry->counter_index);
2447	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2448}
2449
2450static int
2451mlxsw_sp_router_neigh_entry_op6(struct mlxsw_sp *mlxsw_sp,
2452				struct mlxsw_sp_neigh_entry *neigh_entry,
2453				enum mlxsw_reg_rauht_op op)
2454{
2455	struct neighbour *n = neigh_entry->key.n;
2456	char rauht_pl[MLXSW_REG_RAUHT_LEN];
2457	const char *dip = n->primary_key;
2458
2459	mlxsw_reg_rauht_pack6(rauht_pl, op, neigh_entry->rif, neigh_entry->ha,
2460			      dip);
2461	if (neigh_entry->counter_valid)
2462		mlxsw_reg_rauht_pack_counter(rauht_pl,
2463					     neigh_entry->counter_index);
2464	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rauht), rauht_pl);
2465}
2466
2467bool mlxsw_sp_neigh_ipv6_ignore(struct mlxsw_sp_neigh_entry *neigh_entry)
2468{
2469	struct neighbour *n = neigh_entry->key.n;
2470
2471	/* Packets with a link-local destination address are trapped
2472	 * after LPM lookup and never reach the neighbour table, so
2473	 * there is no need to program such neighbours to the device.
2474	 */
2475	if (ipv6_addr_type((struct in6_addr *) &n->primary_key) &
2476	    IPV6_ADDR_LINKLOCAL)
2477		return true;
2478	return false;
2479}
2480
2481static void
2482mlxsw_sp_neigh_entry_update(struct mlxsw_sp *mlxsw_sp,
2483			    struct mlxsw_sp_neigh_entry *neigh_entry,
2484			    bool adding)
2485{
2486	enum mlxsw_reg_rauht_op op = mlxsw_sp_rauht_op(adding);
2487	int err;
2488
2489	if (!adding && !neigh_entry->connected)
2490		return;
2491	neigh_entry->connected = adding;
2492	if (neigh_entry->key.n->tbl->family == AF_INET) {
2493		err = mlxsw_sp_router_neigh_entry_op4(mlxsw_sp, neigh_entry,
2494						      op);
2495		if (err)
2496			return;
2497	} else if (neigh_entry->key.n->tbl->family == AF_INET6) {
2498		if (mlxsw_sp_neigh_ipv6_ignore(neigh_entry))
2499			return;
2500		err = mlxsw_sp_router_neigh_entry_op6(mlxsw_sp, neigh_entry,
2501						      op);
2502		if (err)
2503			return;
2504	} else {
2505		WARN_ON_ONCE(1);
2506		return;
2507	}
2508
2509	if (adding)
2510		neigh_entry->key.n->flags |= NTF_OFFLOADED;
2511	else
2512		neigh_entry->key.n->flags &= ~NTF_OFFLOADED;
2513}
2514
2515void
2516mlxsw_sp_neigh_entry_counter_update(struct mlxsw_sp *mlxsw_sp,
2517				    struct mlxsw_sp_neigh_entry *neigh_entry,
2518				    bool adding)
2519{
2520	if (adding)
2521		mlxsw_sp_neigh_counter_alloc(mlxsw_sp, neigh_entry);
2522	else
2523		mlxsw_sp_neigh_counter_free(mlxsw_sp, neigh_entry);
2524	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, true);
2525}
2526
2527struct mlxsw_sp_netevent_work {
2528	struct work_struct work;
2529	struct mlxsw_sp *mlxsw_sp;
2530	struct neighbour *n;
2531};
2532
2533static void mlxsw_sp_router_neigh_event_work(struct work_struct *work)
2534{
2535	struct mlxsw_sp_netevent_work *net_work =
2536		container_of(work, struct mlxsw_sp_netevent_work, work);
2537	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2538	struct mlxsw_sp_neigh_entry *neigh_entry;
2539	struct neighbour *n = net_work->n;
2540	unsigned char ha[ETH_ALEN];
2541	bool entry_connected;
2542	u8 nud_state, dead;
2543
2544	/* If these parameters are changed after we release the lock,
2545	 * then we are guaranteed to receive another event letting us
2546	 * know about it.
2547	 */
2548	read_lock_bh(&n->lock);
2549	memcpy(ha, n->ha, ETH_ALEN);
2550	nud_state = n->nud_state;
2551	dead = n->dead;
2552	read_unlock_bh(&n->lock);
2553
2554	mutex_lock(&mlxsw_sp->router->lock);
2555	mlxsw_sp_span_respin(mlxsw_sp);
2556
2557	entry_connected = nud_state & NUD_VALID && !dead;
2558	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
2559	if (!entry_connected && !neigh_entry)
2560		goto out;
2561	if (!neigh_entry) {
2562		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
2563		if (IS_ERR(neigh_entry))
2564			goto out;
2565	}
2566
2567	memcpy(neigh_entry->ha, ha, ETH_ALEN);
2568	mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, entry_connected);
2569	mlxsw_sp_nexthop_neigh_update(mlxsw_sp, neigh_entry, !entry_connected,
2570				      dead);
2571
2572	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
2573		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2574
2575out:
2576	mutex_unlock(&mlxsw_sp->router->lock);
2577	neigh_release(n);
2578	kfree(net_work);
2579}
2580
2581static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp);
2582
2583static void mlxsw_sp_router_mp_hash_event_work(struct work_struct *work)
2584{
2585	struct mlxsw_sp_netevent_work *net_work =
2586		container_of(work, struct mlxsw_sp_netevent_work, work);
2587	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2588
2589	mlxsw_sp_mp_hash_init(mlxsw_sp);
2590	kfree(net_work);
2591}
2592
2593static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp);
2594
2595static void mlxsw_sp_router_update_priority_work(struct work_struct *work)
2596{
2597	struct mlxsw_sp_netevent_work *net_work =
2598		container_of(work, struct mlxsw_sp_netevent_work, work);
2599	struct mlxsw_sp *mlxsw_sp = net_work->mlxsw_sp;
2600
2601	__mlxsw_sp_router_init(mlxsw_sp);
2602	kfree(net_work);
2603}
2604
2605static int mlxsw_sp_router_schedule_work(struct net *net,
2606					 struct notifier_block *nb,
2607					 void (*cb)(struct work_struct *))
2608{
2609	struct mlxsw_sp_netevent_work *net_work;
2610	struct mlxsw_sp_router *router;
2611
2612	router = container_of(nb, struct mlxsw_sp_router, netevent_nb);
2613	if (!net_eq(net, mlxsw_sp_net(router->mlxsw_sp)))
2614		return NOTIFY_DONE;
2615
2616	net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2617	if (!net_work)
2618		return NOTIFY_BAD;
2619
2620	INIT_WORK(&net_work->work, cb);
2621	net_work->mlxsw_sp = router->mlxsw_sp;
2622	mlxsw_core_schedule_work(&net_work->work);
2623	return NOTIFY_DONE;
2624}
2625
2626static int mlxsw_sp_router_netevent_event(struct notifier_block *nb,
2627					  unsigned long event, void *ptr)
2628{
2629	struct mlxsw_sp_netevent_work *net_work;
2630	struct mlxsw_sp_port *mlxsw_sp_port;
2631	struct mlxsw_sp *mlxsw_sp;
2632	unsigned long interval;
2633	struct neigh_parms *p;
2634	struct neighbour *n;
2635
2636	switch (event) {
2637	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
2638		p = ptr;
2639
2640		/* We don't care about changes in the default table. */
2641		if (!p->dev || (p->tbl->family != AF_INET &&
2642				p->tbl->family != AF_INET6))
2643			return NOTIFY_DONE;
2644
2645		/* We are in atomic context and can't take RTNL mutex,
2646		 * so use RCU variant to walk the device chain.
2647		 */
2648		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(p->dev);
2649		if (!mlxsw_sp_port)
2650			return NOTIFY_DONE;
2651
2652		mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2653		interval = jiffies_to_msecs(NEIGH_VAR(p, DELAY_PROBE_TIME));
2654		mlxsw_sp->router->neighs_update.interval = interval;
2655
2656		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2657		break;
2658	case NETEVENT_NEIGH_UPDATE:
2659		n = ptr;
2660
2661		if (n->tbl->family != AF_INET && n->tbl->family != AF_INET6)
2662			return NOTIFY_DONE;
2663
2664		mlxsw_sp_port = mlxsw_sp_port_lower_dev_hold(n->dev);
2665		if (!mlxsw_sp_port)
2666			return NOTIFY_DONE;
2667
2668		net_work = kzalloc(sizeof(*net_work), GFP_ATOMIC);
2669		if (!net_work) {
2670			mlxsw_sp_port_dev_put(mlxsw_sp_port);
2671			return NOTIFY_BAD;
2672		}
2673
2674		INIT_WORK(&net_work->work, mlxsw_sp_router_neigh_event_work);
2675		net_work->mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
2676		net_work->n = n;
2677
2678		/* Take a reference to ensure the neighbour won't be
2679		 * destructed until we drop the reference in delayed
2680		 * work.
2681		 */
2682		neigh_clone(n);
2683		mlxsw_core_schedule_work(&net_work->work);
2684		mlxsw_sp_port_dev_put(mlxsw_sp_port);
2685		break;
2686	case NETEVENT_IPV4_MPATH_HASH_UPDATE:
2687	case NETEVENT_IPV6_MPATH_HASH_UPDATE:
2688		return mlxsw_sp_router_schedule_work(ptr, nb,
2689				mlxsw_sp_router_mp_hash_event_work);
2690
2691	case NETEVENT_IPV4_FWD_UPDATE_PRIORITY_UPDATE:
2692		return mlxsw_sp_router_schedule_work(ptr, nb,
2693				mlxsw_sp_router_update_priority_work);
2694	}
2695
2696	return NOTIFY_DONE;
2697}
2698
2699static int mlxsw_sp_neigh_init(struct mlxsw_sp *mlxsw_sp)
2700{
2701	int err;
2702
2703	err = rhashtable_init(&mlxsw_sp->router->neigh_ht,
2704			      &mlxsw_sp_neigh_ht_params);
2705	if (err)
2706		return err;
2707
2708	/* Initialize the polling interval according to the default
2709	 * table.
2710	 */
2711	mlxsw_sp_router_neighs_update_interval_init(mlxsw_sp);
2712
2713	/* Create the delayed works for the activity_update */
2714	INIT_DELAYED_WORK(&mlxsw_sp->router->neighs_update.dw,
2715			  mlxsw_sp_router_neighs_update_work);
2716	INIT_DELAYED_WORK(&mlxsw_sp->router->nexthop_probe_dw,
2717			  mlxsw_sp_router_probe_unresolved_nexthops);
2718	mlxsw_core_schedule_dw(&mlxsw_sp->router->neighs_update.dw, 0);
2719	mlxsw_core_schedule_dw(&mlxsw_sp->router->nexthop_probe_dw, 0);
2720	return 0;
2721}
2722
2723static void mlxsw_sp_neigh_fini(struct mlxsw_sp *mlxsw_sp)
2724{
2725	cancel_delayed_work_sync(&mlxsw_sp->router->neighs_update.dw);
2726	cancel_delayed_work_sync(&mlxsw_sp->router->nexthop_probe_dw);
2727	rhashtable_destroy(&mlxsw_sp->router->neigh_ht);
2728}
2729
2730static void mlxsw_sp_neigh_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
2731					 struct mlxsw_sp_rif *rif)
2732{
2733	struct mlxsw_sp_neigh_entry *neigh_entry, *tmp;
2734
2735	list_for_each_entry_safe(neigh_entry, tmp, &rif->neigh_list,
2736				 rif_list_node) {
2737		mlxsw_sp_neigh_entry_update(mlxsw_sp, neigh_entry, false);
2738		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
2739	}
2740}
2741
2742enum mlxsw_sp_nexthop_type {
2743	MLXSW_SP_NEXTHOP_TYPE_ETH,
2744	MLXSW_SP_NEXTHOP_TYPE_IPIP,
2745};
2746
2747struct mlxsw_sp_nexthop_key {
2748	struct fib_nh *fib_nh;
2749};
2750
2751struct mlxsw_sp_nexthop {
2752	struct list_head neigh_list_node; /* member of neigh entry list */
2753	struct list_head rif_list_node;
2754	struct list_head router_list_node;
2755	struct mlxsw_sp_nexthop_group *nh_grp; /* pointer back to the group
2756						* this belongs to
2757						*/
2758	struct rhash_head ht_node;
2759	struct mlxsw_sp_nexthop_key key;
2760	unsigned char gw_addr[sizeof(struct in6_addr)];
2761	int ifindex;
2762	int nh_weight;
2763	int norm_nh_weight;
2764	int num_adj_entries;
2765	struct mlxsw_sp_rif *rif;
2766	u8 should_offload:1, /* set indicates this neigh is connected and
2767			      * should be put to KVD linear area of this group.
2768			      */
2769	   offloaded:1, /* set in case the neigh is actually put into
2770			 * KVD linear area of this group.
2771			 */
2772	   update:1; /* set indicates that MAC of this neigh should be
2773		      * updated in HW
2774		      */
2775	enum mlxsw_sp_nexthop_type type;
2776	union {
2777		struct mlxsw_sp_neigh_entry *neigh_entry;
2778		struct mlxsw_sp_ipip_entry *ipip_entry;
2779	};
2780	unsigned int counter_index;
2781	bool counter_valid;
2782};
2783
2784struct mlxsw_sp_nexthop_group {
2785	void *priv;
2786	struct rhash_head ht_node;
2787	struct list_head fib_list; /* list of fib entries that use this group */
2788	struct neigh_table *neigh_tbl;
2789	u8 adj_index_valid:1,
2790	   gateway:1; /* routes using the group use a gateway */
2791	u32 adj_index;
2792	u16 ecmp_size;
2793	u16 count;
2794	int sum_norm_weight;
2795	struct mlxsw_sp_nexthop nexthops[0];
2796#define nh_rif	nexthops[0].rif
2797};
2798
2799void mlxsw_sp_nexthop_counter_alloc(struct mlxsw_sp *mlxsw_sp,
2800				    struct mlxsw_sp_nexthop *nh)
2801{
2802	struct devlink *devlink;
2803
2804	devlink = priv_to_devlink(mlxsw_sp->core);
2805	if (!devlink_dpipe_table_counter_enabled(devlink,
2806						 MLXSW_SP_DPIPE_TABLE_NAME_ADJ))
2807		return;
2808
2809	if (mlxsw_sp_flow_counter_alloc(mlxsw_sp, &nh->counter_index))
2810		return;
2811
2812	nh->counter_valid = true;
2813}
2814
2815void mlxsw_sp_nexthop_counter_free(struct mlxsw_sp *mlxsw_sp,
2816				   struct mlxsw_sp_nexthop *nh)
2817{
2818	if (!nh->counter_valid)
2819		return;
2820	mlxsw_sp_flow_counter_free(mlxsw_sp, nh->counter_index);
2821	nh->counter_valid = false;
2822}
2823
2824int mlxsw_sp_nexthop_counter_get(struct mlxsw_sp *mlxsw_sp,
2825				 struct mlxsw_sp_nexthop *nh, u64 *p_counter)
2826{
2827	if (!nh->counter_valid)
2828		return -EINVAL;
2829
2830	return mlxsw_sp_flow_counter_get(mlxsw_sp, nh->counter_index,
2831					 p_counter, NULL);
2832}
2833
2834struct mlxsw_sp_nexthop *mlxsw_sp_nexthop_next(struct mlxsw_sp_router *router,
2835					       struct mlxsw_sp_nexthop *nh)
2836{
2837	if (!nh) {
2838		if (list_empty(&router->nexthop_list))
2839			return NULL;
2840		else
2841			return list_first_entry(&router->nexthop_list,
2842						typeof(*nh), router_list_node);
2843	}
2844	if (list_is_last(&nh->router_list_node, &router->nexthop_list))
2845		return NULL;
2846	return list_next_entry(nh, router_list_node);
2847}
2848
2849bool mlxsw_sp_nexthop_offload(struct mlxsw_sp_nexthop *nh)
2850{
2851	return nh->offloaded;
2852}
2853
2854unsigned char *mlxsw_sp_nexthop_ha(struct mlxsw_sp_nexthop *nh)
2855{
2856	if (!nh->offloaded)
2857		return NULL;
2858	return nh->neigh_entry->ha;
2859}
2860
2861int mlxsw_sp_nexthop_indexes(struct mlxsw_sp_nexthop *nh, u32 *p_adj_index,
2862			     u32 *p_adj_size, u32 *p_adj_hash_index)
2863{
2864	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2865	u32 adj_hash_index = 0;
2866	int i;
2867
2868	if (!nh->offloaded || !nh_grp->adj_index_valid)
2869		return -EINVAL;
2870
2871	*p_adj_index = nh_grp->adj_index;
2872	*p_adj_size = nh_grp->ecmp_size;
2873
2874	for (i = 0; i < nh_grp->count; i++) {
2875		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2876
2877		if (nh_iter == nh)
2878			break;
2879		if (nh_iter->offloaded)
2880			adj_hash_index += nh_iter->num_adj_entries;
2881	}
2882
2883	*p_adj_hash_index = adj_hash_index;
2884	return 0;
2885}
2886
2887struct mlxsw_sp_rif *mlxsw_sp_nexthop_rif(struct mlxsw_sp_nexthop *nh)
2888{
2889	return nh->rif;
2890}
2891
2892bool mlxsw_sp_nexthop_group_has_ipip(struct mlxsw_sp_nexthop *nh)
2893{
2894	struct mlxsw_sp_nexthop_group *nh_grp = nh->nh_grp;
2895	int i;
2896
2897	for (i = 0; i < nh_grp->count; i++) {
2898		struct mlxsw_sp_nexthop *nh_iter = &nh_grp->nexthops[i];
2899
2900		if (nh_iter->type == MLXSW_SP_NEXTHOP_TYPE_IPIP)
2901			return true;
2902	}
2903	return false;
2904}
2905
2906static struct fib_info *
2907mlxsw_sp_nexthop4_group_fi(const struct mlxsw_sp_nexthop_group *nh_grp)
2908{
2909	return nh_grp->priv;
2910}
2911
2912struct mlxsw_sp_nexthop_group_cmp_arg {
2913	enum mlxsw_sp_l3proto proto;
2914	union {
2915		struct fib_info *fi;
2916		struct mlxsw_sp_fib6_entry *fib6_entry;
2917	};
2918};
2919
2920static bool
2921mlxsw_sp_nexthop6_group_has_nexthop(const struct mlxsw_sp_nexthop_group *nh_grp,
2922				    const struct in6_addr *gw, int ifindex,
2923				    int weight)
2924{
2925	int i;
2926
2927	for (i = 0; i < nh_grp->count; i++) {
2928		const struct mlxsw_sp_nexthop *nh;
2929
2930		nh = &nh_grp->nexthops[i];
2931		if (nh->ifindex == ifindex && nh->nh_weight == weight &&
2932		    ipv6_addr_equal(gw, (struct in6_addr *) nh->gw_addr))
2933			return true;
2934	}
2935
2936	return false;
2937}
2938
2939static bool
2940mlxsw_sp_nexthop6_group_cmp(const struct mlxsw_sp_nexthop_group *nh_grp,
2941			    const struct mlxsw_sp_fib6_entry *fib6_entry)
2942{
2943	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
2944
2945	if (nh_grp->count != fib6_entry->nrt6)
2946		return false;
2947
2948	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
2949		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
2950		struct in6_addr *gw;
2951		int ifindex, weight;
2952
2953		ifindex = fib6_nh->fib_nh_dev->ifindex;
2954		weight = fib6_nh->fib_nh_weight;
2955		gw = &fib6_nh->fib_nh_gw6;
2956		if (!mlxsw_sp_nexthop6_group_has_nexthop(nh_grp, gw, ifindex,
2957							 weight))
2958			return false;
2959	}
2960
2961	return true;
2962}
2963
2964static int
2965mlxsw_sp_nexthop_group_cmp(struct rhashtable_compare_arg *arg, const void *ptr)
2966{
2967	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = arg->key;
2968	const struct mlxsw_sp_nexthop_group *nh_grp = ptr;
2969
2970	switch (cmp_arg->proto) {
2971	case MLXSW_SP_L3_PROTO_IPV4:
2972		return cmp_arg->fi != mlxsw_sp_nexthop4_group_fi(nh_grp);
2973	case MLXSW_SP_L3_PROTO_IPV6:
2974		return !mlxsw_sp_nexthop6_group_cmp(nh_grp,
2975						    cmp_arg->fib6_entry);
2976	default:
2977		WARN_ON(1);
2978		return 1;
2979	}
2980}
2981
2982static int
2983mlxsw_sp_nexthop_group_type(const struct mlxsw_sp_nexthop_group *nh_grp)
2984{
2985	return nh_grp->neigh_tbl->family;
2986}
2987
2988static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
2989{
2990	const struct mlxsw_sp_nexthop_group *nh_grp = data;
2991	const struct mlxsw_sp_nexthop *nh;
2992	struct fib_info *fi;
2993	unsigned int val;
2994	int i;
2995
2996	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
2997	case AF_INET:
2998		fi = mlxsw_sp_nexthop4_group_fi(nh_grp);
2999		return jhash(&fi, sizeof(fi), seed);
3000	case AF_INET6:
3001		val = nh_grp->count;
3002		for (i = 0; i < nh_grp->count; i++) {
3003			nh = &nh_grp->nexthops[i];
3004			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
3005			val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
3006		}
3007		return jhash(&val, sizeof(val), seed);
3008	default:
3009		WARN_ON(1);
3010		return 0;
3011	}
3012}
3013
3014static u32
3015mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
3016{
3017	unsigned int val = fib6_entry->nrt6;
3018	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3019
3020	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3021		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3022		struct net_device *dev = fib6_nh->fib_nh_dev;
3023		struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
3024
3025		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
3026		val ^= jhash(gw, sizeof(*gw), seed);
3027	}
3028
3029	return jhash(&val, sizeof(val), seed);
3030}
3031
3032static u32
3033mlxsw_sp_nexthop_group_hash(const void *data, u32 len, u32 seed)
3034{
3035	const struct mlxsw_sp_nexthop_group_cmp_arg *cmp_arg = data;
3036
3037	switch (cmp_arg->proto) {
3038	case MLXSW_SP_L3_PROTO_IPV4:
3039		return jhash(&cmp_arg->fi, sizeof(cmp_arg->fi), seed);
3040	case MLXSW_SP_L3_PROTO_IPV6:
3041		return mlxsw_sp_nexthop6_group_hash(cmp_arg->fib6_entry, seed);
3042	default:
3043		WARN_ON(1);
3044		return 0;
3045	}
3046}
3047
3048static const struct rhashtable_params mlxsw_sp_nexthop_group_ht_params = {
3049	.head_offset = offsetof(struct mlxsw_sp_nexthop_group, ht_node),
3050	.hashfn	     = mlxsw_sp_nexthop_group_hash,
3051	.obj_hashfn  = mlxsw_sp_nexthop_group_hash_obj,
3052	.obj_cmpfn   = mlxsw_sp_nexthop_group_cmp,
3053};
3054
3055static int mlxsw_sp_nexthop_group_insert(struct mlxsw_sp *mlxsw_sp,
3056					 struct mlxsw_sp_nexthop_group *nh_grp)
3057{
3058	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3059	    !nh_grp->gateway)
3060		return 0;
3061
3062	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_group_ht,
3063				      &nh_grp->ht_node,
3064				      mlxsw_sp_nexthop_group_ht_params);
3065}
3066
3067static void mlxsw_sp_nexthop_group_remove(struct mlxsw_sp *mlxsw_sp,
3068					  struct mlxsw_sp_nexthop_group *nh_grp)
3069{
3070	if (mlxsw_sp_nexthop_group_type(nh_grp) == AF_INET6 &&
3071	    !nh_grp->gateway)
3072		return;
3073
3074	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_group_ht,
3075			       &nh_grp->ht_node,
3076			       mlxsw_sp_nexthop_group_ht_params);
3077}
3078
3079static struct mlxsw_sp_nexthop_group *
3080mlxsw_sp_nexthop4_group_lookup(struct mlxsw_sp *mlxsw_sp,
3081			       struct fib_info *fi)
3082{
3083	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3084
3085	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV4;
3086	cmp_arg.fi = fi;
3087	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3088				      &cmp_arg,
3089				      mlxsw_sp_nexthop_group_ht_params);
3090}
3091
3092static struct mlxsw_sp_nexthop_group *
3093mlxsw_sp_nexthop6_group_lookup(struct mlxsw_sp *mlxsw_sp,
3094			       struct mlxsw_sp_fib6_entry *fib6_entry)
3095{
3096	struct mlxsw_sp_nexthop_group_cmp_arg cmp_arg;
3097
3098	cmp_arg.proto = MLXSW_SP_L3_PROTO_IPV6;
3099	cmp_arg.fib6_entry = fib6_entry;
3100	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_group_ht,
3101				      &cmp_arg,
3102				      mlxsw_sp_nexthop_group_ht_params);
3103}
3104
3105static const struct rhashtable_params mlxsw_sp_nexthop_ht_params = {
3106	.key_offset = offsetof(struct mlxsw_sp_nexthop, key),
3107	.head_offset = offsetof(struct mlxsw_sp_nexthop, ht_node),
3108	.key_len = sizeof(struct mlxsw_sp_nexthop_key),
3109};
3110
3111static int mlxsw_sp_nexthop_insert(struct mlxsw_sp *mlxsw_sp,
3112				   struct mlxsw_sp_nexthop *nh)
3113{
3114	return rhashtable_insert_fast(&mlxsw_sp->router->nexthop_ht,
3115				      &nh->ht_node, mlxsw_sp_nexthop_ht_params);
3116}
3117
3118static void mlxsw_sp_nexthop_remove(struct mlxsw_sp *mlxsw_sp,
3119				    struct mlxsw_sp_nexthop *nh)
3120{
3121	rhashtable_remove_fast(&mlxsw_sp->router->nexthop_ht, &nh->ht_node,
3122			       mlxsw_sp_nexthop_ht_params);
3123}
3124
3125static struct mlxsw_sp_nexthop *
3126mlxsw_sp_nexthop_lookup(struct mlxsw_sp *mlxsw_sp,
3127			struct mlxsw_sp_nexthop_key key)
3128{
3129	return rhashtable_lookup_fast(&mlxsw_sp->router->nexthop_ht, &key,
3130				      mlxsw_sp_nexthop_ht_params);
3131}
3132
3133static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp,
3134					     const struct mlxsw_sp_fib *fib,
3135					     u32 adj_index, u16 ecmp_size,
3136					     u32 new_adj_index,
3137					     u16 new_ecmp_size)
3138{
3139	char raleu_pl[MLXSW_REG_RALEU_LEN];
3140
3141	mlxsw_reg_raleu_pack(raleu_pl,
3142			     (enum mlxsw_reg_ralxx_protocol) fib->proto,
3143			     fib->vr->id, adj_index, ecmp_size, new_adj_index,
3144			     new_ecmp_size);
3145	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl);
3146}
3147
3148static int mlxsw_sp_adj_index_mass_update(struct mlxsw_sp *mlxsw_sp,
3149					  struct mlxsw_sp_nexthop_group *nh_grp,
3150					  u32 old_adj_index, u16 old_ecmp_size)
3151{
3152	struct mlxsw_sp_fib_entry *fib_entry;
3153	struct mlxsw_sp_fib *fib = NULL;
3154	int err;
3155
3156	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3157		if (fib == fib_entry->fib_node->fib)
3158			continue;
3159		fib = fib_entry->fib_node->fib;
3160		err = mlxsw_sp_adj_index_mass_update_vr(mlxsw_sp, fib,
3161							old_adj_index,
3162							old_ecmp_size,
3163							nh_grp->adj_index,
3164							nh_grp->ecmp_size);
3165		if (err)
3166			return err;
3167	}
3168	return 0;
3169}
3170
3171static int __mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3172				     struct mlxsw_sp_nexthop *nh)
3173{
3174	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3175	char ratr_pl[MLXSW_REG_RATR_LEN];
3176
3177	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY,
3178			    true, MLXSW_REG_RATR_TYPE_ETHERNET,
3179			    adj_index, neigh_entry->rif);
3180	mlxsw_reg_ratr_eth_entry_pack(ratr_pl, neigh_entry->ha);
3181	if (nh->counter_valid)
3182		mlxsw_reg_ratr_counter_pack(ratr_pl, nh->counter_index, true);
3183	else
3184		mlxsw_reg_ratr_counter_pack(ratr_pl, 0, false);
3185
3186	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
3187}
3188
3189int mlxsw_sp_nexthop_update(struct mlxsw_sp *mlxsw_sp, u32 adj_index,
3190			    struct mlxsw_sp_nexthop *nh)
3191{
3192	int i;
3193
3194	for (i = 0; i < nh->num_adj_entries; i++) {
3195		int err;
3196
3197		err = __mlxsw_sp_nexthop_update(mlxsw_sp, adj_index + i, nh);
3198		if (err)
3199			return err;
3200	}
3201
3202	return 0;
3203}
3204
3205static int __mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3206					  u32 adj_index,
3207					  struct mlxsw_sp_nexthop *nh)
3208{
3209	const struct mlxsw_sp_ipip_ops *ipip_ops;
3210
3211	ipip_ops = mlxsw_sp->router->ipip_ops_arr[nh->ipip_entry->ipipt];
3212	return ipip_ops->nexthop_update(mlxsw_sp, adj_index, nh->ipip_entry);
3213}
3214
3215static int mlxsw_sp_nexthop_ipip_update(struct mlxsw_sp *mlxsw_sp,
3216					u32 adj_index,
3217					struct mlxsw_sp_nexthop *nh)
3218{
3219	int i;
3220
3221	for (i = 0; i < nh->num_adj_entries; i++) {
3222		int err;
3223
3224		err = __mlxsw_sp_nexthop_ipip_update(mlxsw_sp, adj_index + i,
3225						     nh);
3226		if (err)
3227			return err;
3228	}
3229
3230	return 0;
3231}
3232
3233static int
3234mlxsw_sp_nexthop_group_update(struct mlxsw_sp *mlxsw_sp,
3235			      struct mlxsw_sp_nexthop_group *nh_grp,
3236			      bool reallocate)
3237{
3238	u32 adj_index = nh_grp->adj_index; /* base */
3239	struct mlxsw_sp_nexthop *nh;
3240	int i;
3241
3242	for (i = 0; i < nh_grp->count; i++) {
3243		nh = &nh_grp->nexthops[i];
3244
3245		if (!nh->should_offload) {
3246			nh->offloaded = 0;
3247			continue;
3248		}
3249
3250		if (nh->update || reallocate) {
3251			int err = 0;
3252
3253			switch (nh->type) {
3254			case MLXSW_SP_NEXTHOP_TYPE_ETH:
3255				err = mlxsw_sp_nexthop_update
3256					    (mlxsw_sp, adj_index, nh);
3257				break;
3258			case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3259				err = mlxsw_sp_nexthop_ipip_update
3260					    (mlxsw_sp, adj_index, nh);
3261				break;
3262			}
3263			if (err)
3264				return err;
3265			nh->update = 0;
3266			nh->offloaded = 1;
3267		}
3268		adj_index += nh->num_adj_entries;
3269	}
3270	return 0;
3271}
3272
3273static int
3274mlxsw_sp_nexthop_fib_entries_update(struct mlxsw_sp *mlxsw_sp,
3275				    struct mlxsw_sp_nexthop_group *nh_grp)
3276{
3277	struct mlxsw_sp_fib_entry *fib_entry;
3278	int err;
3279
3280	list_for_each_entry(fib_entry, &nh_grp->fib_list, nexthop_group_node) {
3281		err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
3282		if (err)
3283			return err;
3284	}
3285	return 0;
3286}
3287
3288static void mlxsw_sp_adj_grp_size_round_up(u16 *p_adj_grp_size)
3289{
3290	/* Valid sizes for an adjacency group are:
3291	 * 1-64, 512, 1024, 2048 and 4096.
3292	 */
3293	if (*p_adj_grp_size <= 64)
3294		return;
3295	else if (*p_adj_grp_size <= 512)
3296		*p_adj_grp_size = 512;
3297	else if (*p_adj_grp_size <= 1024)
3298		*p_adj_grp_size = 1024;
3299	else if (*p_adj_grp_size <= 2048)
3300		*p_adj_grp_size = 2048;
3301	else
3302		*p_adj_grp_size = 4096;
3303}
3304
3305static void mlxsw_sp_adj_grp_size_round_down(u16 *p_adj_grp_size,
3306					     unsigned int alloc_size)
3307{
3308	if (alloc_size >= 4096)
3309		*p_adj_grp_size = 4096;
3310	else if (alloc_size >= 2048)
3311		*p_adj_grp_size = 2048;
3312	else if (alloc_size >= 1024)
3313		*p_adj_grp_size = 1024;
3314	else if (alloc_size >= 512)
3315		*p_adj_grp_size = 512;
3316}
3317
3318static int mlxsw_sp_fix_adj_grp_size(struct mlxsw_sp *mlxsw_sp,
3319				     u16 *p_adj_grp_size)
3320{
3321	unsigned int alloc_size;
3322	int err;
3323
3324	/* Round up the requested group size to the next size supported
3325	 * by the device and make sure the request can be satisfied.
3326	 */
3327	mlxsw_sp_adj_grp_size_round_up(p_adj_grp_size);
3328	err = mlxsw_sp_kvdl_alloc_count_query(mlxsw_sp,
3329					      MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3330					      *p_adj_grp_size, &alloc_size);
3331	if (err)
3332		return err;
3333	/* It is possible the allocation results in more allocated
3334	 * entries than requested. Try to use as much of them as
3335	 * possible.
3336	 */
3337	mlxsw_sp_adj_grp_size_round_down(p_adj_grp_size, alloc_size);
3338
3339	return 0;
3340}
3341
3342static void
3343mlxsw_sp_nexthop_group_normalize(struct mlxsw_sp_nexthop_group *nh_grp)
3344{
3345	int i, g = 0, sum_norm_weight = 0;
3346	struct mlxsw_sp_nexthop *nh;
3347
3348	for (i = 0; i < nh_grp->count; i++) {
3349		nh = &nh_grp->nexthops[i];
3350
3351		if (!nh->should_offload)
3352			continue;
3353		if (g > 0)
3354			g = gcd(nh->nh_weight, g);
3355		else
3356			g = nh->nh_weight;
3357	}
3358
3359	for (i = 0; i < nh_grp->count; i++) {
3360		nh = &nh_grp->nexthops[i];
3361
3362		if (!nh->should_offload)
3363			continue;
3364		nh->norm_nh_weight = nh->nh_weight / g;
3365		sum_norm_weight += nh->norm_nh_weight;
3366	}
3367
3368	nh_grp->sum_norm_weight = sum_norm_weight;
3369}
3370
3371static void
3372mlxsw_sp_nexthop_group_rebalance(struct mlxsw_sp_nexthop_group *nh_grp)
3373{
3374	int total = nh_grp->sum_norm_weight;
3375	u16 ecmp_size = nh_grp->ecmp_size;
3376	int i, weight = 0, lower_bound = 0;
3377
3378	for (i = 0; i < nh_grp->count; i++) {
3379		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3380		int upper_bound;
3381
3382		if (!nh->should_offload)
3383			continue;
3384		weight += nh->norm_nh_weight;
3385		upper_bound = DIV_ROUND_CLOSEST(ecmp_size * weight, total);
3386		nh->num_adj_entries = upper_bound - lower_bound;
3387		lower_bound = upper_bound;
3388	}
3389}
3390
3391static struct mlxsw_sp_nexthop *
3392mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
3393		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6);
3394
3395static void
3396mlxsw_sp_nexthop4_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3397					struct mlxsw_sp_nexthop_group *nh_grp)
3398{
3399	int i;
3400
3401	for (i = 0; i < nh_grp->count; i++) {
3402		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
3403
3404		if (nh->offloaded)
3405			nh->key.fib_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3406		else
3407			nh->key.fib_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3408	}
3409}
3410
3411static void
3412__mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp_nexthop_group *nh_grp,
3413					  struct mlxsw_sp_fib6_entry *fib6_entry)
3414{
3415	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
3416
3417	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
3418		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
3419		struct mlxsw_sp_nexthop *nh;
3420
3421		nh = mlxsw_sp_rt6_nexthop(nh_grp, mlxsw_sp_rt6);
3422		if (nh && nh->offloaded)
3423			fib6_nh->fib_nh_flags |= RTNH_F_OFFLOAD;
3424		else
3425			fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
3426	}
3427}
3428
3429static void
3430mlxsw_sp_nexthop6_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3431					struct mlxsw_sp_nexthop_group *nh_grp)
3432{
3433	struct mlxsw_sp_fib6_entry *fib6_entry;
3434
3435	/* Unfortunately, in IPv6 the route and the nexthop are described by
3436	 * the same struct, so we need to iterate over all the routes using the
3437	 * nexthop group and set / clear the offload indication for them.
3438	 */
3439	list_for_each_entry(fib6_entry, &nh_grp->fib_list,
3440			    common.nexthop_group_node)
3441		__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
3442}
3443
3444static void
3445mlxsw_sp_nexthop_group_offload_refresh(struct mlxsw_sp *mlxsw_sp,
3446				       struct mlxsw_sp_nexthop_group *nh_grp)
3447{
3448	switch (mlxsw_sp_nexthop_group_type(nh_grp)) {
3449	case AF_INET:
3450		mlxsw_sp_nexthop4_group_offload_refresh(mlxsw_sp, nh_grp);
3451		break;
3452	case AF_INET6:
3453		mlxsw_sp_nexthop6_group_offload_refresh(mlxsw_sp, nh_grp);
3454		break;
3455	}
3456}
3457
3458static void
3459mlxsw_sp_nexthop_group_refresh(struct mlxsw_sp *mlxsw_sp,
3460			       struct mlxsw_sp_nexthop_group *nh_grp)
3461{
3462	u16 ecmp_size, old_ecmp_size;
3463	struct mlxsw_sp_nexthop *nh;
3464	bool offload_change = false;
3465	u32 adj_index;
3466	bool old_adj_index_valid;
3467	u32 old_adj_index;
3468	int i;
3469	int err;
3470
3471	if (!nh_grp->gateway) {
3472		mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3473		return;
3474	}
3475
3476	for (i = 0; i < nh_grp->count; i++) {
3477		nh = &nh_grp->nexthops[i];
3478
3479		if (nh->should_offload != nh->offloaded) {
3480			offload_change = true;
3481			if (nh->should_offload)
3482				nh->update = 1;
3483		}
3484	}
3485	if (!offload_change) {
3486		/* Nothing was added or removed, so no need to reallocate. Just
3487		 * update MAC on existing adjacency indexes.
3488		 */
3489		err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, false);
3490		if (err) {
3491			dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3492			goto set_trap;
3493		}
3494		return;
3495	}
3496	mlxsw_sp_nexthop_group_normalize(nh_grp);
3497	if (!nh_grp->sum_norm_weight)
3498		/* No neigh of this group is connected so we just set
3499		 * the trap and let everthing flow through kernel.
3500		 */
3501		goto set_trap;
3502
3503	ecmp_size = nh_grp->sum_norm_weight;
3504	err = mlxsw_sp_fix_adj_grp_size(mlxsw_sp, &ecmp_size);
3505	if (err)
3506		/* No valid allocation size available. */
3507		goto set_trap;
3508
3509	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3510				  ecmp_size, &adj_index);
3511	if (err) {
3512		/* We ran out of KVD linear space, just set the
3513		 * trap and let everything flow through kernel.
3514		 */
3515		dev_warn(mlxsw_sp->bus_info->dev, "Failed to allocate KVD linear area for nexthop group.\n");
3516		goto set_trap;
3517	}
3518	old_adj_index_valid = nh_grp->adj_index_valid;
3519	old_adj_index = nh_grp->adj_index;
3520	old_ecmp_size = nh_grp->ecmp_size;
3521	nh_grp->adj_index_valid = 1;
3522	nh_grp->adj_index = adj_index;
3523	nh_grp->ecmp_size = ecmp_size;
3524	mlxsw_sp_nexthop_group_rebalance(nh_grp);
3525	err = mlxsw_sp_nexthop_group_update(mlxsw_sp, nh_grp, true);
3526	if (err) {
3527		dev_warn(mlxsw_sp->bus_info->dev, "Failed to update neigh MAC in adjacency table.\n");
3528		goto set_trap;
3529	}
3530
3531	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3532
3533	if (!old_adj_index_valid) {
3534		/* The trap was set for fib entries, so we have to call
3535		 * fib entry update to unset it and use adjacency index.
3536		 */
3537		err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3538		if (err) {
3539			dev_warn(mlxsw_sp->bus_info->dev, "Failed to add adjacency index to fib entries.\n");
3540			goto set_trap;
3541		}
3542		return;
3543	}
3544
3545	err = mlxsw_sp_adj_index_mass_update(mlxsw_sp, nh_grp,
3546					     old_adj_index, old_ecmp_size);
3547	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3548			   old_ecmp_size, old_adj_index);
3549	if (err) {
3550		dev_warn(mlxsw_sp->bus_info->dev, "Failed to mass-update adjacency index for nexthop group.\n");
3551		goto set_trap;
3552	}
3553
3554	return;
3555
3556set_trap:
3557	old_adj_index_valid = nh_grp->adj_index_valid;
3558	nh_grp->adj_index_valid = 0;
3559	for (i = 0; i < nh_grp->count; i++) {
3560		nh = &nh_grp->nexthops[i];
3561		nh->offloaded = 0;
3562	}
3563	err = mlxsw_sp_nexthop_fib_entries_update(mlxsw_sp, nh_grp);
3564	if (err)
3565		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set traps for fib entries.\n");
3566	mlxsw_sp_nexthop_group_offload_refresh(mlxsw_sp, nh_grp);
3567	if (old_adj_index_valid)
3568		mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ,
3569				   nh_grp->ecmp_size, nh_grp->adj_index);
3570}
3571
3572static void __mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp_nexthop *nh,
3573					    bool removing)
3574{
3575	if (!removing)
3576		nh->should_offload = 1;
3577	else
3578		nh->should_offload = 0;
3579	nh->update = 1;
3580}
3581
3582static int
3583mlxsw_sp_nexthop_dead_neigh_replace(struct mlxsw_sp *mlxsw_sp,
3584				    struct mlxsw_sp_neigh_entry *neigh_entry)
3585{
3586	struct neighbour *n, *old_n = neigh_entry->key.n;
3587	struct mlxsw_sp_nexthop *nh;
3588	bool entry_connected;
3589	u8 nud_state, dead;
3590	int err;
3591
3592	nh = list_first_entry(&neigh_entry->nexthop_list,
3593			      struct mlxsw_sp_nexthop, neigh_list_node);
3594
3595	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3596	if (!n) {
3597		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3598				 nh->rif->dev);
3599		if (IS_ERR(n))
3600			return PTR_ERR(n);
3601		neigh_event_send(n, NULL);
3602	}
3603
3604	mlxsw_sp_neigh_entry_remove(mlxsw_sp, neigh_entry);
3605	neigh_entry->key.n = n;
3606	err = mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3607	if (err)
3608		goto err_neigh_entry_insert;
3609
3610	read_lock_bh(&n->lock);
3611	nud_state = n->nud_state;
3612	dead = n->dead;
3613	read_unlock_bh(&n->lock);
3614	entry_connected = nud_state & NUD_VALID && !dead;
3615
3616	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3617			    neigh_list_node) {
3618		neigh_release(old_n);
3619		neigh_clone(n);
3620		__mlxsw_sp_nexthop_neigh_update(nh, !entry_connected);
3621		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3622	}
3623
3624	neigh_release(n);
3625
3626	return 0;
3627
3628err_neigh_entry_insert:
3629	neigh_entry->key.n = old_n;
3630	mlxsw_sp_neigh_entry_insert(mlxsw_sp, neigh_entry);
3631	neigh_release(n);
3632	return err;
3633}
3634
3635static void
3636mlxsw_sp_nexthop_neigh_update(struct mlxsw_sp *mlxsw_sp,
3637			      struct mlxsw_sp_neigh_entry *neigh_entry,
3638			      bool removing, bool dead)
3639{
3640	struct mlxsw_sp_nexthop *nh;
3641
3642	if (list_empty(&neigh_entry->nexthop_list))
3643		return;
3644
3645	if (dead) {
3646		int err;
3647
3648		err = mlxsw_sp_nexthop_dead_neigh_replace(mlxsw_sp,
3649							  neigh_entry);
3650		if (err)
3651			dev_err(mlxsw_sp->bus_info->dev, "Failed to replace dead neigh\n");
3652		return;
3653	}
3654
3655	list_for_each_entry(nh, &neigh_entry->nexthop_list,
3656			    neigh_list_node) {
3657		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3658		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3659	}
3660}
3661
3662static void mlxsw_sp_nexthop_rif_init(struct mlxsw_sp_nexthop *nh,
3663				      struct mlxsw_sp_rif *rif)
3664{
3665	if (nh->rif)
3666		return;
3667
3668	nh->rif = rif;
3669	list_add(&nh->rif_list_node, &rif->nexthop_list);
3670}
3671
3672static void mlxsw_sp_nexthop_rif_fini(struct mlxsw_sp_nexthop *nh)
3673{
3674	if (!nh->rif)
3675		return;
3676
3677	list_del(&nh->rif_list_node);
3678	nh->rif = NULL;
3679}
3680
3681static int mlxsw_sp_nexthop_neigh_init(struct mlxsw_sp *mlxsw_sp,
3682				       struct mlxsw_sp_nexthop *nh)
3683{
3684	struct mlxsw_sp_neigh_entry *neigh_entry;
3685	struct neighbour *n;
3686	u8 nud_state, dead;
3687	int err;
3688
3689	if (!nh->nh_grp->gateway || nh->neigh_entry)
3690		return 0;
3691
3692	/* Take a reference of neigh here ensuring that neigh would
3693	 * not be destructed before the nexthop entry is finished.
3694	 * The reference is taken either in neigh_lookup() or
3695	 * in neigh_create() in case n is not found.
3696	 */
3697	n = neigh_lookup(nh->nh_grp->neigh_tbl, &nh->gw_addr, nh->rif->dev);
3698	if (!n) {
3699		n = neigh_create(nh->nh_grp->neigh_tbl, &nh->gw_addr,
3700				 nh->rif->dev);
3701		if (IS_ERR(n))
3702			return PTR_ERR(n);
3703		neigh_event_send(n, NULL);
3704	}
3705	neigh_entry = mlxsw_sp_neigh_entry_lookup(mlxsw_sp, n);
3706	if (!neigh_entry) {
3707		neigh_entry = mlxsw_sp_neigh_entry_create(mlxsw_sp, n);
3708		if (IS_ERR(neigh_entry)) {
3709			err = -EINVAL;
3710			goto err_neigh_entry_create;
3711		}
3712	}
3713
3714	/* If that is the first nexthop connected to that neigh, add to
3715	 * nexthop_neighs_list
3716	 */
3717	if (list_empty(&neigh_entry->nexthop_list))
3718		list_add_tail(&neigh_entry->nexthop_neighs_list_node,
3719			      &mlxsw_sp->router->nexthop_neighs_list);
3720
3721	nh->neigh_entry = neigh_entry;
3722	list_add_tail(&nh->neigh_list_node, &neigh_entry->nexthop_list);
3723	read_lock_bh(&n->lock);
3724	nud_state = n->nud_state;
3725	dead = n->dead;
3726	read_unlock_bh(&n->lock);
3727	__mlxsw_sp_nexthop_neigh_update(nh, !(nud_state & NUD_VALID && !dead));
3728
3729	return 0;
3730
3731err_neigh_entry_create:
3732	neigh_release(n);
3733	return err;
3734}
3735
3736static void mlxsw_sp_nexthop_neigh_fini(struct mlxsw_sp *mlxsw_sp,
3737					struct mlxsw_sp_nexthop *nh)
3738{
3739	struct mlxsw_sp_neigh_entry *neigh_entry = nh->neigh_entry;
3740	struct neighbour *n;
3741
3742	if (!neigh_entry)
3743		return;
3744	n = neigh_entry->key.n;
3745
3746	__mlxsw_sp_nexthop_neigh_update(nh, true);
3747	list_del(&nh->neigh_list_node);
3748	nh->neigh_entry = NULL;
3749
3750	/* If that is the last nexthop connected to that neigh, remove from
3751	 * nexthop_neighs_list
3752	 */
3753	if (list_empty(&neigh_entry->nexthop_list))
3754		list_del(&neigh_entry->nexthop_neighs_list_node);
3755
3756	if (!neigh_entry->connected && list_empty(&neigh_entry->nexthop_list))
3757		mlxsw_sp_neigh_entry_destroy(mlxsw_sp, neigh_entry);
3758
3759	neigh_release(n);
3760}
3761
3762static bool mlxsw_sp_ipip_netdev_ul_up(struct net_device *ol_dev)
3763{
3764	struct net_device *ul_dev;
3765	bool is_up;
3766
3767	rcu_read_lock();
3768	ul_dev = __mlxsw_sp_ipip_netdev_ul_dev_get(ol_dev);
3769	is_up = ul_dev ? (ul_dev->flags & IFF_UP) : true;
3770	rcu_read_unlock();
3771
3772	return is_up;
3773}
3774
3775static void mlxsw_sp_nexthop_ipip_init(struct mlxsw_sp *mlxsw_sp,
3776				       struct mlxsw_sp_nexthop *nh,
3777				       struct mlxsw_sp_ipip_entry *ipip_entry)
3778{
3779	bool removing;
3780
3781	if (!nh->nh_grp->gateway || nh->ipip_entry)
3782		return;
3783
3784	nh->ipip_entry = ipip_entry;
3785	removing = !mlxsw_sp_ipip_netdev_ul_up(ipip_entry->ol_dev);
3786	__mlxsw_sp_nexthop_neigh_update(nh, removing);
3787	mlxsw_sp_nexthop_rif_init(nh, &ipip_entry->ol_lb->common);
3788}
3789
3790static void mlxsw_sp_nexthop_ipip_fini(struct mlxsw_sp *mlxsw_sp,
3791				       struct mlxsw_sp_nexthop *nh)
3792{
3793	struct mlxsw_sp_ipip_entry *ipip_entry = nh->ipip_entry;
3794
3795	if (!ipip_entry)
3796		return;
3797
3798	__mlxsw_sp_nexthop_neigh_update(nh, true);
3799	nh->ipip_entry = NULL;
3800}
3801
3802static bool mlxsw_sp_nexthop4_ipip_type(const struct mlxsw_sp *mlxsw_sp,
3803					const struct fib_nh *fib_nh,
3804					enum mlxsw_sp_ipip_type *p_ipipt)
3805{
3806	struct net_device *dev = fib_nh->fib_nh_dev;
3807
3808	return dev &&
3809	       fib_nh->nh_parent->fib_type == RTN_UNICAST &&
3810	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, p_ipipt);
3811}
3812
3813static void mlxsw_sp_nexthop_type_fini(struct mlxsw_sp *mlxsw_sp,
3814				       struct mlxsw_sp_nexthop *nh)
3815{
3816	switch (nh->type) {
3817	case MLXSW_SP_NEXTHOP_TYPE_ETH:
3818		mlxsw_sp_nexthop_neigh_fini(mlxsw_sp, nh);
3819		mlxsw_sp_nexthop_rif_fini(nh);
3820		break;
3821	case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3822		mlxsw_sp_nexthop_rif_fini(nh);
3823		mlxsw_sp_nexthop_ipip_fini(mlxsw_sp, nh);
3824		break;
3825	}
3826}
3827
3828static int mlxsw_sp_nexthop4_type_init(struct mlxsw_sp *mlxsw_sp,
3829				       struct mlxsw_sp_nexthop *nh,
3830				       struct fib_nh *fib_nh)
3831{
3832	const struct mlxsw_sp_ipip_ops *ipip_ops;
3833	struct net_device *dev = fib_nh->fib_nh_dev;
3834	struct mlxsw_sp_ipip_entry *ipip_entry;
3835	struct mlxsw_sp_rif *rif;
3836	int err;
3837
3838	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
3839	if (ipip_entry) {
3840		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
3841		if (ipip_ops->can_offload(mlxsw_sp, dev,
3842					  MLXSW_SP_L3_PROTO_IPV4)) {
3843			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
3844			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
3845			return 0;
3846		}
3847	}
3848
3849	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
3850	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
3851	if (!rif)
3852		return 0;
3853
3854	mlxsw_sp_nexthop_rif_init(nh, rif);
3855	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
3856	if (err)
3857		goto err_neigh_init;
3858
3859	return 0;
3860
3861err_neigh_init:
3862	mlxsw_sp_nexthop_rif_fini(nh);
3863	return err;
3864}
3865
3866static void mlxsw_sp_nexthop4_type_fini(struct mlxsw_sp *mlxsw_sp,
3867					struct mlxsw_sp_nexthop *nh)
3868{
3869	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3870}
3871
3872static int mlxsw_sp_nexthop4_init(struct mlxsw_sp *mlxsw_sp,
3873				  struct mlxsw_sp_nexthop_group *nh_grp,
3874				  struct mlxsw_sp_nexthop *nh,
3875				  struct fib_nh *fib_nh)
3876{
3877	struct net_device *dev = fib_nh->fib_nh_dev;
3878	struct in_device *in_dev;
3879	int err;
3880
3881	nh->nh_grp = nh_grp;
3882	nh->key.fib_nh = fib_nh;
3883#ifdef CONFIG_IP_ROUTE_MULTIPATH
3884	nh->nh_weight = fib_nh->fib_nh_weight;
3885#else
3886	nh->nh_weight = 1;
3887#endif
3888	memcpy(&nh->gw_addr, &fib_nh->fib_nh_gw4, sizeof(fib_nh->fib_nh_gw4));
3889	err = mlxsw_sp_nexthop_insert(mlxsw_sp, nh);
3890	if (err)
3891		return err;
3892
3893	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
3894	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
3895
3896	if (!dev)
3897		return 0;
3898
3899	rcu_read_lock();
3900	in_dev = __in_dev_get_rcu(dev);
3901	if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
3902	    fib_nh->fib_nh_flags & RTNH_F_LINKDOWN) {
3903		rcu_read_unlock();
3904		return 0;
3905	}
3906	rcu_read_unlock();
3907
3908	err = mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3909	if (err)
3910		goto err_nexthop_neigh_init;
3911
3912	return 0;
3913
3914err_nexthop_neigh_init:
3915	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3916	return err;
3917}
3918
3919static void mlxsw_sp_nexthop4_fini(struct mlxsw_sp *mlxsw_sp,
3920				   struct mlxsw_sp_nexthop *nh)
3921{
3922	mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3923	list_del(&nh->router_list_node);
3924	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
3925	mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
3926}
3927
3928static void mlxsw_sp_nexthop4_event(struct mlxsw_sp *mlxsw_sp,
3929				    unsigned long event, struct fib_nh *fib_nh)
3930{
3931	struct mlxsw_sp_nexthop_key key;
3932	struct mlxsw_sp_nexthop *nh;
3933
3934	if (mlxsw_sp->router->aborted)
3935		return;
3936
3937	key.fib_nh = fib_nh;
3938	nh = mlxsw_sp_nexthop_lookup(mlxsw_sp, key);
3939	if (!nh)
3940		return;
3941
3942	switch (event) {
3943	case FIB_EVENT_NH_ADD:
3944		mlxsw_sp_nexthop4_type_init(mlxsw_sp, nh, fib_nh);
3945		break;
3946	case FIB_EVENT_NH_DEL:
3947		mlxsw_sp_nexthop4_type_fini(mlxsw_sp, nh);
3948		break;
3949	}
3950
3951	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3952}
3953
3954static void mlxsw_sp_nexthop_rif_update(struct mlxsw_sp *mlxsw_sp,
3955					struct mlxsw_sp_rif *rif)
3956{
3957	struct mlxsw_sp_nexthop *nh;
3958	bool removing;
3959
3960	list_for_each_entry(nh, &rif->nexthop_list, rif_list_node) {
3961		switch (nh->type) {
3962		case MLXSW_SP_NEXTHOP_TYPE_ETH:
3963			removing = false;
3964			break;
3965		case MLXSW_SP_NEXTHOP_TYPE_IPIP:
3966			removing = !mlxsw_sp_ipip_netdev_ul_up(rif->dev);
3967			break;
3968		default:
3969			WARN_ON(1);
3970			continue;
3971		}
3972
3973		__mlxsw_sp_nexthop_neigh_update(nh, removing);
3974		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3975	}
3976}
3977
3978static void mlxsw_sp_nexthop_rif_migrate(struct mlxsw_sp *mlxsw_sp,
3979					 struct mlxsw_sp_rif *old_rif,
3980					 struct mlxsw_sp_rif *new_rif)
3981{
3982	struct mlxsw_sp_nexthop *nh;
3983
3984	list_splice_init(&old_rif->nexthop_list, &new_rif->nexthop_list);
3985	list_for_each_entry(nh, &new_rif->nexthop_list, rif_list_node)
3986		nh->rif = new_rif;
3987	mlxsw_sp_nexthop_rif_update(mlxsw_sp, new_rif);
3988}
3989
3990static void mlxsw_sp_nexthop_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
3991					   struct mlxsw_sp_rif *rif)
3992{
3993	struct mlxsw_sp_nexthop *nh, *tmp;
3994
3995	list_for_each_entry_safe(nh, tmp, &rif->nexthop_list, rif_list_node) {
3996		mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
3997		mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh->nh_grp);
3998	}
3999}
4000
4001static bool mlxsw_sp_fi_is_gateway(const struct mlxsw_sp *mlxsw_sp,
4002				   struct fib_info *fi)
4003{
4004	const struct fib_nh *nh = fib_info_nh(fi, 0);
4005
4006	return nh->fib_nh_gw_family ||
4007	       mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
4008}
4009
4010static struct mlxsw_sp_nexthop_group *
4011mlxsw_sp_nexthop4_group_create(struct mlxsw_sp *mlxsw_sp, struct fib_info *fi)
4012{
4013	unsigned int nhs = fib_info_num_path(fi);
4014	struct mlxsw_sp_nexthop_group *nh_grp;
4015	struct mlxsw_sp_nexthop *nh;
4016	struct fib_nh *fib_nh;
4017	int i;
4018	int err;
4019
4020	nh_grp = kzalloc(struct_size(nh_grp, nexthops, nhs), GFP_KERNEL);
4021	if (!nh_grp)
4022		return ERR_PTR(-ENOMEM);
4023	nh_grp->priv = fi;
4024	INIT_LIST_HEAD(&nh_grp->fib_list);
4025	nh_grp->neigh_tbl = &arp_tbl;
4026
4027	nh_grp->gateway = mlxsw_sp_fi_is_gateway(mlxsw_sp, fi);
4028	nh_grp->count = nhs;
4029	fib_info_hold(fi);
4030	for (i = 0; i < nh_grp->count; i++) {
4031		nh = &nh_grp->nexthops[i];
4032		fib_nh = fib_info_nh(fi, i);
4033		err = mlxsw_sp_nexthop4_init(mlxsw_sp, nh_grp, nh, fib_nh);
4034		if (err)
4035			goto err_nexthop4_init;
4036	}
4037	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
4038	if (err)
4039		goto err_nexthop_group_insert;
4040	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4041	return nh_grp;
4042
4043err_nexthop_group_insert:
4044err_nexthop4_init:
4045	for (i--; i >= 0; i--) {
4046		nh = &nh_grp->nexthops[i];
4047		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
4048	}
4049	fib_info_put(fi);
4050	kfree(nh_grp);
4051	return ERR_PTR(err);
4052}
4053
4054static void
4055mlxsw_sp_nexthop4_group_destroy(struct mlxsw_sp *mlxsw_sp,
4056				struct mlxsw_sp_nexthop_group *nh_grp)
4057{
4058	struct mlxsw_sp_nexthop *nh;
4059	int i;
4060
4061	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
4062	for (i = 0; i < nh_grp->count; i++) {
4063		nh = &nh_grp->nexthops[i];
4064		mlxsw_sp_nexthop4_fini(mlxsw_sp, nh);
4065	}
4066	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
4067	WARN_ON_ONCE(nh_grp->adj_index_valid);
4068	fib_info_put(mlxsw_sp_nexthop4_group_fi(nh_grp));
4069	kfree(nh_grp);
4070}
4071
4072static int mlxsw_sp_nexthop4_group_get(struct mlxsw_sp *mlxsw_sp,
4073				       struct mlxsw_sp_fib_entry *fib_entry,
4074				       struct fib_info *fi)
4075{
4076	struct mlxsw_sp_nexthop_group *nh_grp;
4077
4078	nh_grp = mlxsw_sp_nexthop4_group_lookup(mlxsw_sp, fi);
4079	if (!nh_grp) {
4080		nh_grp = mlxsw_sp_nexthop4_group_create(mlxsw_sp, fi);
4081		if (IS_ERR(nh_grp))
4082			return PTR_ERR(nh_grp);
4083	}
4084	list_add_tail(&fib_entry->nexthop_group_node, &nh_grp->fib_list);
4085	fib_entry->nh_group = nh_grp;
4086	return 0;
4087}
4088
4089static void mlxsw_sp_nexthop4_group_put(struct mlxsw_sp *mlxsw_sp,
4090					struct mlxsw_sp_fib_entry *fib_entry)
4091{
4092	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
4093
4094	list_del(&fib_entry->nexthop_group_node);
4095	if (!list_empty(&nh_grp->fib_list))
4096		return;
4097	mlxsw_sp_nexthop4_group_destroy(mlxsw_sp, nh_grp);
4098}
4099
4100static bool
4101mlxsw_sp_fib4_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
4102{
4103	struct mlxsw_sp_fib4_entry *fib4_entry;
4104
4105	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4106				  common);
4107	return !fib4_entry->tos;
4108}
4109
4110static bool
4111mlxsw_sp_fib_entry_should_offload(const struct mlxsw_sp_fib_entry *fib_entry)
4112{
4113	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
4114
4115	switch (fib_entry->fib_node->fib->proto) {
4116	case MLXSW_SP_L3_PROTO_IPV4:
4117		if (!mlxsw_sp_fib4_entry_should_offload(fib_entry))
4118			return false;
4119		break;
4120	case MLXSW_SP_L3_PROTO_IPV6:
4121		break;
4122	}
4123
4124	switch (fib_entry->type) {
4125	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4126		return !!nh_group->adj_index_valid;
4127	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4128		return !!nh_group->nh_rif;
4129	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4130	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4131	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4132		return true;
4133	default:
4134		return false;
4135	}
4136}
4137
4138static struct mlxsw_sp_nexthop *
4139mlxsw_sp_rt6_nexthop(struct mlxsw_sp_nexthop_group *nh_grp,
4140		     const struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
4141{
4142	int i;
4143
4144	for (i = 0; i < nh_grp->count; i++) {
4145		struct mlxsw_sp_nexthop *nh = &nh_grp->nexthops[i];
4146		struct fib6_info *rt = mlxsw_sp_rt6->rt;
4147
4148		if (nh->rif && nh->rif->dev == rt->fib6_nh->fib_nh_dev &&
4149		    ipv6_addr_equal((const struct in6_addr *) &nh->gw_addr,
4150				    &rt->fib6_nh->fib_nh_gw6))
4151			return nh;
4152		continue;
4153	}
4154
4155	return NULL;
4156}
4157
4158static void
4159mlxsw_sp_fib4_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4160				 struct mlxsw_sp_fib_entry *fib_entry)
4161{
4162	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
4163	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
4164	int dst_len = fib_entry->fib_node->key.prefix_len;
4165	struct mlxsw_sp_fib4_entry *fib4_entry;
4166	struct fib_rt_info fri;
4167	bool should_offload;
4168
4169	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
4170	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4171				  common);
4172	fri.fi = fi;
4173	fri.tb_id = fib4_entry->tb_id;
4174	fri.dst = cpu_to_be32(*p_dst);
4175	fri.dst_len = dst_len;
4176	fri.tos = fib4_entry->tos;
4177	fri.type = fib4_entry->type;
4178	fri.offload = should_offload;
4179	fri.trap = !should_offload;
4180	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
4181}
4182
4183static void
4184mlxsw_sp_fib4_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4185				   struct mlxsw_sp_fib_entry *fib_entry)
4186{
4187	struct fib_info *fi = mlxsw_sp_nexthop4_group_fi(fib_entry->nh_group);
4188	u32 *p_dst = (u32 *) fib_entry->fib_node->key.addr;
4189	int dst_len = fib_entry->fib_node->key.prefix_len;
4190	struct mlxsw_sp_fib4_entry *fib4_entry;
4191	struct fib_rt_info fri;
4192
4193	fib4_entry = container_of(fib_entry, struct mlxsw_sp_fib4_entry,
4194				  common);
4195	fri.fi = fi;
4196	fri.tb_id = fib4_entry->tb_id;
4197	fri.dst = cpu_to_be32(*p_dst);
4198	fri.dst_len = dst_len;
4199	fri.tos = fib4_entry->tos;
4200	fri.type = fib4_entry->type;
4201	fri.offload = false;
4202	fri.trap = false;
4203	fib_alias_hw_flags_set(mlxsw_sp_net(mlxsw_sp), &fri);
4204}
4205
4206static void
4207mlxsw_sp_fib6_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4208				 struct mlxsw_sp_fib_entry *fib_entry)
4209{
4210	struct mlxsw_sp_fib6_entry *fib6_entry;
4211	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4212	bool should_offload;
4213
4214	should_offload = mlxsw_sp_fib_entry_should_offload(fib_entry);
4215
4216	/* In IPv6 a multipath route is represented using multiple routes, so
4217	 * we need to set the flags on all of them.
4218	 */
4219	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4220				  common);
4221	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
4222		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, should_offload,
4223				       !should_offload);
4224}
4225
4226static void
4227mlxsw_sp_fib6_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4228				   struct mlxsw_sp_fib_entry *fib_entry)
4229{
4230	struct mlxsw_sp_fib6_entry *fib6_entry;
4231	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
4232
4233	fib6_entry = container_of(fib_entry, struct mlxsw_sp_fib6_entry,
4234				  common);
4235	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list)
4236		fib6_info_hw_flags_set(mlxsw_sp_rt6->rt, false, false);
4237}
4238
4239static void
4240mlxsw_sp_fib_entry_hw_flags_set(struct mlxsw_sp *mlxsw_sp,
4241				struct mlxsw_sp_fib_entry *fib_entry)
4242{
4243	switch (fib_entry->fib_node->fib->proto) {
4244	case MLXSW_SP_L3_PROTO_IPV4:
4245		mlxsw_sp_fib4_entry_hw_flags_set(mlxsw_sp, fib_entry);
4246		break;
4247	case MLXSW_SP_L3_PROTO_IPV6:
4248		mlxsw_sp_fib6_entry_hw_flags_set(mlxsw_sp, fib_entry);
4249		break;
4250	}
4251}
4252
4253static void
4254mlxsw_sp_fib_entry_hw_flags_clear(struct mlxsw_sp *mlxsw_sp,
4255				  struct mlxsw_sp_fib_entry *fib_entry)
4256{
4257	switch (fib_entry->fib_node->fib->proto) {
4258	case MLXSW_SP_L3_PROTO_IPV4:
4259		mlxsw_sp_fib4_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4260		break;
4261	case MLXSW_SP_L3_PROTO_IPV6:
4262		mlxsw_sp_fib6_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4263		break;
4264	}
4265}
4266
4267static void
4268mlxsw_sp_fib_entry_hw_flags_refresh(struct mlxsw_sp *mlxsw_sp,
4269				    struct mlxsw_sp_fib_entry *fib_entry,
4270				    enum mlxsw_reg_ralue_op op)
4271{
4272	switch (op) {
4273	case MLXSW_REG_RALUE_OP_WRITE_WRITE:
4274		mlxsw_sp_fib_entry_hw_flags_set(mlxsw_sp, fib_entry);
4275		break;
4276	case MLXSW_REG_RALUE_OP_WRITE_DELETE:
4277		mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, fib_entry);
4278		break;
4279	default:
4280		break;
4281	}
4282}
4283
4284static void
4285mlxsw_sp_fib_entry_ralue_pack(char *ralue_pl,
4286			      const struct mlxsw_sp_fib_entry *fib_entry,
4287			      enum mlxsw_reg_ralue_op op)
4288{
4289	struct mlxsw_sp_fib *fib = fib_entry->fib_node->fib;
4290	enum mlxsw_reg_ralxx_protocol proto;
4291	u32 *p_dip;
4292
4293	proto = (enum mlxsw_reg_ralxx_protocol) fib->proto;
4294
4295	switch (fib->proto) {
4296	case MLXSW_SP_L3_PROTO_IPV4:
4297		p_dip = (u32 *) fib_entry->fib_node->key.addr;
4298		mlxsw_reg_ralue_pack4(ralue_pl, proto, op, fib->vr->id,
4299				      fib_entry->fib_node->key.prefix_len,
4300				      *p_dip);
4301		break;
4302	case MLXSW_SP_L3_PROTO_IPV6:
4303		mlxsw_reg_ralue_pack6(ralue_pl, proto, op, fib->vr->id,
4304				      fib_entry->fib_node->key.prefix_len,
4305				      fib_entry->fib_node->key.addr);
4306		break;
4307	}
4308}
4309
4310static int mlxsw_sp_adj_discard_write(struct mlxsw_sp *mlxsw_sp, u16 rif_index)
4311{
4312	enum mlxsw_reg_ratr_trap_action trap_action;
4313	char ratr_pl[MLXSW_REG_RATR_LEN];
4314	int err;
4315
4316	if (mlxsw_sp->router->adj_discard_index_valid)
4317		return 0;
4318
4319	err = mlxsw_sp_kvdl_alloc(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4320				  &mlxsw_sp->router->adj_discard_index);
4321	if (err)
4322		return err;
4323
4324	trap_action = MLXSW_REG_RATR_TRAP_ACTION_DISCARD_ERRORS;
4325	mlxsw_reg_ratr_pack(ratr_pl, MLXSW_REG_RATR_OP_WRITE_WRITE_ENTRY, true,
4326			    MLXSW_REG_RATR_TYPE_ETHERNET,
4327			    mlxsw_sp->router->adj_discard_index, rif_index);
4328	mlxsw_reg_ratr_trap_action_set(ratr_pl, trap_action);
4329	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ratr), ratr_pl);
4330	if (err)
4331		goto err_ratr_write;
4332
4333	mlxsw_sp->router->adj_discard_index_valid = true;
4334
4335	return 0;
4336
4337err_ratr_write:
4338	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
4339			   mlxsw_sp->router->adj_discard_index);
4340	return err;
4341}
4342
4343static int mlxsw_sp_fib_entry_op_remote(struct mlxsw_sp *mlxsw_sp,
4344					struct mlxsw_sp_fib_entry *fib_entry,
4345					enum mlxsw_reg_ralue_op op)
4346{
4347	struct mlxsw_sp_nexthop_group *nh_group = fib_entry->nh_group;
4348	char ralue_pl[MLXSW_REG_RALUE_LEN];
4349	enum mlxsw_reg_ralue_trap_action trap_action;
4350	u16 trap_id = 0;
4351	u32 adjacency_index = 0;
4352	u16 ecmp_size = 0;
4353	int err;
4354
4355	/* In case the nexthop group adjacency index is valid, use it
4356	 * with provided ECMP size. Otherwise, setup trap and pass
4357	 * traffic to kernel.
4358	 */
4359	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4360		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4361		adjacency_index = fib_entry->nh_group->adj_index;
4362		ecmp_size = fib_entry->nh_group->ecmp_size;
4363	} else if (!nh_group->adj_index_valid && nh_group->count &&
4364		   nh_group->nh_rif) {
4365		err = mlxsw_sp_adj_discard_write(mlxsw_sp,
4366						 nh_group->nh_rif->rif_index);
4367		if (err)
4368			return err;
4369		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4370		adjacency_index = mlxsw_sp->router->adj_discard_index;
4371		ecmp_size = 1;
4372	} else {
4373		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4374		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4375	}
4376
4377	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4378	mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id,
4379					adjacency_index, ecmp_size);
4380	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4381}
4382
4383static int mlxsw_sp_fib_entry_op_local(struct mlxsw_sp *mlxsw_sp,
4384				       struct mlxsw_sp_fib_entry *fib_entry,
4385				       enum mlxsw_reg_ralue_op op)
4386{
4387	struct mlxsw_sp_rif *rif = fib_entry->nh_group->nh_rif;
4388	enum mlxsw_reg_ralue_trap_action trap_action;
4389	char ralue_pl[MLXSW_REG_RALUE_LEN];
4390	u16 trap_id = 0;
4391	u16 rif_index = 0;
4392
4393	if (mlxsw_sp_fib_entry_should_offload(fib_entry)) {
4394		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_NOP;
4395		rif_index = rif->rif_index;
4396	} else {
4397		trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4398		trap_id = MLXSW_TRAP_ID_RTR_INGRESS0;
4399	}
4400
4401	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4402	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id,
4403				       rif_index);
4404	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4405}
4406
4407static int mlxsw_sp_fib_entry_op_trap(struct mlxsw_sp *mlxsw_sp,
4408				      struct mlxsw_sp_fib_entry *fib_entry,
4409				      enum mlxsw_reg_ralue_op op)
4410{
4411	char ralue_pl[MLXSW_REG_RALUE_LEN];
4412
4413	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4414	mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
4415	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4416}
4417
4418static int mlxsw_sp_fib_entry_op_blackhole(struct mlxsw_sp *mlxsw_sp,
4419					   struct mlxsw_sp_fib_entry *fib_entry,
4420					   enum mlxsw_reg_ralue_op op)
4421{
4422	enum mlxsw_reg_ralue_trap_action trap_action;
4423	char ralue_pl[MLXSW_REG_RALUE_LEN];
4424
4425	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_DISCARD_ERROR;
4426	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4427	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, 0, 0);
4428	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4429}
4430
4431static int
4432mlxsw_sp_fib_entry_op_unreachable(struct mlxsw_sp *mlxsw_sp,
4433				  struct mlxsw_sp_fib_entry *fib_entry,
4434				  enum mlxsw_reg_ralue_op op)
4435{
4436	enum mlxsw_reg_ralue_trap_action trap_action;
4437	char ralue_pl[MLXSW_REG_RALUE_LEN];
4438	u16 trap_id;
4439
4440	trap_action = MLXSW_REG_RALUE_TRAP_ACTION_TRAP;
4441	trap_id = MLXSW_TRAP_ID_RTR_INGRESS1;
4442
4443	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4444	mlxsw_reg_ralue_act_local_pack(ralue_pl, trap_action, trap_id, 0);
4445	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4446}
4447
4448static int
4449mlxsw_sp_fib_entry_op_ipip_decap(struct mlxsw_sp *mlxsw_sp,
4450				 struct mlxsw_sp_fib_entry *fib_entry,
4451				 enum mlxsw_reg_ralue_op op)
4452{
4453	struct mlxsw_sp_ipip_entry *ipip_entry = fib_entry->decap.ipip_entry;
4454	const struct mlxsw_sp_ipip_ops *ipip_ops;
4455
4456	if (WARN_ON(!ipip_entry))
4457		return -EINVAL;
4458
4459	ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
4460	return ipip_ops->fib_entry_op(mlxsw_sp, ipip_entry, op,
4461				      fib_entry->decap.tunnel_index);
4462}
4463
4464static int mlxsw_sp_fib_entry_op_nve_decap(struct mlxsw_sp *mlxsw_sp,
4465					   struct mlxsw_sp_fib_entry *fib_entry,
4466					   enum mlxsw_reg_ralue_op op)
4467{
4468	char ralue_pl[MLXSW_REG_RALUE_LEN];
4469
4470	mlxsw_sp_fib_entry_ralue_pack(ralue_pl, fib_entry, op);
4471	mlxsw_reg_ralue_act_ip2me_tun_pack(ralue_pl,
4472					   fib_entry->decap.tunnel_index);
4473	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl);
4474}
4475
4476static int __mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4477				   struct mlxsw_sp_fib_entry *fib_entry,
4478				   enum mlxsw_reg_ralue_op op)
4479{
4480	switch (fib_entry->type) {
4481	case MLXSW_SP_FIB_ENTRY_TYPE_REMOTE:
4482		return mlxsw_sp_fib_entry_op_remote(mlxsw_sp, fib_entry, op);
4483	case MLXSW_SP_FIB_ENTRY_TYPE_LOCAL:
4484		return mlxsw_sp_fib_entry_op_local(mlxsw_sp, fib_entry, op);
4485	case MLXSW_SP_FIB_ENTRY_TYPE_TRAP:
4486		return mlxsw_sp_fib_entry_op_trap(mlxsw_sp, fib_entry, op);
4487	case MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE:
4488		return mlxsw_sp_fib_entry_op_blackhole(mlxsw_sp, fib_entry, op);
4489	case MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE:
4490		return mlxsw_sp_fib_entry_op_unreachable(mlxsw_sp, fib_entry,
4491							 op);
4492	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4493		return mlxsw_sp_fib_entry_op_ipip_decap(mlxsw_sp,
4494							fib_entry, op);
4495	case MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP:
4496		return mlxsw_sp_fib_entry_op_nve_decap(mlxsw_sp, fib_entry, op);
4497	}
4498	return -EINVAL;
4499}
4500
4501static int mlxsw_sp_fib_entry_op(struct mlxsw_sp *mlxsw_sp,
4502				 struct mlxsw_sp_fib_entry *fib_entry,
4503				 enum mlxsw_reg_ralue_op op)
4504{
4505	int err = __mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry, op);
4506
4507	if (err)
4508		return err;
4509
4510	mlxsw_sp_fib_entry_hw_flags_refresh(mlxsw_sp, fib_entry, op);
4511
4512	return err;
4513}
4514
4515static int mlxsw_sp_fib_entry_update(struct mlxsw_sp *mlxsw_sp,
4516				     struct mlxsw_sp_fib_entry *fib_entry)
4517{
4518	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4519				     MLXSW_REG_RALUE_OP_WRITE_WRITE);
4520}
4521
4522static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp,
4523				  struct mlxsw_sp_fib_entry *fib_entry)
4524{
4525	return mlxsw_sp_fib_entry_op(mlxsw_sp, fib_entry,
4526				     MLXSW_REG_RALUE_OP_WRITE_DELETE);
4527}
4528
4529static int
4530mlxsw_sp_fib4_entry_type_set(struct mlxsw_sp *mlxsw_sp,
4531			     const struct fib_entry_notifier_info *fen_info,
4532			     struct mlxsw_sp_fib_entry *fib_entry)
4533{
4534	struct net_device *dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
4535	union mlxsw_sp_l3addr dip = { .addr4 = htonl(fen_info->dst) };
4536	struct mlxsw_sp_router *router = mlxsw_sp->router;
4537	u32 tb_id = mlxsw_sp_fix_tb_id(fen_info->tb_id);
4538	struct mlxsw_sp_ipip_entry *ipip_entry;
4539	struct fib_info *fi = fen_info->fi;
4540
4541	switch (fen_info->type) {
4542	case RTN_LOCAL:
4543		ipip_entry = mlxsw_sp_ipip_entry_find_by_decap(mlxsw_sp, dev,
4544						 MLXSW_SP_L3_PROTO_IPV4, dip);
4545		if (ipip_entry && ipip_entry->ol_dev->flags & IFF_UP) {
4546			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP;
4547			return mlxsw_sp_fib_entry_decap_init(mlxsw_sp,
4548							     fib_entry,
4549							     ipip_entry);
4550		}
4551		if (mlxsw_sp_router_nve_is_decap(mlxsw_sp, tb_id,
4552						 MLXSW_SP_L3_PROTO_IPV4,
4553						 &dip)) {
4554			u32 tunnel_index;
4555
4556			tunnel_index = router->nve_decap_config.tunnel_index;
4557			fib_entry->decap.tunnel_index = tunnel_index;
4558			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_NVE_DECAP;
4559			return 0;
4560		}
4561		fallthrough;
4562	case RTN_BROADCAST:
4563		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
4564		return 0;
4565	case RTN_BLACKHOLE:
4566		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
4567		return 0;
4568	case RTN_UNREACHABLE:
4569	case RTN_PROHIBIT:
4570		/* Packets hitting these routes need to be trapped, but
4571		 * can do so with a lower priority than packets directed
4572		 * at the host, so use action type local instead of trap.
4573		 */
4574		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
4575		return 0;
4576	case RTN_UNICAST:
4577		if (mlxsw_sp_fi_is_gateway(mlxsw_sp, fi))
4578			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
4579		else
4580			fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
4581		return 0;
4582	default:
4583		return -EINVAL;
4584	}
4585}
4586
4587static void
4588mlxsw_sp_fib4_entry_type_unset(struct mlxsw_sp *mlxsw_sp,
4589			       struct mlxsw_sp_fib_entry *fib_entry)
4590{
4591	switch (fib_entry->type) {
4592	case MLXSW_SP_FIB_ENTRY_TYPE_IPIP_DECAP:
4593		mlxsw_sp_fib_entry_decap_fini(mlxsw_sp, fib_entry);
4594		break;
4595	default:
4596		break;
4597	}
4598}
4599
4600static struct mlxsw_sp_fib4_entry *
4601mlxsw_sp_fib4_entry_create(struct mlxsw_sp *mlxsw_sp,
4602			   struct mlxsw_sp_fib_node *fib_node,
4603			   const struct fib_entry_notifier_info *fen_info)
4604{
4605	struct mlxsw_sp_fib4_entry *fib4_entry;
4606	struct mlxsw_sp_fib_entry *fib_entry;
4607	int err;
4608
4609	fib4_entry = kzalloc(sizeof(*fib4_entry), GFP_KERNEL);
4610	if (!fib4_entry)
4611		return ERR_PTR(-ENOMEM);
4612	fib_entry = &fib4_entry->common;
4613
4614	err = mlxsw_sp_fib4_entry_type_set(mlxsw_sp, fen_info, fib_entry);
4615	if (err)
4616		goto err_fib4_entry_type_set;
4617
4618	err = mlxsw_sp_nexthop4_group_get(mlxsw_sp, fib_entry, fen_info->fi);
4619	if (err)
4620		goto err_nexthop4_group_get;
4621
4622	fib4_entry->prio = fen_info->fi->fib_priority;
4623	fib4_entry->tb_id = fen_info->tb_id;
4624	fib4_entry->type = fen_info->type;
4625	fib4_entry->tos = fen_info->tos;
4626
4627	fib_entry->fib_node = fib_node;
4628
4629	return fib4_entry;
4630
4631err_nexthop4_group_get:
4632	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, fib_entry);
4633err_fib4_entry_type_set:
4634	kfree(fib4_entry);
4635	return ERR_PTR(err);
4636}
4637
4638static void mlxsw_sp_fib4_entry_destroy(struct mlxsw_sp *mlxsw_sp,
4639					struct mlxsw_sp_fib4_entry *fib4_entry)
4640{
4641	mlxsw_sp_nexthop4_group_put(mlxsw_sp, &fib4_entry->common);
4642	mlxsw_sp_fib4_entry_type_unset(mlxsw_sp, &fib4_entry->common);
4643	kfree(fib4_entry);
4644}
4645
4646static struct mlxsw_sp_fib4_entry *
4647mlxsw_sp_fib4_entry_lookup(struct mlxsw_sp *mlxsw_sp,
4648			   const struct fib_entry_notifier_info *fen_info)
4649{
4650	struct mlxsw_sp_fib4_entry *fib4_entry;
4651	struct mlxsw_sp_fib_node *fib_node;
4652	struct mlxsw_sp_fib *fib;
4653	struct mlxsw_sp_vr *vr;
4654
4655	vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id);
4656	if (!vr)
4657		return NULL;
4658	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV4);
4659
4660	fib_node = mlxsw_sp_fib_node_lookup(fib, &fen_info->dst,
4661					    sizeof(fen_info->dst),
4662					    fen_info->dst_len);
4663	if (!fib_node)
4664		return NULL;
4665
4666	fib4_entry = container_of(fib_node->fib_entry,
4667				  struct mlxsw_sp_fib4_entry, common);
4668	if (fib4_entry->tb_id == fen_info->tb_id &&
4669	    fib4_entry->tos == fen_info->tos &&
4670	    fib4_entry->type == fen_info->type &&
4671	    mlxsw_sp_nexthop4_group_fi(fib4_entry->common.nh_group) ==
4672	    fen_info->fi)
4673		return fib4_entry;
4674
4675	return NULL;
4676}
4677
4678static const struct rhashtable_params mlxsw_sp_fib_ht_params = {
4679	.key_offset = offsetof(struct mlxsw_sp_fib_node, key),
4680	.head_offset = offsetof(struct mlxsw_sp_fib_node, ht_node),
4681	.key_len = sizeof(struct mlxsw_sp_fib_key),
4682	.automatic_shrinking = true,
4683};
4684
4685static int mlxsw_sp_fib_node_insert(struct mlxsw_sp_fib *fib,
4686				    struct mlxsw_sp_fib_node *fib_node)
4687{
4688	return rhashtable_insert_fast(&fib->ht, &fib_node->ht_node,
4689				      mlxsw_sp_fib_ht_params);
4690}
4691
4692static void mlxsw_sp_fib_node_remove(struct mlxsw_sp_fib *fib,
4693				     struct mlxsw_sp_fib_node *fib_node)
4694{
4695	rhashtable_remove_fast(&fib->ht, &fib_node->ht_node,
4696			       mlxsw_sp_fib_ht_params);
4697}
4698
4699static struct mlxsw_sp_fib_node *
4700mlxsw_sp_fib_node_lookup(struct mlxsw_sp_fib *fib, const void *addr,
4701			 size_t addr_len, unsigned char prefix_len)
4702{
4703	struct mlxsw_sp_fib_key key;
4704
4705	memset(&key, 0, sizeof(key));
4706	memcpy(key.addr, addr, addr_len);
4707	key.prefix_len = prefix_len;
4708	return rhashtable_lookup_fast(&fib->ht, &key, mlxsw_sp_fib_ht_params);
4709}
4710
4711static struct mlxsw_sp_fib_node *
4712mlxsw_sp_fib_node_create(struct mlxsw_sp_fib *fib, const void *addr,
4713			 size_t addr_len, unsigned char prefix_len)
4714{
4715	struct mlxsw_sp_fib_node *fib_node;
4716
4717	fib_node = kzalloc(sizeof(*fib_node), GFP_KERNEL);
4718	if (!fib_node)
4719		return NULL;
4720
4721	list_add(&fib_node->list, &fib->node_list);
4722	memcpy(fib_node->key.addr, addr, addr_len);
4723	fib_node->key.prefix_len = prefix_len;
4724
4725	return fib_node;
4726}
4727
4728static void mlxsw_sp_fib_node_destroy(struct mlxsw_sp_fib_node *fib_node)
4729{
4730	list_del(&fib_node->list);
4731	kfree(fib_node);
4732}
4733
4734static int mlxsw_sp_fib_lpm_tree_link(struct mlxsw_sp *mlxsw_sp,
4735				      struct mlxsw_sp_fib_node *fib_node)
4736{
4737	struct mlxsw_sp_prefix_usage req_prefix_usage;
4738	struct mlxsw_sp_fib *fib = fib_node->fib;
4739	struct mlxsw_sp_lpm_tree *lpm_tree;
4740	int err;
4741
4742	lpm_tree = mlxsw_sp->router->lpm.proto_trees[fib->proto];
4743	if (lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4744		goto out;
4745
4746	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4747	mlxsw_sp_prefix_usage_set(&req_prefix_usage, fib_node->key.prefix_len);
4748	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4749					 fib->proto);
4750	if (IS_ERR(lpm_tree))
4751		return PTR_ERR(lpm_tree);
4752
4753	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4754	if (err)
4755		goto err_lpm_tree_replace;
4756
4757out:
4758	lpm_tree->prefix_ref_count[fib_node->key.prefix_len]++;
4759	return 0;
4760
4761err_lpm_tree_replace:
4762	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4763	return err;
4764}
4765
4766static void mlxsw_sp_fib_lpm_tree_unlink(struct mlxsw_sp *mlxsw_sp,
4767					 struct mlxsw_sp_fib_node *fib_node)
4768{
4769	struct mlxsw_sp_lpm_tree *lpm_tree = fib_node->fib->lpm_tree;
4770	struct mlxsw_sp_prefix_usage req_prefix_usage;
4771	struct mlxsw_sp_fib *fib = fib_node->fib;
4772	int err;
4773
4774	if (--lpm_tree->prefix_ref_count[fib_node->key.prefix_len] != 0)
4775		return;
4776	/* Try to construct a new LPM tree from the current prefix usage
4777	 * minus the unused one. If we fail, continue using the old one.
4778	 */
4779	mlxsw_sp_prefix_usage_cpy(&req_prefix_usage, &lpm_tree->prefix_usage);
4780	mlxsw_sp_prefix_usage_clear(&req_prefix_usage,
4781				    fib_node->key.prefix_len);
4782	lpm_tree = mlxsw_sp_lpm_tree_get(mlxsw_sp, &req_prefix_usage,
4783					 fib->proto);
4784	if (IS_ERR(lpm_tree))
4785		return;
4786
4787	err = mlxsw_sp_vrs_lpm_tree_replace(mlxsw_sp, fib, lpm_tree);
4788	if (err)
4789		goto err_lpm_tree_replace;
4790
4791	return;
4792
4793err_lpm_tree_replace:
4794	mlxsw_sp_lpm_tree_put(mlxsw_sp, lpm_tree);
4795}
4796
4797static int mlxsw_sp_fib_node_init(struct mlxsw_sp *mlxsw_sp,
4798				  struct mlxsw_sp_fib_node *fib_node,
4799				  struct mlxsw_sp_fib *fib)
4800{
4801	int err;
4802
4803	err = mlxsw_sp_fib_node_insert(fib, fib_node);
4804	if (err)
4805		return err;
4806	fib_node->fib = fib;
4807
4808	err = mlxsw_sp_fib_lpm_tree_link(mlxsw_sp, fib_node);
4809	if (err)
4810		goto err_fib_lpm_tree_link;
4811
4812	return 0;
4813
4814err_fib_lpm_tree_link:
4815	fib_node->fib = NULL;
4816	mlxsw_sp_fib_node_remove(fib, fib_node);
4817	return err;
4818}
4819
4820static void mlxsw_sp_fib_node_fini(struct mlxsw_sp *mlxsw_sp,
4821				   struct mlxsw_sp_fib_node *fib_node)
4822{
4823	struct mlxsw_sp_fib *fib = fib_node->fib;
4824
4825	mlxsw_sp_fib_lpm_tree_unlink(mlxsw_sp, fib_node);
4826	fib_node->fib = NULL;
4827	mlxsw_sp_fib_node_remove(fib, fib_node);
4828}
4829
4830static struct mlxsw_sp_fib_node *
4831mlxsw_sp_fib_node_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id, const void *addr,
4832		      size_t addr_len, unsigned char prefix_len,
4833		      enum mlxsw_sp_l3proto proto)
4834{
4835	struct mlxsw_sp_fib_node *fib_node;
4836	struct mlxsw_sp_fib *fib;
4837	struct mlxsw_sp_vr *vr;
4838	int err;
4839
4840	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, NULL);
4841	if (IS_ERR(vr))
4842		return ERR_CAST(vr);
4843	fib = mlxsw_sp_vr_fib(vr, proto);
4844
4845	fib_node = mlxsw_sp_fib_node_lookup(fib, addr, addr_len, prefix_len);
4846	if (fib_node)
4847		return fib_node;
4848
4849	fib_node = mlxsw_sp_fib_node_create(fib, addr, addr_len, prefix_len);
4850	if (!fib_node) {
4851		err = -ENOMEM;
4852		goto err_fib_node_create;
4853	}
4854
4855	err = mlxsw_sp_fib_node_init(mlxsw_sp, fib_node, fib);
4856	if (err)
4857		goto err_fib_node_init;
4858
4859	return fib_node;
4860
4861err_fib_node_init:
4862	mlxsw_sp_fib_node_destroy(fib_node);
4863err_fib_node_create:
4864	mlxsw_sp_vr_put(mlxsw_sp, vr);
4865	return ERR_PTR(err);
4866}
4867
4868static void mlxsw_sp_fib_node_put(struct mlxsw_sp *mlxsw_sp,
4869				  struct mlxsw_sp_fib_node *fib_node)
4870{
4871	struct mlxsw_sp_vr *vr = fib_node->fib->vr;
4872
4873	if (fib_node->fib_entry)
4874		return;
4875	mlxsw_sp_fib_node_fini(mlxsw_sp, fib_node);
4876	mlxsw_sp_fib_node_destroy(fib_node);
4877	mlxsw_sp_vr_put(mlxsw_sp, vr);
4878}
4879
4880static int mlxsw_sp_fib_node_entry_link(struct mlxsw_sp *mlxsw_sp,
4881					struct mlxsw_sp_fib_entry *fib_entry)
4882{
4883	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4884	int err;
4885
4886	fib_node->fib_entry = fib_entry;
4887
4888	err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry);
4889	if (err)
4890		goto err_fib_entry_update;
4891
4892	return 0;
4893
4894err_fib_entry_update:
4895	fib_node->fib_entry = NULL;
4896	return err;
4897}
4898
4899static void
4900mlxsw_sp_fib_node_entry_unlink(struct mlxsw_sp *mlxsw_sp,
4901			       struct mlxsw_sp_fib_entry *fib_entry)
4902{
4903	struct mlxsw_sp_fib_node *fib_node = fib_entry->fib_node;
4904
4905	mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry);
4906	fib_node->fib_entry = NULL;
4907}
4908
4909static bool mlxsw_sp_fib4_allow_replace(struct mlxsw_sp_fib4_entry *fib4_entry)
4910{
4911	struct mlxsw_sp_fib_node *fib_node = fib4_entry->common.fib_node;
4912	struct mlxsw_sp_fib4_entry *fib4_replaced;
4913
4914	if (!fib_node->fib_entry)
4915		return true;
4916
4917	fib4_replaced = container_of(fib_node->fib_entry,
4918				     struct mlxsw_sp_fib4_entry, common);
4919	if (fib4_entry->tb_id == RT_TABLE_MAIN &&
4920	    fib4_replaced->tb_id == RT_TABLE_LOCAL)
4921		return false;
4922
4923	return true;
4924}
4925
4926static int
4927mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
4928			     const struct fib_entry_notifier_info *fen_info)
4929{
4930	struct mlxsw_sp_fib4_entry *fib4_entry, *fib4_replaced;
4931	struct mlxsw_sp_fib_entry *replaced;
4932	struct mlxsw_sp_fib_node *fib_node;
4933	int err;
4934
4935	if (mlxsw_sp->router->aborted)
4936		return 0;
4937
4938	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
4939					 &fen_info->dst, sizeof(fen_info->dst),
4940					 fen_info->dst_len,
4941					 MLXSW_SP_L3_PROTO_IPV4);
4942	if (IS_ERR(fib_node)) {
4943		dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB node\n");
4944		return PTR_ERR(fib_node);
4945	}
4946
4947	fib4_entry = mlxsw_sp_fib4_entry_create(mlxsw_sp, fib_node, fen_info);
4948	if (IS_ERR(fib4_entry)) {
4949		dev_warn(mlxsw_sp->bus_info->dev, "Failed to create FIB entry\n");
4950		err = PTR_ERR(fib4_entry);
4951		goto err_fib4_entry_create;
4952	}
4953
4954	if (!mlxsw_sp_fib4_allow_replace(fib4_entry)) {
4955		mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4956		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4957		return 0;
4958	}
4959
4960	replaced = fib_node->fib_entry;
4961	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib4_entry->common);
4962	if (err) {
4963		dev_warn(mlxsw_sp->bus_info->dev, "Failed to link FIB entry to node\n");
4964		goto err_fib_node_entry_link;
4965	}
4966
4967	/* Nothing to replace */
4968	if (!replaced)
4969		return 0;
4970
4971	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
4972	fib4_replaced = container_of(replaced, struct mlxsw_sp_fib4_entry,
4973				     common);
4974	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_replaced);
4975
4976	return 0;
4977
4978err_fib_node_entry_link:
4979	fib_node->fib_entry = replaced;
4980	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
4981err_fib4_entry_create:
4982	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
4983	return err;
4984}
4985
4986static void mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp,
4987				     struct fib_entry_notifier_info *fen_info)
4988{
4989	struct mlxsw_sp_fib4_entry *fib4_entry;
4990	struct mlxsw_sp_fib_node *fib_node;
4991
4992	if (mlxsw_sp->router->aborted)
4993		return;
4994
4995	fib4_entry = mlxsw_sp_fib4_entry_lookup(mlxsw_sp, fen_info);
4996	if (!fib4_entry)
4997		return;
4998	fib_node = fib4_entry->common.fib_node;
4999
5000	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib4_entry->common);
5001	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5002	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5003}
5004
5005static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
5006{
5007	/* Multicast routes aren't supported, so ignore them. Neighbour
5008	 * Discovery packets are specifically trapped.
5009	 */
5010	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_MULTICAST)
5011		return true;
5012
5013	/* Cloned routes are irrelevant in the forwarding path. */
5014	if (rt->fib6_flags & RTF_CACHE)
5015		return true;
5016
5017	return false;
5018}
5019
5020static struct mlxsw_sp_rt6 *mlxsw_sp_rt6_create(struct fib6_info *rt)
5021{
5022	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5023
5024	mlxsw_sp_rt6 = kzalloc(sizeof(*mlxsw_sp_rt6), GFP_KERNEL);
5025	if (!mlxsw_sp_rt6)
5026		return ERR_PTR(-ENOMEM);
5027
5028	/* In case of route replace, replaced route is deleted with
5029	 * no notification. Take reference to prevent accessing freed
5030	 * memory.
5031	 */
5032	mlxsw_sp_rt6->rt = rt;
5033	fib6_info_hold(rt);
5034
5035	return mlxsw_sp_rt6;
5036}
5037
5038#if IS_ENABLED(CONFIG_IPV6)
5039static void mlxsw_sp_rt6_release(struct fib6_info *rt)
5040{
5041	fib6_info_release(rt);
5042}
5043#else
5044static void mlxsw_sp_rt6_release(struct fib6_info *rt)
5045{
5046}
5047#endif
5048
5049static void mlxsw_sp_rt6_destroy(struct mlxsw_sp_rt6 *mlxsw_sp_rt6)
5050{
5051	struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
5052
5053	fib6_nh->fib_nh_flags &= ~RTNH_F_OFFLOAD;
5054	mlxsw_sp_rt6_release(mlxsw_sp_rt6->rt);
5055	kfree(mlxsw_sp_rt6);
5056}
5057
5058static struct fib6_info *
5059mlxsw_sp_fib6_entry_rt(const struct mlxsw_sp_fib6_entry *fib6_entry)
5060{
5061	return list_first_entry(&fib6_entry->rt6_list, struct mlxsw_sp_rt6,
5062				list)->rt;
5063}
5064
5065static struct mlxsw_sp_rt6 *
5066mlxsw_sp_fib6_entry_rt_find(const struct mlxsw_sp_fib6_entry *fib6_entry,
5067			    const struct fib6_info *rt)
5068{
5069	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5070
5071	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
5072		if (mlxsw_sp_rt6->rt == rt)
5073			return mlxsw_sp_rt6;
5074	}
5075
5076	return NULL;
5077}
5078
5079static bool mlxsw_sp_nexthop6_ipip_type(const struct mlxsw_sp *mlxsw_sp,
5080					const struct fib6_info *rt,
5081					enum mlxsw_sp_ipip_type *ret)
5082{
5083	return rt->fib6_nh->fib_nh_dev &&
5084	       mlxsw_sp_netdev_ipip_type(mlxsw_sp, rt->fib6_nh->fib_nh_dev, ret);
5085}
5086
5087static int mlxsw_sp_nexthop6_type_init(struct mlxsw_sp *mlxsw_sp,
5088				       struct mlxsw_sp_nexthop_group *nh_grp,
5089				       struct mlxsw_sp_nexthop *nh,
5090				       const struct fib6_info *rt)
5091{
5092	const struct mlxsw_sp_ipip_ops *ipip_ops;
5093	struct mlxsw_sp_ipip_entry *ipip_entry;
5094	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5095	struct mlxsw_sp_rif *rif;
5096	int err;
5097
5098	ipip_entry = mlxsw_sp_ipip_entry_find_by_ol_dev(mlxsw_sp, dev);
5099	if (ipip_entry) {
5100		ipip_ops = mlxsw_sp->router->ipip_ops_arr[ipip_entry->ipipt];
5101		if (ipip_ops->can_offload(mlxsw_sp, dev,
5102					  MLXSW_SP_L3_PROTO_IPV6)) {
5103			nh->type = MLXSW_SP_NEXTHOP_TYPE_IPIP;
5104			mlxsw_sp_nexthop_ipip_init(mlxsw_sp, nh, ipip_entry);
5105			return 0;
5106		}
5107	}
5108
5109	nh->type = MLXSW_SP_NEXTHOP_TYPE_ETH;
5110	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
5111	if (!rif)
5112		return 0;
5113	mlxsw_sp_nexthop_rif_init(nh, rif);
5114
5115	err = mlxsw_sp_nexthop_neigh_init(mlxsw_sp, nh);
5116	if (err)
5117		goto err_nexthop_neigh_init;
5118
5119	return 0;
5120
5121err_nexthop_neigh_init:
5122	mlxsw_sp_nexthop_rif_fini(nh);
5123	return err;
5124}
5125
5126static void mlxsw_sp_nexthop6_type_fini(struct mlxsw_sp *mlxsw_sp,
5127					struct mlxsw_sp_nexthop *nh)
5128{
5129	mlxsw_sp_nexthop_type_fini(mlxsw_sp, nh);
5130}
5131
5132static int mlxsw_sp_nexthop6_init(struct mlxsw_sp *mlxsw_sp,
5133				  struct mlxsw_sp_nexthop_group *nh_grp,
5134				  struct mlxsw_sp_nexthop *nh,
5135				  const struct fib6_info *rt)
5136{
5137	struct net_device *dev = rt->fib6_nh->fib_nh_dev;
5138
5139	nh->nh_grp = nh_grp;
5140	nh->nh_weight = rt->fib6_nh->fib_nh_weight;
5141	memcpy(&nh->gw_addr, &rt->fib6_nh->fib_nh_gw6, sizeof(nh->gw_addr));
5142	mlxsw_sp_nexthop_counter_alloc(mlxsw_sp, nh);
5143
5144	list_add_tail(&nh->router_list_node, &mlxsw_sp->router->nexthop_list);
5145
5146	if (!dev)
5147		return 0;
5148	nh->ifindex = dev->ifindex;
5149
5150	return mlxsw_sp_nexthop6_type_init(mlxsw_sp, nh_grp, nh, rt);
5151}
5152
5153static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
5154				   struct mlxsw_sp_nexthop *nh)
5155{
5156	mlxsw_sp_nexthop6_type_fini(mlxsw_sp, nh);
5157	list_del(&nh->router_list_node);
5158	mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
5159}
5160
5161static bool mlxsw_sp_rt6_is_gateway(const struct mlxsw_sp *mlxsw_sp,
5162				    const struct fib6_info *rt)
5163{
5164	return rt->fib6_nh->fib_nh_gw_family ||
5165	       mlxsw_sp_nexthop6_ipip_type(mlxsw_sp, rt, NULL);
5166}
5167
5168static struct mlxsw_sp_nexthop_group *
5169mlxsw_sp_nexthop6_group_create(struct mlxsw_sp *mlxsw_sp,
5170			       struct mlxsw_sp_fib6_entry *fib6_entry)
5171{
5172	struct mlxsw_sp_nexthop_group *nh_grp;
5173	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5174	struct mlxsw_sp_nexthop *nh;
5175	int i = 0;
5176	int err;
5177
5178	nh_grp = kzalloc(struct_size(nh_grp, nexthops, fib6_entry->nrt6),
5179			 GFP_KERNEL);
5180	if (!nh_grp)
5181		return ERR_PTR(-ENOMEM);
5182	INIT_LIST_HEAD(&nh_grp->fib_list);
5183#if IS_ENABLED(CONFIG_IPV6)
5184	nh_grp->neigh_tbl = &nd_tbl;
5185#endif
5186	mlxsw_sp_rt6 = list_first_entry(&fib6_entry->rt6_list,
5187					struct mlxsw_sp_rt6, list);
5188	nh_grp->gateway = mlxsw_sp_rt6_is_gateway(mlxsw_sp, mlxsw_sp_rt6->rt);
5189	nh_grp->count = fib6_entry->nrt6;
5190	for (i = 0; i < nh_grp->count; i++) {
5191		struct fib6_info *rt = mlxsw_sp_rt6->rt;
5192
5193		nh = &nh_grp->nexthops[i];
5194		err = mlxsw_sp_nexthop6_init(mlxsw_sp, nh_grp, nh, rt);
5195		if (err)
5196			goto err_nexthop6_init;
5197		mlxsw_sp_rt6 = list_next_entry(mlxsw_sp_rt6, list);
5198	}
5199
5200	err = mlxsw_sp_nexthop_group_insert(mlxsw_sp, nh_grp);
5201	if (err)
5202		goto err_nexthop_group_insert;
5203
5204	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5205	return nh_grp;
5206
5207err_nexthop_group_insert:
5208err_nexthop6_init:
5209	for (i--; i >= 0; i--) {
5210		nh = &nh_grp->nexthops[i];
5211		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5212	}
5213	kfree(nh_grp);
5214	return ERR_PTR(err);
5215}
5216
5217static void
5218mlxsw_sp_nexthop6_group_destroy(struct mlxsw_sp *mlxsw_sp,
5219				struct mlxsw_sp_nexthop_group *nh_grp)
5220{
5221	struct mlxsw_sp_nexthop *nh;
5222	int i = nh_grp->count;
5223
5224	mlxsw_sp_nexthop_group_remove(mlxsw_sp, nh_grp);
5225	for (i--; i >= 0; i--) {
5226		nh = &nh_grp->nexthops[i];
5227		mlxsw_sp_nexthop6_fini(mlxsw_sp, nh);
5228	}
5229	mlxsw_sp_nexthop_group_refresh(mlxsw_sp, nh_grp);
5230	WARN_ON(nh_grp->adj_index_valid);
5231	kfree(nh_grp);
5232}
5233
5234static int mlxsw_sp_nexthop6_group_get(struct mlxsw_sp *mlxsw_sp,
5235				       struct mlxsw_sp_fib6_entry *fib6_entry)
5236{
5237	struct mlxsw_sp_nexthop_group *nh_grp;
5238
5239	nh_grp = mlxsw_sp_nexthop6_group_lookup(mlxsw_sp, fib6_entry);
5240	if (!nh_grp) {
5241		nh_grp = mlxsw_sp_nexthop6_group_create(mlxsw_sp, fib6_entry);
5242		if (IS_ERR(nh_grp))
5243			return PTR_ERR(nh_grp);
5244	}
5245
5246	list_add_tail(&fib6_entry->common.nexthop_group_node,
5247		      &nh_grp->fib_list);
5248	fib6_entry->common.nh_group = nh_grp;
5249
5250	/* The route and the nexthop are described by the same struct, so we
5251	 * need to the update the nexthop offload indication for the new route.
5252	 */
5253	__mlxsw_sp_nexthop6_group_offload_refresh(nh_grp, fib6_entry);
5254
5255	return 0;
5256}
5257
5258static void mlxsw_sp_nexthop6_group_put(struct mlxsw_sp *mlxsw_sp,
5259					struct mlxsw_sp_fib_entry *fib_entry)
5260{
5261	struct mlxsw_sp_nexthop_group *nh_grp = fib_entry->nh_group;
5262
5263	list_del(&fib_entry->nexthop_group_node);
5264	if (!list_empty(&nh_grp->fib_list))
5265		return;
5266	mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, nh_grp);
5267}
5268
5269static int
5270mlxsw_sp_nexthop6_group_update(struct mlxsw_sp *mlxsw_sp,
5271			       struct mlxsw_sp_fib6_entry *fib6_entry)
5272{
5273	struct mlxsw_sp_nexthop_group *old_nh_grp = fib6_entry->common.nh_group;
5274	int err;
5275
5276	fib6_entry->common.nh_group = NULL;
5277	list_del(&fib6_entry->common.nexthop_group_node);
5278
5279	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5280	if (err)
5281		goto err_nexthop6_group_get;
5282
5283	/* In case this entry is offloaded, then the adjacency index
5284	 * currently associated with it in the device's table is that
5285	 * of the old group. Start using the new one instead.
5286	 */
5287	err = mlxsw_sp_fib_entry_update(mlxsw_sp, &fib6_entry->common);
5288	if (err)
5289		goto err_fib_entry_update;
5290
5291	if (list_empty(&old_nh_grp->fib_list))
5292		mlxsw_sp_nexthop6_group_destroy(mlxsw_sp, old_nh_grp);
5293
5294	return 0;
5295
5296err_fib_entry_update:
5297	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5298err_nexthop6_group_get:
5299	list_add_tail(&fib6_entry->common.nexthop_group_node,
5300		      &old_nh_grp->fib_list);
5301	fib6_entry->common.nh_group = old_nh_grp;
5302	return err;
5303}
5304
5305static int
5306mlxsw_sp_fib6_entry_nexthop_add(struct mlxsw_sp *mlxsw_sp,
5307				struct mlxsw_sp_fib6_entry *fib6_entry,
5308				struct fib6_info **rt_arr, unsigned int nrt6)
5309{
5310	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5311	int err, i;
5312
5313	for (i = 0; i < nrt6; i++) {
5314		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
5315		if (IS_ERR(mlxsw_sp_rt6)) {
5316			err = PTR_ERR(mlxsw_sp_rt6);
5317			goto err_rt6_create;
5318		}
5319
5320		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5321		fib6_entry->nrt6++;
5322	}
5323
5324	err = mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5325	if (err)
5326		goto err_nexthop6_group_update;
5327
5328	return 0;
5329
5330err_nexthop6_group_update:
5331	i = nrt6;
5332err_rt6_create:
5333	for (i--; i >= 0; i--) {
5334		fib6_entry->nrt6--;
5335		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
5336					       struct mlxsw_sp_rt6, list);
5337		list_del(&mlxsw_sp_rt6->list);
5338		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5339	}
5340	return err;
5341}
5342
5343static void
5344mlxsw_sp_fib6_entry_nexthop_del(struct mlxsw_sp *mlxsw_sp,
5345				struct mlxsw_sp_fib6_entry *fib6_entry,
5346				struct fib6_info **rt_arr, unsigned int nrt6)
5347{
5348	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5349	int i;
5350
5351	for (i = 0; i < nrt6; i++) {
5352		mlxsw_sp_rt6 = mlxsw_sp_fib6_entry_rt_find(fib6_entry,
5353							   rt_arr[i]);
5354		if (WARN_ON_ONCE(!mlxsw_sp_rt6))
5355			continue;
5356
5357		fib6_entry->nrt6--;
5358		list_del(&mlxsw_sp_rt6->list);
5359		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5360	}
5361
5362	mlxsw_sp_nexthop6_group_update(mlxsw_sp, fib6_entry);
5363}
5364
5365static void mlxsw_sp_fib6_entry_type_set(struct mlxsw_sp *mlxsw_sp,
5366					 struct mlxsw_sp_fib_entry *fib_entry,
5367					 const struct fib6_info *rt)
5368{
5369	/* Packets hitting RTF_REJECT routes need to be discarded by the
5370	 * stack. We can rely on their destination device not having a
5371	 * RIF (it's the loopback device) and can thus use action type
5372	 * local, which will cause them to be trapped with a lower
5373	 * priority than packets that need to be locally received.
5374	 */
5375	if (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST))
5376		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP;
5377	else if (rt->fib6_type == RTN_BLACKHOLE)
5378		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_BLACKHOLE;
5379	else if (rt->fib6_flags & RTF_REJECT)
5380		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_UNREACHABLE;
5381	else if (mlxsw_sp_rt6_is_gateway(mlxsw_sp, rt))
5382		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE;
5383	else
5384		fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL;
5385}
5386
5387static void
5388mlxsw_sp_fib6_entry_rt_destroy_all(struct mlxsw_sp_fib6_entry *fib6_entry)
5389{
5390	struct mlxsw_sp_rt6 *mlxsw_sp_rt6, *tmp;
5391
5392	list_for_each_entry_safe(mlxsw_sp_rt6, tmp, &fib6_entry->rt6_list,
5393				 list) {
5394		fib6_entry->nrt6--;
5395		list_del(&mlxsw_sp_rt6->list);
5396		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5397	}
5398}
5399
5400static struct mlxsw_sp_fib6_entry *
5401mlxsw_sp_fib6_entry_create(struct mlxsw_sp *mlxsw_sp,
5402			   struct mlxsw_sp_fib_node *fib_node,
5403			   struct fib6_info **rt_arr, unsigned int nrt6)
5404{
5405	struct mlxsw_sp_fib6_entry *fib6_entry;
5406	struct mlxsw_sp_fib_entry *fib_entry;
5407	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
5408	int err, i;
5409
5410	fib6_entry = kzalloc(sizeof(*fib6_entry), GFP_KERNEL);
5411	if (!fib6_entry)
5412		return ERR_PTR(-ENOMEM);
5413	fib_entry = &fib6_entry->common;
5414
5415	INIT_LIST_HEAD(&fib6_entry->rt6_list);
5416
5417	for (i = 0; i < nrt6; i++) {
5418		mlxsw_sp_rt6 = mlxsw_sp_rt6_create(rt_arr[i]);
5419		if (IS_ERR(mlxsw_sp_rt6)) {
5420			err = PTR_ERR(mlxsw_sp_rt6);
5421			goto err_rt6_create;
5422		}
5423		list_add_tail(&mlxsw_sp_rt6->list, &fib6_entry->rt6_list);
5424		fib6_entry->nrt6++;
5425	}
5426
5427	mlxsw_sp_fib6_entry_type_set(mlxsw_sp, fib_entry, rt_arr[0]);
5428
5429	err = mlxsw_sp_nexthop6_group_get(mlxsw_sp, fib6_entry);
5430	if (err)
5431		goto err_nexthop6_group_get;
5432
5433	fib_entry->fib_node = fib_node;
5434
5435	return fib6_entry;
5436
5437err_nexthop6_group_get:
5438	i = nrt6;
5439err_rt6_create:
5440	for (i--; i >= 0; i--) {
5441		fib6_entry->nrt6--;
5442		mlxsw_sp_rt6 = list_last_entry(&fib6_entry->rt6_list,
5443					       struct mlxsw_sp_rt6, list);
5444		list_del(&mlxsw_sp_rt6->list);
5445		mlxsw_sp_rt6_destroy(mlxsw_sp_rt6);
5446	}
5447	kfree(fib6_entry);
5448	return ERR_PTR(err);
5449}
5450
5451static void mlxsw_sp_fib6_entry_destroy(struct mlxsw_sp *mlxsw_sp,
5452					struct mlxsw_sp_fib6_entry *fib6_entry)
5453{
5454	mlxsw_sp_nexthop6_group_put(mlxsw_sp, &fib6_entry->common);
5455	mlxsw_sp_fib6_entry_rt_destroy_all(fib6_entry);
5456	WARN_ON(fib6_entry->nrt6);
5457	kfree(fib6_entry);
5458}
5459
5460static struct mlxsw_sp_fib6_entry *
5461mlxsw_sp_fib6_entry_lookup(struct mlxsw_sp *mlxsw_sp,
5462			   const struct fib6_info *rt)
5463{
5464	struct mlxsw_sp_fib6_entry *fib6_entry;
5465	struct mlxsw_sp_fib_node *fib_node;
5466	struct mlxsw_sp_fib *fib;
5467	struct fib6_info *cmp_rt;
5468	struct mlxsw_sp_vr *vr;
5469
5470	vr = mlxsw_sp_vr_find(mlxsw_sp, rt->fib6_table->tb6_id);
5471	if (!vr)
5472		return NULL;
5473	fib = mlxsw_sp_vr_fib(vr, MLXSW_SP_L3_PROTO_IPV6);
5474
5475	fib_node = mlxsw_sp_fib_node_lookup(fib, &rt->fib6_dst.addr,
5476					    sizeof(rt->fib6_dst.addr),
5477					    rt->fib6_dst.plen);
5478	if (!fib_node)
5479		return NULL;
5480
5481	fib6_entry = container_of(fib_node->fib_entry,
5482				  struct mlxsw_sp_fib6_entry, common);
5483	cmp_rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5484	if (rt->fib6_table->tb6_id == cmp_rt->fib6_table->tb6_id &&
5485	    rt->fib6_metric == cmp_rt->fib6_metric &&
5486	    mlxsw_sp_fib6_entry_rt_find(fib6_entry, rt))
5487		return fib6_entry;
5488
5489	return NULL;
5490}
5491
5492static bool mlxsw_sp_fib6_allow_replace(struct mlxsw_sp_fib6_entry *fib6_entry)
5493{
5494	struct mlxsw_sp_fib_node *fib_node = fib6_entry->common.fib_node;
5495	struct mlxsw_sp_fib6_entry *fib6_replaced;
5496	struct fib6_info *rt, *rt_replaced;
5497
5498	if (!fib_node->fib_entry)
5499		return true;
5500
5501	fib6_replaced = container_of(fib_node->fib_entry,
5502				     struct mlxsw_sp_fib6_entry,
5503				     common);
5504	rt = mlxsw_sp_fib6_entry_rt(fib6_entry);
5505	rt_replaced = mlxsw_sp_fib6_entry_rt(fib6_replaced);
5506	if (rt->fib6_table->tb6_id == RT_TABLE_MAIN &&
5507	    rt_replaced->fib6_table->tb6_id == RT_TABLE_LOCAL)
5508		return false;
5509
5510	return true;
5511}
5512
5513static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
5514					struct fib6_info **rt_arr,
5515					unsigned int nrt6)
5516{
5517	struct mlxsw_sp_fib6_entry *fib6_entry, *fib6_replaced;
5518	struct mlxsw_sp_fib_entry *replaced;
5519	struct mlxsw_sp_fib_node *fib_node;
5520	struct fib6_info *rt = rt_arr[0];
5521	int err;
5522
5523	if (mlxsw_sp->router->aborted)
5524		return 0;
5525
5526	if (rt->fib6_src.plen)
5527		return -EINVAL;
5528
5529	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5530		return 0;
5531
5532	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5533					 &rt->fib6_dst.addr,
5534					 sizeof(rt->fib6_dst.addr),
5535					 rt->fib6_dst.plen,
5536					 MLXSW_SP_L3_PROTO_IPV6);
5537	if (IS_ERR(fib_node))
5538		return PTR_ERR(fib_node);
5539
5540	fib6_entry = mlxsw_sp_fib6_entry_create(mlxsw_sp, fib_node, rt_arr,
5541						nrt6);
5542	if (IS_ERR(fib6_entry)) {
5543		err = PTR_ERR(fib6_entry);
5544		goto err_fib6_entry_create;
5545	}
5546
5547	if (!mlxsw_sp_fib6_allow_replace(fib6_entry)) {
5548		mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5549		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5550		return 0;
5551	}
5552
5553	replaced = fib_node->fib_entry;
5554	err = mlxsw_sp_fib_node_entry_link(mlxsw_sp, &fib6_entry->common);
5555	if (err)
5556		goto err_fib_node_entry_link;
5557
5558	/* Nothing to replace */
5559	if (!replaced)
5560		return 0;
5561
5562	mlxsw_sp_fib_entry_hw_flags_clear(mlxsw_sp, replaced);
5563	fib6_replaced = container_of(replaced, struct mlxsw_sp_fib6_entry,
5564				     common);
5565	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_replaced);
5566
5567	return 0;
5568
5569err_fib_node_entry_link:
5570	fib_node->fib_entry = replaced;
5571	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5572err_fib6_entry_create:
5573	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5574	return err;
5575}
5576
5577static int mlxsw_sp_router_fib6_append(struct mlxsw_sp *mlxsw_sp,
5578				       struct fib6_info **rt_arr,
5579				       unsigned int nrt6)
5580{
5581	struct mlxsw_sp_fib6_entry *fib6_entry;
5582	struct mlxsw_sp_fib_node *fib_node;
5583	struct fib6_info *rt = rt_arr[0];
5584	int err;
5585
5586	if (mlxsw_sp->router->aborted)
5587		return 0;
5588
5589	if (rt->fib6_src.plen)
5590		return -EINVAL;
5591
5592	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5593		return 0;
5594
5595	fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
5596					 &rt->fib6_dst.addr,
5597					 sizeof(rt->fib6_dst.addr),
5598					 rt->fib6_dst.plen,
5599					 MLXSW_SP_L3_PROTO_IPV6);
5600	if (IS_ERR(fib_node))
5601		return PTR_ERR(fib_node);
5602
5603	if (WARN_ON_ONCE(!fib_node->fib_entry)) {
5604		mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5605		return -EINVAL;
5606	}
5607
5608	fib6_entry = container_of(fib_node->fib_entry,
5609				  struct mlxsw_sp_fib6_entry, common);
5610	err = mlxsw_sp_fib6_entry_nexthop_add(mlxsw_sp, fib6_entry, rt_arr,
5611					      nrt6);
5612	if (err)
5613		goto err_fib6_entry_nexthop_add;
5614
5615	return 0;
5616
5617err_fib6_entry_nexthop_add:
5618	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5619	return err;
5620}
5621
5622static void mlxsw_sp_router_fib6_del(struct mlxsw_sp *mlxsw_sp,
5623				     struct fib6_info **rt_arr,
5624				     unsigned int nrt6)
5625{
5626	struct mlxsw_sp_fib6_entry *fib6_entry;
5627	struct mlxsw_sp_fib_node *fib_node;
5628	struct fib6_info *rt = rt_arr[0];
5629
5630	if (mlxsw_sp->router->aborted)
5631		return;
5632
5633	if (mlxsw_sp_fib6_rt_should_ignore(rt))
5634		return;
5635
5636	/* Multipath routes are first added to the FIB trie and only then
5637	 * notified. If we vetoed the addition, we will get a delete
5638	 * notification for a route we do not have. Therefore, do not warn if
5639	 * route was not found.
5640	 */
5641	fib6_entry = mlxsw_sp_fib6_entry_lookup(mlxsw_sp, rt);
5642	if (!fib6_entry)
5643		return;
5644
5645	/* If not all the nexthops are deleted, then only reduce the nexthop
5646	 * group.
5647	 */
5648	if (nrt6 != fib6_entry->nrt6) {
5649		mlxsw_sp_fib6_entry_nexthop_del(mlxsw_sp, fib6_entry, rt_arr,
5650						nrt6);
5651		return;
5652	}
5653
5654	fib_node = fib6_entry->common.fib_node;
5655
5656	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, &fib6_entry->common);
5657	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5658	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5659}
5660
5661static int __mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp,
5662					    enum mlxsw_reg_ralxx_protocol proto,
5663					    u8 tree_id)
5664{
5665	char ralta_pl[MLXSW_REG_RALTA_LEN];
5666	char ralst_pl[MLXSW_REG_RALST_LEN];
5667	int i, err;
5668
5669	mlxsw_reg_ralta_pack(ralta_pl, true, proto, tree_id);
5670	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl);
5671	if (err)
5672		return err;
5673
5674	mlxsw_reg_ralst_pack(ralst_pl, 0xff, tree_id);
5675	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl);
5676	if (err)
5677		return err;
5678
5679	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5680		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5681		char raltb_pl[MLXSW_REG_RALTB_LEN];
5682		char ralue_pl[MLXSW_REG_RALUE_LEN];
5683
5684		mlxsw_reg_raltb_pack(raltb_pl, vr->id, proto, tree_id);
5685		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb),
5686				      raltb_pl);
5687		if (err)
5688			return err;
5689
5690		mlxsw_reg_ralue_pack(ralue_pl, proto,
5691				     MLXSW_REG_RALUE_OP_WRITE_WRITE, vr->id, 0);
5692		mlxsw_reg_ralue_act_ip2me_pack(ralue_pl);
5693		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue),
5694				      ralue_pl);
5695		if (err)
5696			return err;
5697	}
5698
5699	return 0;
5700}
5701
5702static struct mlxsw_sp_mr_table *
5703mlxsw_sp_router_fibmr_family_to_table(struct mlxsw_sp_vr *vr, int family)
5704{
5705	if (family == RTNL_FAMILY_IPMR)
5706		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV4];
5707	else
5708		return vr->mr_table[MLXSW_SP_L3_PROTO_IPV6];
5709}
5710
5711static int mlxsw_sp_router_fibmr_add(struct mlxsw_sp *mlxsw_sp,
5712				     struct mfc_entry_notifier_info *men_info,
5713				     bool replace)
5714{
5715	struct mlxsw_sp_mr_table *mrt;
5716	struct mlxsw_sp_vr *vr;
5717
5718	if (mlxsw_sp->router->aborted)
5719		return 0;
5720
5721	vr = mlxsw_sp_vr_get(mlxsw_sp, men_info->tb_id, NULL);
5722	if (IS_ERR(vr))
5723		return PTR_ERR(vr);
5724
5725	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5726	return mlxsw_sp_mr_route_add(mrt, men_info->mfc, replace);
5727}
5728
5729static void mlxsw_sp_router_fibmr_del(struct mlxsw_sp *mlxsw_sp,
5730				      struct mfc_entry_notifier_info *men_info)
5731{
5732	struct mlxsw_sp_mr_table *mrt;
5733	struct mlxsw_sp_vr *vr;
5734
5735	if (mlxsw_sp->router->aborted)
5736		return;
5737
5738	vr = mlxsw_sp_vr_find(mlxsw_sp, men_info->tb_id);
5739	if (WARN_ON(!vr))
5740		return;
5741
5742	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, men_info->info.family);
5743	mlxsw_sp_mr_route_del(mrt, men_info->mfc);
5744	mlxsw_sp_vr_put(mlxsw_sp, vr);
5745}
5746
5747static int
5748mlxsw_sp_router_fibmr_vif_add(struct mlxsw_sp *mlxsw_sp,
5749			      struct vif_entry_notifier_info *ven_info)
5750{
5751	struct mlxsw_sp_mr_table *mrt;
5752	struct mlxsw_sp_rif *rif;
5753	struct mlxsw_sp_vr *vr;
5754
5755	if (mlxsw_sp->router->aborted)
5756		return 0;
5757
5758	vr = mlxsw_sp_vr_get(mlxsw_sp, ven_info->tb_id, NULL);
5759	if (IS_ERR(vr))
5760		return PTR_ERR(vr);
5761
5762	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5763	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, ven_info->dev);
5764	return mlxsw_sp_mr_vif_add(mrt, ven_info->dev,
5765				   ven_info->vif_index,
5766				   ven_info->vif_flags, rif);
5767}
5768
5769static void
5770mlxsw_sp_router_fibmr_vif_del(struct mlxsw_sp *mlxsw_sp,
5771			      struct vif_entry_notifier_info *ven_info)
5772{
5773	struct mlxsw_sp_mr_table *mrt;
5774	struct mlxsw_sp_vr *vr;
5775
5776	if (mlxsw_sp->router->aborted)
5777		return;
5778
5779	vr = mlxsw_sp_vr_find(mlxsw_sp, ven_info->tb_id);
5780	if (WARN_ON(!vr))
5781		return;
5782
5783	mrt = mlxsw_sp_router_fibmr_family_to_table(vr, ven_info->info.family);
5784	mlxsw_sp_mr_vif_del(mrt, ven_info->vif_index);
5785	mlxsw_sp_vr_put(mlxsw_sp, vr);
5786}
5787
5788static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
5789{
5790	enum mlxsw_reg_ralxx_protocol proto = MLXSW_REG_RALXX_PROTOCOL_IPV4;
5791	int err;
5792
5793	err = __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5794					       MLXSW_SP_LPM_TREE_MIN);
5795	if (err)
5796		return err;
5797
5798	/* The multicast router code does not need an abort trap as by default,
5799	 * packets that don't match any routes are trapped to the CPU.
5800	 */
5801
5802	proto = MLXSW_REG_RALXX_PROTOCOL_IPV6;
5803	return __mlxsw_sp_router_set_abort_trap(mlxsw_sp, proto,
5804						MLXSW_SP_LPM_TREE_MIN + 1);
5805}
5806
5807static void mlxsw_sp_fib4_node_flush(struct mlxsw_sp *mlxsw_sp,
5808				     struct mlxsw_sp_fib_node *fib_node)
5809{
5810	struct mlxsw_sp_fib4_entry *fib4_entry;
5811
5812	fib4_entry = container_of(fib_node->fib_entry,
5813				  struct mlxsw_sp_fib4_entry, common);
5814	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
5815	mlxsw_sp_fib4_entry_destroy(mlxsw_sp, fib4_entry);
5816	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5817}
5818
5819static void mlxsw_sp_fib6_node_flush(struct mlxsw_sp *mlxsw_sp,
5820				     struct mlxsw_sp_fib_node *fib_node)
5821{
5822	struct mlxsw_sp_fib6_entry *fib6_entry;
5823
5824	fib6_entry = container_of(fib_node->fib_entry,
5825				  struct mlxsw_sp_fib6_entry, common);
5826	mlxsw_sp_fib_node_entry_unlink(mlxsw_sp, fib_node->fib_entry);
5827	mlxsw_sp_fib6_entry_destroy(mlxsw_sp, fib6_entry);
5828	mlxsw_sp_fib_node_put(mlxsw_sp, fib_node);
5829}
5830
5831static void mlxsw_sp_fib_node_flush(struct mlxsw_sp *mlxsw_sp,
5832				    struct mlxsw_sp_fib_node *fib_node)
5833{
5834	switch (fib_node->fib->proto) {
5835	case MLXSW_SP_L3_PROTO_IPV4:
5836		mlxsw_sp_fib4_node_flush(mlxsw_sp, fib_node);
5837		break;
5838	case MLXSW_SP_L3_PROTO_IPV6:
5839		mlxsw_sp_fib6_node_flush(mlxsw_sp, fib_node);
5840		break;
5841	}
5842}
5843
5844static void mlxsw_sp_vr_fib_flush(struct mlxsw_sp *mlxsw_sp,
5845				  struct mlxsw_sp_vr *vr,
5846				  enum mlxsw_sp_l3proto proto)
5847{
5848	struct mlxsw_sp_fib *fib = mlxsw_sp_vr_fib(vr, proto);
5849	struct mlxsw_sp_fib_node *fib_node, *tmp;
5850
5851	list_for_each_entry_safe(fib_node, tmp, &fib->node_list, list) {
5852		bool do_break = &tmp->list == &fib->node_list;
5853
5854		mlxsw_sp_fib_node_flush(mlxsw_sp, fib_node);
5855		if (do_break)
5856			break;
5857	}
5858}
5859
5860static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
5861{
5862	int i, j;
5863
5864	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
5865		struct mlxsw_sp_vr *vr = &mlxsw_sp->router->vrs[i];
5866
5867		if (!mlxsw_sp_vr_is_used(vr))
5868			continue;
5869
5870		for (j = 0; j < MLXSW_SP_L3_PROTO_MAX; j++)
5871			mlxsw_sp_mr_table_flush(vr->mr_table[j]);
5872		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV4);
5873
5874		/* If virtual router was only used for IPv4, then it's no
5875		 * longer used.
5876		 */
5877		if (!mlxsw_sp_vr_is_used(vr))
5878			continue;
5879		mlxsw_sp_vr_fib_flush(mlxsw_sp, vr, MLXSW_SP_L3_PROTO_IPV6);
5880	}
5881
5882	/* After flushing all the routes, it is not possible anyone is still
5883	 * using the adjacency index that is discarding packets, so free it in
5884	 * case it was allocated.
5885	 */
5886	if (!mlxsw_sp->router->adj_discard_index_valid)
5887		return;
5888	mlxsw_sp_kvdl_free(mlxsw_sp, MLXSW_SP_KVDL_ENTRY_TYPE_ADJ, 1,
5889			   mlxsw_sp->router->adj_discard_index);
5890	mlxsw_sp->router->adj_discard_index_valid = false;
5891}
5892
5893static void mlxsw_sp_router_fib_abort(struct mlxsw_sp *mlxsw_sp)
5894{
5895	int err;
5896
5897	if (mlxsw_sp->router->aborted)
5898		return;
5899	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
5900	mlxsw_sp_router_fib_flush(mlxsw_sp);
5901	mlxsw_sp->router->aborted = true;
5902	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
5903	if (err)
5904		dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n");
5905}
5906
5907struct mlxsw_sp_fib6_event_work {
5908	struct fib6_info **rt_arr;
5909	unsigned int nrt6;
5910};
5911
5912struct mlxsw_sp_fib_event_work {
5913	struct work_struct work;
5914	union {
5915		struct mlxsw_sp_fib6_event_work fib6_work;
5916		struct fib_entry_notifier_info fen_info;
5917		struct fib_rule_notifier_info fr_info;
5918		struct fib_nh_notifier_info fnh_info;
5919		struct mfc_entry_notifier_info men_info;
5920		struct vif_entry_notifier_info ven_info;
5921	};
5922	struct mlxsw_sp *mlxsw_sp;
5923	unsigned long event;
5924};
5925
5926static int
5927mlxsw_sp_router_fib6_work_init(struct mlxsw_sp_fib6_event_work *fib6_work,
5928			       struct fib6_entry_notifier_info *fen6_info)
5929{
5930	struct fib6_info *rt = fen6_info->rt;
5931	struct fib6_info **rt_arr;
5932	struct fib6_info *iter;
5933	unsigned int nrt6;
5934	int i = 0;
5935
5936	nrt6 = fen6_info->nsiblings + 1;
5937
5938	rt_arr = kcalloc(nrt6, sizeof(struct fib6_info *), GFP_ATOMIC);
5939	if (!rt_arr)
5940		return -ENOMEM;
5941
5942	fib6_work->rt_arr = rt_arr;
5943	fib6_work->nrt6 = nrt6;
5944
5945	rt_arr[0] = rt;
5946	fib6_info_hold(rt);
5947
5948	if (!fen6_info->nsiblings)
5949		return 0;
5950
5951	list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) {
5952		if (i == fen6_info->nsiblings)
5953			break;
5954
5955		rt_arr[i + 1] = iter;
5956		fib6_info_hold(iter);
5957		i++;
5958	}
5959	WARN_ON_ONCE(i != fen6_info->nsiblings);
5960
5961	return 0;
5962}
5963
5964static void
5965mlxsw_sp_router_fib6_work_fini(struct mlxsw_sp_fib6_event_work *fib6_work)
5966{
5967	int i;
5968
5969	for (i = 0; i < fib6_work->nrt6; i++)
5970		mlxsw_sp_rt6_release(fib6_work->rt_arr[i]);
5971	kfree(fib6_work->rt_arr);
5972}
5973
5974static void mlxsw_sp_router_fib4_event_work(struct work_struct *work)
5975{
5976	struct mlxsw_sp_fib_event_work *fib_work =
5977		container_of(work, struct mlxsw_sp_fib_event_work, work);
5978	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
5979	int err;
5980
5981	mutex_lock(&mlxsw_sp->router->lock);
5982	mlxsw_sp_span_respin(mlxsw_sp);
5983
5984	switch (fib_work->event) {
5985	case FIB_EVENT_ENTRY_REPLACE:
5986		err = mlxsw_sp_router_fib4_replace(mlxsw_sp,
5987						   &fib_work->fen_info);
5988		if (err)
5989			mlxsw_sp_router_fib_abort(mlxsw_sp);
5990		fib_info_put(fib_work->fen_info.fi);
5991		break;
5992	case FIB_EVENT_ENTRY_DEL:
5993		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
5994		fib_info_put(fib_work->fen_info.fi);
5995		break;
5996	case FIB_EVENT_NH_ADD:
5997	case FIB_EVENT_NH_DEL:
5998		mlxsw_sp_nexthop4_event(mlxsw_sp, fib_work->event,
5999					fib_work->fnh_info.fib_nh);
6000		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
6001		break;
6002	}
6003	mutex_unlock(&mlxsw_sp->router->lock);
6004	kfree(fib_work);
6005}
6006
6007static void mlxsw_sp_router_fib6_event_work(struct work_struct *work)
6008{
6009	struct mlxsw_sp_fib_event_work *fib_work =
6010		container_of(work, struct mlxsw_sp_fib_event_work, work);
6011	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
6012	int err;
6013
6014	mutex_lock(&mlxsw_sp->router->lock);
6015	mlxsw_sp_span_respin(mlxsw_sp);
6016
6017	switch (fib_work->event) {
6018	case FIB_EVENT_ENTRY_REPLACE:
6019		err = mlxsw_sp_router_fib6_replace(mlxsw_sp,
6020						   fib_work->fib6_work.rt_arr,
6021						   fib_work->fib6_work.nrt6);
6022		if (err)
6023			mlxsw_sp_router_fib_abort(mlxsw_sp);
6024		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
6025		break;
6026	case FIB_EVENT_ENTRY_APPEND:
6027		err = mlxsw_sp_router_fib6_append(mlxsw_sp,
6028						  fib_work->fib6_work.rt_arr,
6029						  fib_work->fib6_work.nrt6);
6030		if (err)
6031			mlxsw_sp_router_fib_abort(mlxsw_sp);
6032		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
6033		break;
6034	case FIB_EVENT_ENTRY_DEL:
6035		mlxsw_sp_router_fib6_del(mlxsw_sp,
6036					 fib_work->fib6_work.rt_arr,
6037					 fib_work->fib6_work.nrt6);
6038		mlxsw_sp_router_fib6_work_fini(&fib_work->fib6_work);
6039		break;
6040	}
6041	mutex_unlock(&mlxsw_sp->router->lock);
6042	kfree(fib_work);
6043}
6044
6045static void mlxsw_sp_router_fibmr_event_work(struct work_struct *work)
6046{
6047	struct mlxsw_sp_fib_event_work *fib_work =
6048		container_of(work, struct mlxsw_sp_fib_event_work, work);
6049	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
6050	bool replace;
6051	int err;
6052
6053	rtnl_lock();
6054	mutex_lock(&mlxsw_sp->router->lock);
6055	switch (fib_work->event) {
6056	case FIB_EVENT_ENTRY_REPLACE:
6057	case FIB_EVENT_ENTRY_ADD:
6058		replace = fib_work->event == FIB_EVENT_ENTRY_REPLACE;
6059
6060		err = mlxsw_sp_router_fibmr_add(mlxsw_sp, &fib_work->men_info,
6061						replace);
6062		if (err)
6063			mlxsw_sp_router_fib_abort(mlxsw_sp);
6064		mr_cache_put(fib_work->men_info.mfc);
6065		break;
6066	case FIB_EVENT_ENTRY_DEL:
6067		mlxsw_sp_router_fibmr_del(mlxsw_sp, &fib_work->men_info);
6068		mr_cache_put(fib_work->men_info.mfc);
6069		break;
6070	case FIB_EVENT_VIF_ADD:
6071		err = mlxsw_sp_router_fibmr_vif_add(mlxsw_sp,
6072						    &fib_work->ven_info);
6073		if (err)
6074			mlxsw_sp_router_fib_abort(mlxsw_sp);
6075		dev_put(fib_work->ven_info.dev);
6076		break;
6077	case FIB_EVENT_VIF_DEL:
6078		mlxsw_sp_router_fibmr_vif_del(mlxsw_sp,
6079					      &fib_work->ven_info);
6080		dev_put(fib_work->ven_info.dev);
6081		break;
6082	}
6083	mutex_unlock(&mlxsw_sp->router->lock);
6084	rtnl_unlock();
6085	kfree(fib_work);
6086}
6087
6088static void mlxsw_sp_router_fib4_event(struct mlxsw_sp_fib_event_work *fib_work,
6089				       struct fib_notifier_info *info)
6090{
6091	struct fib_entry_notifier_info *fen_info;
6092	struct fib_nh_notifier_info *fnh_info;
6093
6094	switch (fib_work->event) {
6095	case FIB_EVENT_ENTRY_REPLACE:
6096	case FIB_EVENT_ENTRY_DEL:
6097		fen_info = container_of(info, struct fib_entry_notifier_info,
6098					info);
6099		fib_work->fen_info = *fen_info;
6100		/* Take reference on fib_info to prevent it from being
6101		 * freed while work is queued. Release it afterwards.
6102		 */
6103		fib_info_hold(fib_work->fen_info.fi);
6104		break;
6105	case FIB_EVENT_NH_ADD:
6106	case FIB_EVENT_NH_DEL:
6107		fnh_info = container_of(info, struct fib_nh_notifier_info,
6108					info);
6109		fib_work->fnh_info = *fnh_info;
6110		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
6111		break;
6112	}
6113}
6114
6115static int mlxsw_sp_router_fib6_event(struct mlxsw_sp_fib_event_work *fib_work,
6116				      struct fib_notifier_info *info)
6117{
6118	struct fib6_entry_notifier_info *fen6_info;
6119	int err;
6120
6121	switch (fib_work->event) {
6122	case FIB_EVENT_ENTRY_REPLACE:
6123	case FIB_EVENT_ENTRY_APPEND:
6124	case FIB_EVENT_ENTRY_DEL:
6125		fen6_info = container_of(info, struct fib6_entry_notifier_info,
6126					 info);
6127		err = mlxsw_sp_router_fib6_work_init(&fib_work->fib6_work,
6128						     fen6_info);
6129		if (err)
6130			return err;
6131		break;
6132	}
6133
6134	return 0;
6135}
6136
6137static void
6138mlxsw_sp_router_fibmr_event(struct mlxsw_sp_fib_event_work *fib_work,
6139			    struct fib_notifier_info *info)
6140{
6141	switch (fib_work->event) {
6142	case FIB_EVENT_ENTRY_REPLACE:
6143	case FIB_EVENT_ENTRY_ADD:
6144	case FIB_EVENT_ENTRY_DEL:
6145		memcpy(&fib_work->men_info, info, sizeof(fib_work->men_info));
6146		mr_cache_hold(fib_work->men_info.mfc);
6147		break;
6148	case FIB_EVENT_VIF_ADD:
6149	case FIB_EVENT_VIF_DEL:
6150		memcpy(&fib_work->ven_info, info, sizeof(fib_work->ven_info));
6151		dev_hold(fib_work->ven_info.dev);
6152		break;
6153	}
6154}
6155
6156static int mlxsw_sp_router_fib_rule_event(unsigned long event,
6157					  struct fib_notifier_info *info,
6158					  struct mlxsw_sp *mlxsw_sp)
6159{
6160	struct netlink_ext_ack *extack = info->extack;
6161	struct fib_rule_notifier_info *fr_info;
6162	struct fib_rule *rule;
6163	int err = 0;
6164
6165	/* nothing to do at the moment */
6166	if (event == FIB_EVENT_RULE_DEL)
6167		return 0;
6168
6169	if (mlxsw_sp->router->aborted)
6170		return 0;
6171
6172	fr_info = container_of(info, struct fib_rule_notifier_info, info);
6173	rule = fr_info->rule;
6174
6175	/* Rule only affects locally generated traffic */
6176	if (rule->iifindex == mlxsw_sp_net(mlxsw_sp)->loopback_dev->ifindex)
6177		return 0;
6178
6179	switch (info->family) {
6180	case AF_INET:
6181		if (!fib4_rule_default(rule) && !rule->l3mdev)
6182			err = -EOPNOTSUPP;
6183		break;
6184	case AF_INET6:
6185		if (!fib6_rule_default(rule) && !rule->l3mdev)
6186			err = -EOPNOTSUPP;
6187		break;
6188	case RTNL_FAMILY_IPMR:
6189		if (!ipmr_rule_default(rule) && !rule->l3mdev)
6190			err = -EOPNOTSUPP;
6191		break;
6192	case RTNL_FAMILY_IP6MR:
6193		if (!ip6mr_rule_default(rule) && !rule->l3mdev)
6194			err = -EOPNOTSUPP;
6195		break;
6196	}
6197
6198	if (err < 0)
6199		NL_SET_ERR_MSG_MOD(extack, "FIB rules not supported");
6200
6201	return err;
6202}
6203
6204/* Called with rcu_read_lock() */
6205static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
6206				     unsigned long event, void *ptr)
6207{
6208	struct mlxsw_sp_fib_event_work *fib_work;
6209	struct fib_notifier_info *info = ptr;
6210	struct mlxsw_sp_router *router;
6211	int err;
6212
6213	if ((info->family != AF_INET && info->family != AF_INET6 &&
6214	     info->family != RTNL_FAMILY_IPMR &&
6215	     info->family != RTNL_FAMILY_IP6MR))
6216		return NOTIFY_DONE;
6217
6218	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
6219
6220	switch (event) {
6221	case FIB_EVENT_RULE_ADD:
6222	case FIB_EVENT_RULE_DEL:
6223		err = mlxsw_sp_router_fib_rule_event(event, info,
6224						     router->mlxsw_sp);
6225		return notifier_from_errno(err);
6226	case FIB_EVENT_ENTRY_ADD:
6227	case FIB_EVENT_ENTRY_REPLACE:
6228	case FIB_EVENT_ENTRY_APPEND:
6229		if (router->aborted) {
6230			NL_SET_ERR_MSG_MOD(info->extack, "FIB offload was aborted. Not configuring route");
6231			return notifier_from_errno(-EINVAL);
6232		}
6233		if (info->family == AF_INET) {
6234			struct fib_entry_notifier_info *fen_info = ptr;
6235
6236			if (fen_info->fi->fib_nh_is_v6) {
6237				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 gateway with IPv4 route is not supported");
6238				return notifier_from_errno(-EINVAL);
6239			}
6240			if (fen_info->fi->nh) {
6241				NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
6242				return notifier_from_errno(-EINVAL);
6243			}
6244		} else if (info->family == AF_INET6) {
6245			struct fib6_entry_notifier_info *fen6_info;
6246
6247			fen6_info = container_of(info,
6248						 struct fib6_entry_notifier_info,
6249						 info);
6250			if (fen6_info->rt->nh) {
6251				NL_SET_ERR_MSG_MOD(info->extack, "IPv6 route with nexthop objects is not supported");
6252				return notifier_from_errno(-EINVAL);
6253			}
6254		}
6255		break;
6256	}
6257
6258	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
6259	if (!fib_work)
6260		return NOTIFY_BAD;
6261
6262	fib_work->mlxsw_sp = router->mlxsw_sp;
6263	fib_work->event = event;
6264
6265	switch (info->family) {
6266	case AF_INET:
6267		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib4_event_work);
6268		mlxsw_sp_router_fib4_event(fib_work, info);
6269		break;
6270	case AF_INET6:
6271		INIT_WORK(&fib_work->work, mlxsw_sp_router_fib6_event_work);
6272		err = mlxsw_sp_router_fib6_event(fib_work, info);
6273		if (err)
6274			goto err_fib_event;
6275		break;
6276	case RTNL_FAMILY_IP6MR:
6277	case RTNL_FAMILY_IPMR:
6278		INIT_WORK(&fib_work->work, mlxsw_sp_router_fibmr_event_work);
6279		mlxsw_sp_router_fibmr_event(fib_work, info);
6280		break;
6281	}
6282
6283	mlxsw_core_schedule_work(&fib_work->work);
6284
6285	return NOTIFY_DONE;
6286
6287err_fib_event:
6288	kfree(fib_work);
6289	return NOTIFY_BAD;
6290}
6291
6292static struct mlxsw_sp_rif *
6293mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
6294			 const struct net_device *dev)
6295{
6296	int i;
6297
6298	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
6299		if (mlxsw_sp->router->rifs[i] &&
6300		    mlxsw_sp->router->rifs[i]->dev == dev)
6301			return mlxsw_sp->router->rifs[i];
6302
6303	return NULL;
6304}
6305
6306bool mlxsw_sp_rif_exists(struct mlxsw_sp *mlxsw_sp,
6307			 const struct net_device *dev)
6308{
6309	struct mlxsw_sp_rif *rif;
6310
6311	mutex_lock(&mlxsw_sp->router->lock);
6312	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6313	mutex_unlock(&mlxsw_sp->router->lock);
6314
6315	return rif;
6316}
6317
6318u16 mlxsw_sp_rif_vid(struct mlxsw_sp *mlxsw_sp, const struct net_device *dev)
6319{
6320	struct mlxsw_sp_rif *rif;
6321	u16 vid = 0;
6322
6323	mutex_lock(&mlxsw_sp->router->lock);
6324	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6325	if (!rif)
6326		goto out;
6327
6328	/* We only return the VID for VLAN RIFs. Otherwise we return an
6329	 * invalid value (0).
6330	 */
6331	if (rif->ops->type != MLXSW_SP_RIF_TYPE_VLAN)
6332		goto out;
6333
6334	vid = mlxsw_sp_fid_8021q_vid(rif->fid);
6335
6336out:
6337	mutex_unlock(&mlxsw_sp->router->lock);
6338	return vid;
6339}
6340
6341static int mlxsw_sp_router_rif_disable(struct mlxsw_sp *mlxsw_sp, u16 rif)
6342{
6343	char ritr_pl[MLXSW_REG_RITR_LEN];
6344	int err;
6345
6346	mlxsw_reg_ritr_rif_pack(ritr_pl, rif);
6347	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6348	if (err)
6349		return err;
6350
6351	mlxsw_reg_ritr_enable_set(ritr_pl, false);
6352	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6353}
6354
6355static void mlxsw_sp_router_rif_gone_sync(struct mlxsw_sp *mlxsw_sp,
6356					  struct mlxsw_sp_rif *rif)
6357{
6358	mlxsw_sp_router_rif_disable(mlxsw_sp, rif->rif_index);
6359	mlxsw_sp_nexthop_rif_gone_sync(mlxsw_sp, rif);
6360	mlxsw_sp_neigh_rif_gone_sync(mlxsw_sp, rif);
6361}
6362
6363static bool
6364mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *rif, struct net_device *dev,
6365			   unsigned long event)
6366{
6367	struct inet6_dev *inet6_dev;
6368	bool addr_list_empty = true;
6369	struct in_device *idev;
6370
6371	switch (event) {
6372	case NETDEV_UP:
6373		return rif == NULL;
6374	case NETDEV_DOWN:
6375		rcu_read_lock();
6376		idev = __in_dev_get_rcu(dev);
6377		if (idev && idev->ifa_list)
6378			addr_list_empty = false;
6379
6380		inet6_dev = __in6_dev_get(dev);
6381		if (addr_list_empty && inet6_dev &&
6382		    !list_empty(&inet6_dev->addr_list))
6383			addr_list_empty = false;
6384		rcu_read_unlock();
6385
6386		/* macvlans do not have a RIF, but rather piggy back on the
6387		 * RIF of their lower device.
6388		 */
6389		if (netif_is_macvlan(dev) && addr_list_empty)
6390			return true;
6391
6392		if (rif && addr_list_empty &&
6393		    !netif_is_l3_slave(rif->dev))
6394			return true;
6395		/* It is possible we already removed the RIF ourselves
6396		 * if it was assigned to a netdev that is now a bridge
6397		 * or LAG slave.
6398		 */
6399		return false;
6400	}
6401
6402	return false;
6403}
6404
6405static enum mlxsw_sp_rif_type
6406mlxsw_sp_dev_rif_type(const struct mlxsw_sp *mlxsw_sp,
6407		      const struct net_device *dev)
6408{
6409	enum mlxsw_sp_fid_type type;
6410
6411	if (mlxsw_sp_netdev_ipip_type(mlxsw_sp, dev, NULL))
6412		return MLXSW_SP_RIF_TYPE_IPIP_LB;
6413
6414	/* Otherwise RIF type is derived from the type of the underlying FID. */
6415	if (is_vlan_dev(dev) && netif_is_bridge_master(vlan_dev_real_dev(dev)))
6416		type = MLXSW_SP_FID_TYPE_8021Q;
6417	else if (netif_is_bridge_master(dev) && br_vlan_enabled(dev))
6418		type = MLXSW_SP_FID_TYPE_8021Q;
6419	else if (netif_is_bridge_master(dev))
6420		type = MLXSW_SP_FID_TYPE_8021D;
6421	else
6422		type = MLXSW_SP_FID_TYPE_RFID;
6423
6424	return mlxsw_sp_fid_type_rif_type(mlxsw_sp, type);
6425}
6426
6427static int mlxsw_sp_rif_index_alloc(struct mlxsw_sp *mlxsw_sp, u16 *p_rif_index)
6428{
6429	int i;
6430
6431	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
6432		if (!mlxsw_sp->router->rifs[i]) {
6433			*p_rif_index = i;
6434			return 0;
6435		}
6436	}
6437
6438	return -ENOBUFS;
6439}
6440
6441static struct mlxsw_sp_rif *mlxsw_sp_rif_alloc(size_t rif_size, u16 rif_index,
6442					       u16 vr_id,
6443					       struct net_device *l3_dev)
6444{
6445	struct mlxsw_sp_rif *rif;
6446
6447	rif = kzalloc(rif_size, GFP_KERNEL);
6448	if (!rif)
6449		return NULL;
6450
6451	INIT_LIST_HEAD(&rif->nexthop_list);
6452	INIT_LIST_HEAD(&rif->neigh_list);
6453	if (l3_dev) {
6454		ether_addr_copy(rif->addr, l3_dev->dev_addr);
6455		rif->mtu = l3_dev->mtu;
6456		rif->dev = l3_dev;
6457	}
6458	rif->vr_id = vr_id;
6459	rif->rif_index = rif_index;
6460
6461	return rif;
6462}
6463
6464struct mlxsw_sp_rif *mlxsw_sp_rif_by_index(const struct mlxsw_sp *mlxsw_sp,
6465					   u16 rif_index)
6466{
6467	return mlxsw_sp->router->rifs[rif_index];
6468}
6469
6470u16 mlxsw_sp_rif_index(const struct mlxsw_sp_rif *rif)
6471{
6472	return rif->rif_index;
6473}
6474
6475u16 mlxsw_sp_ipip_lb_rif_index(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6476{
6477	return lb_rif->common.rif_index;
6478}
6479
6480u16 mlxsw_sp_ipip_lb_ul_vr_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6481{
6482	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(lb_rif->common.dev);
6483	struct mlxsw_sp_vr *ul_vr;
6484
6485	ul_vr = mlxsw_sp_vr_get(lb_rif->common.mlxsw_sp, ul_tb_id, NULL);
6486	if (WARN_ON(IS_ERR(ul_vr)))
6487		return 0;
6488
6489	return ul_vr->id;
6490}
6491
6492u16 mlxsw_sp_ipip_lb_ul_rif_id(const struct mlxsw_sp_rif_ipip_lb *lb_rif)
6493{
6494	return lb_rif->ul_rif_id;
6495}
6496
6497int mlxsw_sp_rif_dev_ifindex(const struct mlxsw_sp_rif *rif)
6498{
6499	return rif->dev->ifindex;
6500}
6501
6502const struct net_device *mlxsw_sp_rif_dev(const struct mlxsw_sp_rif *rif)
6503{
6504	return rif->dev;
6505}
6506
6507static struct mlxsw_sp_rif *
6508mlxsw_sp_rif_create(struct mlxsw_sp *mlxsw_sp,
6509		    const struct mlxsw_sp_rif_params *params,
6510		    struct netlink_ext_ack *extack)
6511{
6512	u32 tb_id = l3mdev_fib_table(params->dev);
6513	const struct mlxsw_sp_rif_ops *ops;
6514	struct mlxsw_sp_fid *fid = NULL;
6515	enum mlxsw_sp_rif_type type;
6516	struct mlxsw_sp_rif *rif;
6517	struct mlxsw_sp_vr *vr;
6518	u16 rif_index;
6519	int i, err;
6520
6521	type = mlxsw_sp_dev_rif_type(mlxsw_sp, params->dev);
6522	ops = mlxsw_sp->rif_ops_arr[type];
6523
6524	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id ? : RT_TABLE_MAIN, extack);
6525	if (IS_ERR(vr))
6526		return ERR_CAST(vr);
6527	vr->rif_count++;
6528
6529	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
6530	if (err) {
6531		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
6532		goto err_rif_index_alloc;
6533	}
6534
6535	rif = mlxsw_sp_rif_alloc(ops->rif_size, rif_index, vr->id, params->dev);
6536	if (!rif) {
6537		err = -ENOMEM;
6538		goto err_rif_alloc;
6539	}
6540	dev_hold(rif->dev);
6541	mlxsw_sp->router->rifs[rif_index] = rif;
6542	rif->mlxsw_sp = mlxsw_sp;
6543	rif->ops = ops;
6544
6545	if (ops->fid_get) {
6546		fid = ops->fid_get(rif, extack);
6547		if (IS_ERR(fid)) {
6548			err = PTR_ERR(fid);
6549			goto err_fid_get;
6550		}
6551		rif->fid = fid;
6552	}
6553
6554	if (ops->setup)
6555		ops->setup(rif, params);
6556
6557	err = ops->configure(rif);
6558	if (err)
6559		goto err_configure;
6560
6561	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++) {
6562		err = mlxsw_sp_mr_rif_add(vr->mr_table[i], rif);
6563		if (err)
6564			goto err_mr_rif_add;
6565	}
6566
6567	mlxsw_sp_rif_counters_alloc(rif);
6568
6569	return rif;
6570
6571err_mr_rif_add:
6572	for (i--; i >= 0; i--)
6573		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6574	ops->deconfigure(rif);
6575err_configure:
6576	if (fid)
6577		mlxsw_sp_fid_put(fid);
6578err_fid_get:
6579	mlxsw_sp->router->rifs[rif_index] = NULL;
6580	dev_put(rif->dev);
6581	kfree(rif);
6582err_rif_alloc:
6583err_rif_index_alloc:
6584	vr->rif_count--;
6585	mlxsw_sp_vr_put(mlxsw_sp, vr);
6586	return ERR_PTR(err);
6587}
6588
6589static void mlxsw_sp_rif_destroy(struct mlxsw_sp_rif *rif)
6590{
6591	const struct mlxsw_sp_rif_ops *ops = rif->ops;
6592	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
6593	struct mlxsw_sp_fid *fid = rif->fid;
6594	struct mlxsw_sp_vr *vr;
6595	int i;
6596
6597	mlxsw_sp_router_rif_gone_sync(mlxsw_sp, rif);
6598	vr = &mlxsw_sp->router->vrs[rif->vr_id];
6599
6600	mlxsw_sp_rif_counters_free(rif);
6601	for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
6602		mlxsw_sp_mr_rif_del(vr->mr_table[i], rif);
6603	ops->deconfigure(rif);
6604	if (fid)
6605		/* Loopback RIFs are not associated with a FID. */
6606		mlxsw_sp_fid_put(fid);
6607	mlxsw_sp->router->rifs[rif->rif_index] = NULL;
6608	dev_put(rif->dev);
6609	kfree(rif);
6610	vr->rif_count--;
6611	mlxsw_sp_vr_put(mlxsw_sp, vr);
6612}
6613
6614void mlxsw_sp_rif_destroy_by_dev(struct mlxsw_sp *mlxsw_sp,
6615				 struct net_device *dev)
6616{
6617	struct mlxsw_sp_rif *rif;
6618
6619	mutex_lock(&mlxsw_sp->router->lock);
6620	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
6621	if (!rif)
6622		goto out;
6623	mlxsw_sp_rif_destroy(rif);
6624out:
6625	mutex_unlock(&mlxsw_sp->router->lock);
6626}
6627
6628static void
6629mlxsw_sp_rif_subport_params_init(struct mlxsw_sp_rif_params *params,
6630				 struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6631{
6632	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6633
6634	params->vid = mlxsw_sp_port_vlan->vid;
6635	params->lag = mlxsw_sp_port->lagged;
6636	if (params->lag)
6637		params->lag_id = mlxsw_sp_port->lag_id;
6638	else
6639		params->system_port = mlxsw_sp_port->local_port;
6640}
6641
6642static struct mlxsw_sp_rif_subport *
6643mlxsw_sp_rif_subport_rif(const struct mlxsw_sp_rif *rif)
6644{
6645	return container_of(rif, struct mlxsw_sp_rif_subport, common);
6646}
6647
6648static struct mlxsw_sp_rif *
6649mlxsw_sp_rif_subport_get(struct mlxsw_sp *mlxsw_sp,
6650			 const struct mlxsw_sp_rif_params *params,
6651			 struct netlink_ext_ack *extack)
6652{
6653	struct mlxsw_sp_rif_subport *rif_subport;
6654	struct mlxsw_sp_rif *rif;
6655
6656	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, params->dev);
6657	if (!rif)
6658		return mlxsw_sp_rif_create(mlxsw_sp, params, extack);
6659
6660	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6661	refcount_inc(&rif_subport->ref_count);
6662	return rif;
6663}
6664
6665static void mlxsw_sp_rif_subport_put(struct mlxsw_sp_rif *rif)
6666{
6667	struct mlxsw_sp_rif_subport *rif_subport;
6668
6669	rif_subport = mlxsw_sp_rif_subport_rif(rif);
6670	if (!refcount_dec_and_test(&rif_subport->ref_count))
6671		return;
6672
6673	mlxsw_sp_rif_destroy(rif);
6674}
6675
6676static int
6677mlxsw_sp_port_vlan_router_join(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan,
6678			       struct net_device *l3_dev,
6679			       struct netlink_ext_ack *extack)
6680{
6681	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6682	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
6683	struct mlxsw_sp_rif_params params = {
6684		.dev = l3_dev,
6685	};
6686	u16 vid = mlxsw_sp_port_vlan->vid;
6687	struct mlxsw_sp_rif *rif;
6688	struct mlxsw_sp_fid *fid;
6689	int err;
6690
6691	mlxsw_sp_rif_subport_params_init(&params, mlxsw_sp_port_vlan);
6692	rif = mlxsw_sp_rif_subport_get(mlxsw_sp, &params, extack);
6693	if (IS_ERR(rif))
6694		return PTR_ERR(rif);
6695
6696	/* FID was already created, just take a reference */
6697	fid = rif->ops->fid_get(rif, extack);
6698	err = mlxsw_sp_fid_port_vid_map(fid, mlxsw_sp_port, vid);
6699	if (err)
6700		goto err_fid_port_vid_map;
6701
6702	err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, false);
6703	if (err)
6704		goto err_port_vid_learning_set;
6705
6706	err = mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid,
6707					BR_STATE_FORWARDING);
6708	if (err)
6709		goto err_port_vid_stp_set;
6710
6711	mlxsw_sp_port_vlan->fid = fid;
6712
6713	return 0;
6714
6715err_port_vid_stp_set:
6716	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6717err_port_vid_learning_set:
6718	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6719err_fid_port_vid_map:
6720	mlxsw_sp_fid_put(fid);
6721	mlxsw_sp_rif_subport_put(rif);
6722	return err;
6723}
6724
6725static void
6726__mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6727{
6728	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp_port_vlan->mlxsw_sp_port;
6729	struct mlxsw_sp_fid *fid = mlxsw_sp_port_vlan->fid;
6730	struct mlxsw_sp_rif *rif = mlxsw_sp_fid_rif(fid);
6731	u16 vid = mlxsw_sp_port_vlan->vid;
6732
6733	if (WARN_ON(mlxsw_sp_fid_type(fid) != MLXSW_SP_FID_TYPE_RFID))
6734		return;
6735
6736	mlxsw_sp_port_vlan->fid = NULL;
6737	mlxsw_sp_port_vid_stp_set(mlxsw_sp_port, vid, BR_STATE_BLOCKING);
6738	mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, true);
6739	mlxsw_sp_fid_port_vid_unmap(fid, mlxsw_sp_port, vid);
6740	mlxsw_sp_fid_put(fid);
6741	mlxsw_sp_rif_subport_put(rif);
6742}
6743
6744void
6745mlxsw_sp_port_vlan_router_leave(struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan)
6746{
6747	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port_vlan->mlxsw_sp_port->mlxsw_sp;
6748
6749	mutex_lock(&mlxsw_sp->router->lock);
6750	__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6751	mutex_unlock(&mlxsw_sp->router->lock);
6752}
6753
6754static int mlxsw_sp_inetaddr_port_vlan_event(struct net_device *l3_dev,
6755					     struct net_device *port_dev,
6756					     unsigned long event, u16 vid,
6757					     struct netlink_ext_ack *extack)
6758{
6759	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(port_dev);
6760	struct mlxsw_sp_port_vlan *mlxsw_sp_port_vlan;
6761
6762	mlxsw_sp_port_vlan = mlxsw_sp_port_vlan_find_by_vid(mlxsw_sp_port, vid);
6763	if (WARN_ON(!mlxsw_sp_port_vlan))
6764		return -EINVAL;
6765
6766	switch (event) {
6767	case NETDEV_UP:
6768		return mlxsw_sp_port_vlan_router_join(mlxsw_sp_port_vlan,
6769						      l3_dev, extack);
6770	case NETDEV_DOWN:
6771		__mlxsw_sp_port_vlan_router_leave(mlxsw_sp_port_vlan);
6772		break;
6773	}
6774
6775	return 0;
6776}
6777
6778static int mlxsw_sp_inetaddr_port_event(struct net_device *port_dev,
6779					unsigned long event,
6780					struct netlink_ext_ack *extack)
6781{
6782	if (netif_is_bridge_port(port_dev) ||
6783	    netif_is_lag_port(port_dev) ||
6784	    netif_is_ovs_port(port_dev))
6785		return 0;
6786
6787	return mlxsw_sp_inetaddr_port_vlan_event(port_dev, port_dev, event,
6788						 MLXSW_SP_DEFAULT_VID, extack);
6789}
6790
6791static int __mlxsw_sp_inetaddr_lag_event(struct net_device *l3_dev,
6792					 struct net_device *lag_dev,
6793					 unsigned long event, u16 vid,
6794					 struct netlink_ext_ack *extack)
6795{
6796	struct net_device *port_dev;
6797	struct list_head *iter;
6798	int err;
6799
6800	netdev_for_each_lower_dev(lag_dev, port_dev, iter) {
6801		if (mlxsw_sp_port_dev_check(port_dev)) {
6802			err = mlxsw_sp_inetaddr_port_vlan_event(l3_dev,
6803								port_dev,
6804								event, vid,
6805								extack);
6806			if (err)
6807				return err;
6808		}
6809	}
6810
6811	return 0;
6812}
6813
6814static int mlxsw_sp_inetaddr_lag_event(struct net_device *lag_dev,
6815				       unsigned long event,
6816				       struct netlink_ext_ack *extack)
6817{
6818	if (netif_is_bridge_port(lag_dev))
6819		return 0;
6820
6821	return __mlxsw_sp_inetaddr_lag_event(lag_dev, lag_dev, event,
6822					     MLXSW_SP_DEFAULT_VID, extack);
6823}
6824
6825static int mlxsw_sp_inetaddr_bridge_event(struct mlxsw_sp *mlxsw_sp,
6826					  struct net_device *l3_dev,
6827					  unsigned long event,
6828					  struct netlink_ext_ack *extack)
6829{
6830	struct mlxsw_sp_rif_params params = {
6831		.dev = l3_dev,
6832	};
6833	struct mlxsw_sp_rif *rif;
6834
6835	switch (event) {
6836	case NETDEV_UP:
6837		rif = mlxsw_sp_rif_create(mlxsw_sp, &params, extack);
6838		if (IS_ERR(rif))
6839			return PTR_ERR(rif);
6840		break;
6841	case NETDEV_DOWN:
6842		rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
6843		mlxsw_sp_rif_destroy(rif);
6844		break;
6845	}
6846
6847	return 0;
6848}
6849
6850static int mlxsw_sp_inetaddr_vlan_event(struct mlxsw_sp *mlxsw_sp,
6851					struct net_device *vlan_dev,
6852					unsigned long event,
6853					struct netlink_ext_ack *extack)
6854{
6855	struct net_device *real_dev = vlan_dev_real_dev(vlan_dev);
6856	u16 vid = vlan_dev_vlan_id(vlan_dev);
6857
6858	if (netif_is_bridge_port(vlan_dev))
6859		return 0;
6860
6861	if (mlxsw_sp_port_dev_check(real_dev))
6862		return mlxsw_sp_inetaddr_port_vlan_event(vlan_dev, real_dev,
6863							 event, vid, extack);
6864	else if (netif_is_lag_master(real_dev))
6865		return __mlxsw_sp_inetaddr_lag_event(vlan_dev, real_dev, event,
6866						     vid, extack);
6867	else if (netif_is_bridge_master(real_dev) && br_vlan_enabled(real_dev))
6868		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, vlan_dev, event,
6869						      extack);
6870
6871	return 0;
6872}
6873
6874static bool mlxsw_sp_rif_macvlan_is_vrrp4(const u8 *mac)
6875{
6876	u8 vrrp4[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x01, 0x00 };
6877	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6878
6879	return ether_addr_equal_masked(mac, vrrp4, mask);
6880}
6881
6882static bool mlxsw_sp_rif_macvlan_is_vrrp6(const u8 *mac)
6883{
6884	u8 vrrp6[ETH_ALEN] = { 0x00, 0x00, 0x5e, 0x00, 0x02, 0x00 };
6885	u8 mask[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0x00 };
6886
6887	return ether_addr_equal_masked(mac, vrrp6, mask);
6888}
6889
6890static int mlxsw_sp_rif_vrrp_op(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
6891				const u8 *mac, bool adding)
6892{
6893	char ritr_pl[MLXSW_REG_RITR_LEN];
6894	u8 vrrp_id = adding ? mac[5] : 0;
6895	int err;
6896
6897	if (!mlxsw_sp_rif_macvlan_is_vrrp4(mac) &&
6898	    !mlxsw_sp_rif_macvlan_is_vrrp6(mac))
6899		return 0;
6900
6901	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
6902	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6903	if (err)
6904		return err;
6905
6906	if (mlxsw_sp_rif_macvlan_is_vrrp4(mac))
6907		mlxsw_reg_ritr_if_vrrp_id_ipv4_set(ritr_pl, vrrp_id);
6908	else
6909		mlxsw_reg_ritr_if_vrrp_id_ipv6_set(ritr_pl, vrrp_id);
6910
6911	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
6912}
6913
6914static int mlxsw_sp_rif_macvlan_add(struct mlxsw_sp *mlxsw_sp,
6915				    const struct net_device *macvlan_dev,
6916				    struct netlink_ext_ack *extack)
6917{
6918	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6919	struct mlxsw_sp_rif *rif;
6920	int err;
6921
6922	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6923	if (!rif) {
6924		NL_SET_ERR_MSG_MOD(extack, "macvlan is only supported on top of router interfaces");
6925		return -EOPNOTSUPP;
6926	}
6927
6928	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6929				  mlxsw_sp_fid_index(rif->fid), true);
6930	if (err)
6931		return err;
6932
6933	err = mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index,
6934				   macvlan_dev->dev_addr, true);
6935	if (err)
6936		goto err_rif_vrrp_add;
6937
6938	/* Make sure the bridge driver does not have this MAC pointing at
6939	 * some other port.
6940	 */
6941	if (rif->ops->fdb_del)
6942		rif->ops->fdb_del(rif, macvlan_dev->dev_addr);
6943
6944	return 0;
6945
6946err_rif_vrrp_add:
6947	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6948			    mlxsw_sp_fid_index(rif->fid), false);
6949	return err;
6950}
6951
6952static void __mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6953				       const struct net_device *macvlan_dev)
6954{
6955	struct macvlan_dev *vlan = netdev_priv(macvlan_dev);
6956	struct mlxsw_sp_rif *rif;
6957
6958	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, vlan->lowerdev);
6959	/* If we do not have a RIF, then we already took care of
6960	 * removing the macvlan's MAC during RIF deletion.
6961	 */
6962	if (!rif)
6963		return;
6964	mlxsw_sp_rif_vrrp_op(mlxsw_sp, rif->rif_index, macvlan_dev->dev_addr,
6965			     false);
6966	mlxsw_sp_rif_fdb_op(mlxsw_sp, macvlan_dev->dev_addr,
6967			    mlxsw_sp_fid_index(rif->fid), false);
6968}
6969
6970void mlxsw_sp_rif_macvlan_del(struct mlxsw_sp *mlxsw_sp,
6971			      const struct net_device *macvlan_dev)
6972{
6973	mutex_lock(&mlxsw_sp->router->lock);
6974	__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6975	mutex_unlock(&mlxsw_sp->router->lock);
6976}
6977
6978static int mlxsw_sp_inetaddr_macvlan_event(struct mlxsw_sp *mlxsw_sp,
6979					   struct net_device *macvlan_dev,
6980					   unsigned long event,
6981					   struct netlink_ext_ack *extack)
6982{
6983	switch (event) {
6984	case NETDEV_UP:
6985		return mlxsw_sp_rif_macvlan_add(mlxsw_sp, macvlan_dev, extack);
6986	case NETDEV_DOWN:
6987		__mlxsw_sp_rif_macvlan_del(mlxsw_sp, macvlan_dev);
6988		break;
6989	}
6990
6991	return 0;
6992}
6993
6994static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp,
6995					       struct net_device *dev,
6996					       const unsigned char *dev_addr,
6997					       struct netlink_ext_ack *extack)
6998{
6999	struct mlxsw_sp_rif *rif;
7000	int i;
7001
7002	/* A RIF is not created for macvlan netdevs. Their MAC is used to
7003	 * populate the FDB
7004	 */
7005	if (netif_is_macvlan(dev) || netif_is_l3_master(dev))
7006		return 0;
7007
7008	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) {
7009		rif = mlxsw_sp->router->rifs[i];
7010		if (rif && rif->ops &&
7011		    rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB)
7012			continue;
7013		if (rif && rif->dev && rif->dev != dev &&
7014		    !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr,
7015					     mlxsw_sp->mac_mask)) {
7016			NL_SET_ERR_MSG_MOD(extack, "All router interface MAC addresses must have the same prefix");
7017			return -EINVAL;
7018		}
7019	}
7020
7021	return 0;
7022}
7023
7024static int __mlxsw_sp_inetaddr_event(struct mlxsw_sp *mlxsw_sp,
7025				     struct net_device *dev,
7026				     unsigned long event,
7027				     struct netlink_ext_ack *extack)
7028{
7029	if (mlxsw_sp_port_dev_check(dev))
7030		return mlxsw_sp_inetaddr_port_event(dev, event, extack);
7031	else if (netif_is_lag_master(dev))
7032		return mlxsw_sp_inetaddr_lag_event(dev, event, extack);
7033	else if (netif_is_bridge_master(dev))
7034		return mlxsw_sp_inetaddr_bridge_event(mlxsw_sp, dev, event,
7035						      extack);
7036	else if (is_vlan_dev(dev))
7037		return mlxsw_sp_inetaddr_vlan_event(mlxsw_sp, dev, event,
7038						    extack);
7039	else if (netif_is_macvlan(dev))
7040		return mlxsw_sp_inetaddr_macvlan_event(mlxsw_sp, dev, event,
7041						       extack);
7042	else
7043		return 0;
7044}
7045
7046static int mlxsw_sp_inetaddr_event(struct notifier_block *nb,
7047				   unsigned long event, void *ptr)
7048{
7049	struct in_ifaddr *ifa = (struct in_ifaddr *) ptr;
7050	struct net_device *dev = ifa->ifa_dev->dev;
7051	struct mlxsw_sp_router *router;
7052	struct mlxsw_sp_rif *rif;
7053	int err = 0;
7054
7055	/* NETDEV_UP event is handled by mlxsw_sp_inetaddr_valid_event */
7056	if (event == NETDEV_UP)
7057		return NOTIFY_DONE;
7058
7059	router = container_of(nb, struct mlxsw_sp_router, inetaddr_nb);
7060	mutex_lock(&router->lock);
7061	rif = mlxsw_sp_rif_find_by_dev(router->mlxsw_sp, dev);
7062	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7063		goto out;
7064
7065	err = __mlxsw_sp_inetaddr_event(router->mlxsw_sp, dev, event, NULL);
7066out:
7067	mutex_unlock(&router->lock);
7068	return notifier_from_errno(err);
7069}
7070
7071int mlxsw_sp_inetaddr_valid_event(struct notifier_block *unused,
7072				  unsigned long event, void *ptr)
7073{
7074	struct in_validator_info *ivi = (struct in_validator_info *) ptr;
7075	struct net_device *dev = ivi->ivi_dev->dev;
7076	struct mlxsw_sp *mlxsw_sp;
7077	struct mlxsw_sp_rif *rif;
7078	int err = 0;
7079
7080	mlxsw_sp = mlxsw_sp_lower_get(dev);
7081	if (!mlxsw_sp)
7082		return NOTIFY_DONE;
7083
7084	mutex_lock(&mlxsw_sp->router->lock);
7085	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7086	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7087		goto out;
7088
7089	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
7090						  ivi->extack);
7091	if (err)
7092		goto out;
7093
7094	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, ivi->extack);
7095out:
7096	mutex_unlock(&mlxsw_sp->router->lock);
7097	return notifier_from_errno(err);
7098}
7099
7100struct mlxsw_sp_inet6addr_event_work {
7101	struct work_struct work;
7102	struct mlxsw_sp *mlxsw_sp;
7103	struct net_device *dev;
7104	unsigned long event;
7105};
7106
7107static void mlxsw_sp_inet6addr_event_work(struct work_struct *work)
7108{
7109	struct mlxsw_sp_inet6addr_event_work *inet6addr_work =
7110		container_of(work, struct mlxsw_sp_inet6addr_event_work, work);
7111	struct mlxsw_sp *mlxsw_sp = inet6addr_work->mlxsw_sp;
7112	struct net_device *dev = inet6addr_work->dev;
7113	unsigned long event = inet6addr_work->event;
7114	struct mlxsw_sp_rif *rif;
7115
7116	rtnl_lock();
7117	mutex_lock(&mlxsw_sp->router->lock);
7118
7119	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7120	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7121		goto out;
7122
7123	__mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, NULL);
7124out:
7125	mutex_unlock(&mlxsw_sp->router->lock);
7126	rtnl_unlock();
7127	dev_put(dev);
7128	kfree(inet6addr_work);
7129}
7130
7131/* Called with rcu_read_lock() */
7132static int mlxsw_sp_inet6addr_event(struct notifier_block *nb,
7133				    unsigned long event, void *ptr)
7134{
7135	struct inet6_ifaddr *if6 = (struct inet6_ifaddr *) ptr;
7136	struct mlxsw_sp_inet6addr_event_work *inet6addr_work;
7137	struct net_device *dev = if6->idev->dev;
7138	struct mlxsw_sp_router *router;
7139
7140	/* NETDEV_UP event is handled by mlxsw_sp_inet6addr_valid_event */
7141	if (event == NETDEV_UP)
7142		return NOTIFY_DONE;
7143
7144	inet6addr_work = kzalloc(sizeof(*inet6addr_work), GFP_ATOMIC);
7145	if (!inet6addr_work)
7146		return NOTIFY_BAD;
7147
7148	router = container_of(nb, struct mlxsw_sp_router, inet6addr_nb);
7149	INIT_WORK(&inet6addr_work->work, mlxsw_sp_inet6addr_event_work);
7150	inet6addr_work->mlxsw_sp = router->mlxsw_sp;
7151	inet6addr_work->dev = dev;
7152	inet6addr_work->event = event;
7153	dev_hold(dev);
7154	mlxsw_core_schedule_work(&inet6addr_work->work);
7155
7156	return NOTIFY_DONE;
7157}
7158
7159int mlxsw_sp_inet6addr_valid_event(struct notifier_block *unused,
7160				   unsigned long event, void *ptr)
7161{
7162	struct in6_validator_info *i6vi = (struct in6_validator_info *) ptr;
7163	struct net_device *dev = i6vi->i6vi_dev->dev;
7164	struct mlxsw_sp *mlxsw_sp;
7165	struct mlxsw_sp_rif *rif;
7166	int err = 0;
7167
7168	mlxsw_sp = mlxsw_sp_lower_get(dev);
7169	if (!mlxsw_sp)
7170		return NOTIFY_DONE;
7171
7172	mutex_lock(&mlxsw_sp->router->lock);
7173	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7174	if (!mlxsw_sp_rif_should_config(rif, dev, event))
7175		goto out;
7176
7177	err = mlxsw_sp_router_port_check_rif_addr(mlxsw_sp, dev, dev->dev_addr,
7178						  i6vi->extack);
7179	if (err)
7180		goto out;
7181
7182	err = __mlxsw_sp_inetaddr_event(mlxsw_sp, dev, event, i6vi->extack);
7183out:
7184	mutex_unlock(&mlxsw_sp->router->lock);
7185	return notifier_from_errno(err);
7186}
7187
7188static int mlxsw_sp_rif_edit(struct mlxsw_sp *mlxsw_sp, u16 rif_index,
7189			     const char *mac, int mtu)
7190{
7191	char ritr_pl[MLXSW_REG_RITR_LEN];
7192	int err;
7193
7194	mlxsw_reg_ritr_rif_pack(ritr_pl, rif_index);
7195	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7196	if (err)
7197		return err;
7198
7199	mlxsw_reg_ritr_mtu_set(ritr_pl, mtu);
7200	mlxsw_reg_ritr_if_mac_memcpy_to(ritr_pl, mac);
7201	mlxsw_reg_ritr_op_set(ritr_pl, MLXSW_REG_RITR_RIF_CREATE);
7202	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7203}
7204
7205static int
7206mlxsw_sp_router_port_change_event(struct mlxsw_sp *mlxsw_sp,
7207				  struct mlxsw_sp_rif *rif)
7208{
7209	struct net_device *dev = rif->dev;
7210	u16 fid_index;
7211	int err;
7212
7213	fid_index = mlxsw_sp_fid_index(rif->fid);
7214
7215	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, false);
7216	if (err)
7217		return err;
7218
7219	err = mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, dev->dev_addr,
7220				dev->mtu);
7221	if (err)
7222		goto err_rif_edit;
7223
7224	err = mlxsw_sp_rif_fdb_op(mlxsw_sp, dev->dev_addr, fid_index, true);
7225	if (err)
7226		goto err_rif_fdb_op;
7227
7228	if (rif->mtu != dev->mtu) {
7229		struct mlxsw_sp_vr *vr;
7230		int i;
7231
7232		/* The RIF is relevant only to its mr_table instance, as unlike
7233		 * unicast routing, in multicast routing a RIF cannot be shared
7234		 * between several multicast routing tables.
7235		 */
7236		vr = &mlxsw_sp->router->vrs[rif->vr_id];
7237		for (i = 0; i < MLXSW_SP_L3_PROTO_MAX; i++)
7238			mlxsw_sp_mr_rif_mtu_update(vr->mr_table[i],
7239						   rif, dev->mtu);
7240	}
7241
7242	ether_addr_copy(rif->addr, dev->dev_addr);
7243	rif->mtu = dev->mtu;
7244
7245	netdev_dbg(dev, "Updated RIF=%d\n", rif->rif_index);
7246
7247	return 0;
7248
7249err_rif_fdb_op:
7250	mlxsw_sp_rif_edit(mlxsw_sp, rif->rif_index, rif->addr, rif->mtu);
7251err_rif_edit:
7252	mlxsw_sp_rif_fdb_op(mlxsw_sp, rif->addr, fid_index, true);
7253	return err;
7254}
7255
7256static int mlxsw_sp_router_port_pre_changeaddr_event(struct mlxsw_sp_rif *rif,
7257			    struct netdev_notifier_pre_changeaddr_info *info)
7258{
7259	struct netlink_ext_ack *extack;
7260
7261	extack = netdev_notifier_info_to_extack(&info->info);
7262	return mlxsw_sp_router_port_check_rif_addr(rif->mlxsw_sp, rif->dev,
7263						   info->dev_addr, extack);
7264}
7265
7266int mlxsw_sp_netdevice_router_port_event(struct net_device *dev,
7267					 unsigned long event, void *ptr)
7268{
7269	struct mlxsw_sp *mlxsw_sp;
7270	struct mlxsw_sp_rif *rif;
7271	int err = 0;
7272
7273	mlxsw_sp = mlxsw_sp_lower_get(dev);
7274	if (!mlxsw_sp)
7275		return 0;
7276
7277	mutex_lock(&mlxsw_sp->router->lock);
7278	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, dev);
7279	if (!rif)
7280		goto out;
7281
7282	switch (event) {
7283	case NETDEV_CHANGEMTU:
7284	case NETDEV_CHANGEADDR:
7285		err = mlxsw_sp_router_port_change_event(mlxsw_sp, rif);
7286		break;
7287	case NETDEV_PRE_CHANGEADDR:
7288		err = mlxsw_sp_router_port_pre_changeaddr_event(rif, ptr);
7289		break;
7290	}
7291
7292out:
7293	mutex_unlock(&mlxsw_sp->router->lock);
7294	return err;
7295}
7296
7297static int mlxsw_sp_port_vrf_join(struct mlxsw_sp *mlxsw_sp,
7298				  struct net_device *l3_dev,
7299				  struct netlink_ext_ack *extack)
7300{
7301	struct mlxsw_sp_rif *rif;
7302
7303	/* If netdev is already associated with a RIF, then we need to
7304	 * destroy it and create a new one with the new virtual router ID.
7305	 */
7306	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7307	if (rif)
7308		__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN,
7309					  extack);
7310
7311	return __mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_UP, extack);
7312}
7313
7314static void mlxsw_sp_port_vrf_leave(struct mlxsw_sp *mlxsw_sp,
7315				    struct net_device *l3_dev)
7316{
7317	struct mlxsw_sp_rif *rif;
7318
7319	rif = mlxsw_sp_rif_find_by_dev(mlxsw_sp, l3_dev);
7320	if (!rif)
7321		return;
7322	__mlxsw_sp_inetaddr_event(mlxsw_sp, l3_dev, NETDEV_DOWN, NULL);
7323}
7324
7325int mlxsw_sp_netdevice_vrf_event(struct net_device *l3_dev, unsigned long event,
7326				 struct netdev_notifier_changeupper_info *info)
7327{
7328	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_lower_get(l3_dev);
7329	int err = 0;
7330
7331	/* We do not create a RIF for a macvlan, but only use it to
7332	 * direct more MAC addresses to the router.
7333	 */
7334	if (!mlxsw_sp || netif_is_macvlan(l3_dev))
7335		return 0;
7336
7337	mutex_lock(&mlxsw_sp->router->lock);
7338	switch (event) {
7339	case NETDEV_PRECHANGEUPPER:
7340		break;
7341	case NETDEV_CHANGEUPPER:
7342		if (info->linking) {
7343			struct netlink_ext_ack *extack;
7344
7345			extack = netdev_notifier_info_to_extack(&info->info);
7346			err = mlxsw_sp_port_vrf_join(mlxsw_sp, l3_dev, extack);
7347		} else {
7348			mlxsw_sp_port_vrf_leave(mlxsw_sp, l3_dev);
7349		}
7350		break;
7351	}
7352	mutex_unlock(&mlxsw_sp->router->lock);
7353
7354	return err;
7355}
7356
7357static int __mlxsw_sp_rif_macvlan_flush(struct net_device *dev,
7358					struct netdev_nested_priv *priv)
7359{
7360	struct mlxsw_sp_rif *rif = (struct mlxsw_sp_rif *)priv->data;
7361
7362	if (!netif_is_macvlan(dev))
7363		return 0;
7364
7365	return mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, dev->dev_addr,
7366				   mlxsw_sp_fid_index(rif->fid), false);
7367}
7368
7369static int mlxsw_sp_rif_macvlan_flush(struct mlxsw_sp_rif *rif)
7370{
7371	struct netdev_nested_priv priv = {
7372		.data = (void *)rif,
7373	};
7374
7375	if (!netif_is_macvlan_port(rif->dev))
7376		return 0;
7377
7378	netdev_warn(rif->dev, "Router interface is deleted. Upper macvlans will not work\n");
7379	return netdev_walk_all_upper_dev_rcu(rif->dev,
7380					     __mlxsw_sp_rif_macvlan_flush, &priv);
7381}
7382
7383static void mlxsw_sp_rif_subport_setup(struct mlxsw_sp_rif *rif,
7384				       const struct mlxsw_sp_rif_params *params)
7385{
7386	struct mlxsw_sp_rif_subport *rif_subport;
7387
7388	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7389	refcount_set(&rif_subport->ref_count, 1);
7390	rif_subport->vid = params->vid;
7391	rif_subport->lag = params->lag;
7392	if (params->lag)
7393		rif_subport->lag_id = params->lag_id;
7394	else
7395		rif_subport->system_port = params->system_port;
7396}
7397
7398static int mlxsw_sp_rif_subport_op(struct mlxsw_sp_rif *rif, bool enable)
7399{
7400	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7401	struct mlxsw_sp_rif_subport *rif_subport;
7402	char ritr_pl[MLXSW_REG_RITR_LEN];
7403
7404	rif_subport = mlxsw_sp_rif_subport_rif(rif);
7405	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_SP_IF,
7406			    rif->rif_index, rif->vr_id, rif->dev->mtu);
7407	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7408	mlxsw_reg_ritr_sp_if_pack(ritr_pl, rif_subport->lag,
7409				  rif_subport->lag ? rif_subport->lag_id :
7410						     rif_subport->system_port,
7411				  rif_subport->vid);
7412
7413	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7414}
7415
7416static int mlxsw_sp_rif_subport_configure(struct mlxsw_sp_rif *rif)
7417{
7418	int err;
7419
7420	err = mlxsw_sp_rif_subport_op(rif, true);
7421	if (err)
7422		return err;
7423
7424	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7425				  mlxsw_sp_fid_index(rif->fid), true);
7426	if (err)
7427		goto err_rif_fdb_op;
7428
7429	mlxsw_sp_fid_rif_set(rif->fid, rif);
7430	return 0;
7431
7432err_rif_fdb_op:
7433	mlxsw_sp_rif_subport_op(rif, false);
7434	return err;
7435}
7436
7437static void mlxsw_sp_rif_subport_deconfigure(struct mlxsw_sp_rif *rif)
7438{
7439	struct mlxsw_sp_fid *fid = rif->fid;
7440
7441	mlxsw_sp_fid_rif_set(fid, NULL);
7442	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7443			    mlxsw_sp_fid_index(fid), false);
7444	mlxsw_sp_rif_macvlan_flush(rif);
7445	mlxsw_sp_rif_subport_op(rif, false);
7446}
7447
7448static struct mlxsw_sp_fid *
7449mlxsw_sp_rif_subport_fid_get(struct mlxsw_sp_rif *rif,
7450			     struct netlink_ext_ack *extack)
7451{
7452	return mlxsw_sp_fid_rfid_get(rif->mlxsw_sp, rif->rif_index);
7453}
7454
7455static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_subport_ops = {
7456	.type			= MLXSW_SP_RIF_TYPE_SUBPORT,
7457	.rif_size		= sizeof(struct mlxsw_sp_rif_subport),
7458	.setup			= mlxsw_sp_rif_subport_setup,
7459	.configure		= mlxsw_sp_rif_subport_configure,
7460	.deconfigure		= mlxsw_sp_rif_subport_deconfigure,
7461	.fid_get		= mlxsw_sp_rif_subport_fid_get,
7462};
7463
7464static int mlxsw_sp_rif_vlan_fid_op(struct mlxsw_sp_rif *rif,
7465				    enum mlxsw_reg_ritr_if_type type,
7466				    u16 vid_fid, bool enable)
7467{
7468	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7469	char ritr_pl[MLXSW_REG_RITR_LEN];
7470
7471	mlxsw_reg_ritr_pack(ritr_pl, enable, type, rif->rif_index, rif->vr_id,
7472			    rif->dev->mtu);
7473	mlxsw_reg_ritr_mac_pack(ritr_pl, rif->dev->dev_addr);
7474	mlxsw_reg_ritr_fid_set(ritr_pl, type, vid_fid);
7475
7476	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7477}
7478
7479u8 mlxsw_sp_router_port(const struct mlxsw_sp *mlxsw_sp)
7480{
7481	return mlxsw_core_max_ports(mlxsw_sp->core) + 1;
7482}
7483
7484static int mlxsw_sp_rif_fid_configure(struct mlxsw_sp_rif *rif)
7485{
7486	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7487	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7488	int err;
7489
7490	err = mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index,
7491				       true);
7492	if (err)
7493		return err;
7494
7495	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7496				     mlxsw_sp_router_port(mlxsw_sp), true);
7497	if (err)
7498		goto err_fid_mc_flood_set;
7499
7500	err = mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7501				     mlxsw_sp_router_port(mlxsw_sp), true);
7502	if (err)
7503		goto err_fid_bc_flood_set;
7504
7505	err = mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7506				  mlxsw_sp_fid_index(rif->fid), true);
7507	if (err)
7508		goto err_rif_fdb_op;
7509
7510	mlxsw_sp_fid_rif_set(rif->fid, rif);
7511	return 0;
7512
7513err_rif_fdb_op:
7514	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7515			       mlxsw_sp_router_port(mlxsw_sp), false);
7516err_fid_bc_flood_set:
7517	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7518			       mlxsw_sp_router_port(mlxsw_sp), false);
7519err_fid_mc_flood_set:
7520	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7521	return err;
7522}
7523
7524static void mlxsw_sp_rif_fid_deconfigure(struct mlxsw_sp_rif *rif)
7525{
7526	u16 fid_index = mlxsw_sp_fid_index(rif->fid);
7527	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7528	struct mlxsw_sp_fid *fid = rif->fid;
7529
7530	mlxsw_sp_fid_rif_set(fid, NULL);
7531	mlxsw_sp_rif_fdb_op(rif->mlxsw_sp, rif->dev->dev_addr,
7532			    mlxsw_sp_fid_index(fid), false);
7533	mlxsw_sp_rif_macvlan_flush(rif);
7534	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_BC,
7535			       mlxsw_sp_router_port(mlxsw_sp), false);
7536	mlxsw_sp_fid_flood_set(rif->fid, MLXSW_SP_FLOOD_TYPE_MC,
7537			       mlxsw_sp_router_port(mlxsw_sp), false);
7538	mlxsw_sp_rif_vlan_fid_op(rif, MLXSW_REG_RITR_FID_IF, fid_index, false);
7539}
7540
7541static struct mlxsw_sp_fid *
7542mlxsw_sp_rif_fid_fid_get(struct mlxsw_sp_rif *rif,
7543			 struct netlink_ext_ack *extack)
7544{
7545	return mlxsw_sp_fid_8021d_get(rif->mlxsw_sp, rif->dev->ifindex);
7546}
7547
7548static void mlxsw_sp_rif_fid_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7549{
7550	struct switchdev_notifier_fdb_info info;
7551	struct net_device *dev;
7552
7553	dev = br_fdb_find_port(rif->dev, mac, 0);
7554	if (!dev)
7555		return;
7556
7557	info.addr = mac;
7558	info.vid = 0;
7559	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7560				 NULL);
7561}
7562
7563static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_fid_ops = {
7564	.type			= MLXSW_SP_RIF_TYPE_FID,
7565	.rif_size		= sizeof(struct mlxsw_sp_rif),
7566	.configure		= mlxsw_sp_rif_fid_configure,
7567	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7568	.fid_get		= mlxsw_sp_rif_fid_fid_get,
7569	.fdb_del		= mlxsw_sp_rif_fid_fdb_del,
7570};
7571
7572static struct mlxsw_sp_fid *
7573mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
7574			  struct netlink_ext_ack *extack)
7575{
7576	struct net_device *br_dev;
7577	u16 vid;
7578	int err;
7579
7580	if (is_vlan_dev(rif->dev)) {
7581		vid = vlan_dev_vlan_id(rif->dev);
7582		br_dev = vlan_dev_real_dev(rif->dev);
7583		if (WARN_ON(!netif_is_bridge_master(br_dev)))
7584			return ERR_PTR(-EINVAL);
7585	} else {
7586		err = br_vlan_get_pvid(rif->dev, &vid);
7587		if (err < 0 || !vid) {
7588			NL_SET_ERR_MSG_MOD(extack, "Couldn't determine bridge PVID");
7589			return ERR_PTR(-EINVAL);
7590		}
7591	}
7592
7593	return mlxsw_sp_fid_8021q_get(rif->mlxsw_sp, vid);
7594}
7595
7596static void mlxsw_sp_rif_vlan_fdb_del(struct mlxsw_sp_rif *rif, const char *mac)
7597{
7598	u16 vid = mlxsw_sp_fid_8021q_vid(rif->fid);
7599	struct switchdev_notifier_fdb_info info;
7600	struct net_device *br_dev;
7601	struct net_device *dev;
7602
7603	br_dev = is_vlan_dev(rif->dev) ? vlan_dev_real_dev(rif->dev) : rif->dev;
7604	dev = br_fdb_find_port(br_dev, mac, vid);
7605	if (!dev)
7606		return;
7607
7608	info.addr = mac;
7609	info.vid = vid;
7610	call_switchdev_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE, dev, &info.info,
7611				 NULL);
7612}
7613
7614static const struct mlxsw_sp_rif_ops mlxsw_sp_rif_vlan_emu_ops = {
7615	.type			= MLXSW_SP_RIF_TYPE_VLAN,
7616	.rif_size		= sizeof(struct mlxsw_sp_rif),
7617	.configure		= mlxsw_sp_rif_fid_configure,
7618	.deconfigure		= mlxsw_sp_rif_fid_deconfigure,
7619	.fid_get		= mlxsw_sp_rif_vlan_fid_get,
7620	.fdb_del		= mlxsw_sp_rif_vlan_fdb_del,
7621};
7622
7623static struct mlxsw_sp_rif_ipip_lb *
7624mlxsw_sp_rif_ipip_lb_rif(struct mlxsw_sp_rif *rif)
7625{
7626	return container_of(rif, struct mlxsw_sp_rif_ipip_lb, common);
7627}
7628
7629static void
7630mlxsw_sp_rif_ipip_lb_setup(struct mlxsw_sp_rif *rif,
7631			   const struct mlxsw_sp_rif_params *params)
7632{
7633	struct mlxsw_sp_rif_params_ipip_lb *params_lb;
7634	struct mlxsw_sp_rif_ipip_lb *rif_lb;
7635
7636	params_lb = container_of(params, struct mlxsw_sp_rif_params_ipip_lb,
7637				 common);
7638	rif_lb = mlxsw_sp_rif_ipip_lb_rif(rif);
7639	rif_lb->lb_config = params_lb->lb_config;
7640}
7641
7642static int
7643mlxsw_sp1_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7644{
7645	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7646	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7647	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7648	struct mlxsw_sp_vr *ul_vr;
7649	int err;
7650
7651	ul_vr = mlxsw_sp_vr_get(mlxsw_sp, ul_tb_id, NULL);
7652	if (IS_ERR(ul_vr))
7653		return PTR_ERR(ul_vr);
7654
7655	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, true);
7656	if (err)
7657		goto err_loopback_op;
7658
7659	lb_rif->ul_vr_id = ul_vr->id;
7660	lb_rif->ul_rif_id = 0;
7661	++ul_vr->rif_count;
7662	return 0;
7663
7664err_loopback_op:
7665	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7666	return err;
7667}
7668
7669static void mlxsw_sp1_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7670{
7671	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7672	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7673	struct mlxsw_sp_vr *ul_vr;
7674
7675	ul_vr = &mlxsw_sp->router->vrs[lb_rif->ul_vr_id];
7676	mlxsw_sp_rif_ipip_lb_op(lb_rif, ul_vr->id, 0, false);
7677
7678	--ul_vr->rif_count;
7679	mlxsw_sp_vr_put(mlxsw_sp, ul_vr);
7680}
7681
7682static const struct mlxsw_sp_rif_ops mlxsw_sp1_rif_ipip_lb_ops = {
7683	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7684	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7685	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7686	.configure		= mlxsw_sp1_rif_ipip_lb_configure,
7687	.deconfigure		= mlxsw_sp1_rif_ipip_lb_deconfigure,
7688};
7689
7690const struct mlxsw_sp_rif_ops *mlxsw_sp1_rif_ops_arr[] = {
7691	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7692	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
7693	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7694	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp1_rif_ipip_lb_ops,
7695};
7696
7697static int
7698mlxsw_sp_rif_ipip_lb_ul_rif_op(struct mlxsw_sp_rif *ul_rif, bool enable)
7699{
7700	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7701	char ritr_pl[MLXSW_REG_RITR_LEN];
7702
7703	mlxsw_reg_ritr_pack(ritr_pl, enable, MLXSW_REG_RITR_LOOPBACK_IF,
7704			    ul_rif->rif_index, ul_rif->vr_id, IP_MAX_MTU);
7705	mlxsw_reg_ritr_loopback_protocol_set(ritr_pl,
7706					     MLXSW_REG_RITR_LOOPBACK_GENERIC);
7707
7708	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ritr), ritr_pl);
7709}
7710
7711static struct mlxsw_sp_rif *
7712mlxsw_sp_ul_rif_create(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr,
7713		       struct netlink_ext_ack *extack)
7714{
7715	struct mlxsw_sp_rif *ul_rif;
7716	u16 rif_index;
7717	int err;
7718
7719	err = mlxsw_sp_rif_index_alloc(mlxsw_sp, &rif_index);
7720	if (err) {
7721		NL_SET_ERR_MSG_MOD(extack, "Exceeded number of supported router interfaces");
7722		return ERR_PTR(err);
7723	}
7724
7725	ul_rif = mlxsw_sp_rif_alloc(sizeof(*ul_rif), rif_index, vr->id, NULL);
7726	if (!ul_rif)
7727		return ERR_PTR(-ENOMEM);
7728
7729	mlxsw_sp->router->rifs[rif_index] = ul_rif;
7730	ul_rif->mlxsw_sp = mlxsw_sp;
7731	err = mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, true);
7732	if (err)
7733		goto ul_rif_op_err;
7734
7735	return ul_rif;
7736
7737ul_rif_op_err:
7738	mlxsw_sp->router->rifs[rif_index] = NULL;
7739	kfree(ul_rif);
7740	return ERR_PTR(err);
7741}
7742
7743static void mlxsw_sp_ul_rif_destroy(struct mlxsw_sp_rif *ul_rif)
7744{
7745	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7746
7747	mlxsw_sp_rif_ipip_lb_ul_rif_op(ul_rif, false);
7748	mlxsw_sp->router->rifs[ul_rif->rif_index] = NULL;
7749	kfree(ul_rif);
7750}
7751
7752static struct mlxsw_sp_rif *
7753mlxsw_sp_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 tb_id,
7754		    struct netlink_ext_ack *extack)
7755{
7756	struct mlxsw_sp_vr *vr;
7757	int err;
7758
7759	vr = mlxsw_sp_vr_get(mlxsw_sp, tb_id, extack);
7760	if (IS_ERR(vr))
7761		return ERR_CAST(vr);
7762
7763	if (refcount_inc_not_zero(&vr->ul_rif_refcnt))
7764		return vr->ul_rif;
7765
7766	vr->ul_rif = mlxsw_sp_ul_rif_create(mlxsw_sp, vr, extack);
7767	if (IS_ERR(vr->ul_rif)) {
7768		err = PTR_ERR(vr->ul_rif);
7769		goto err_ul_rif_create;
7770	}
7771
7772	vr->rif_count++;
7773	refcount_set(&vr->ul_rif_refcnt, 1);
7774
7775	return vr->ul_rif;
7776
7777err_ul_rif_create:
7778	mlxsw_sp_vr_put(mlxsw_sp, vr);
7779	return ERR_PTR(err);
7780}
7781
7782static void mlxsw_sp_ul_rif_put(struct mlxsw_sp_rif *ul_rif)
7783{
7784	struct mlxsw_sp *mlxsw_sp = ul_rif->mlxsw_sp;
7785	struct mlxsw_sp_vr *vr;
7786
7787	vr = &mlxsw_sp->router->vrs[ul_rif->vr_id];
7788
7789	if (!refcount_dec_and_test(&vr->ul_rif_refcnt))
7790		return;
7791
7792	vr->rif_count--;
7793	mlxsw_sp_ul_rif_destroy(ul_rif);
7794	mlxsw_sp_vr_put(mlxsw_sp, vr);
7795}
7796
7797int mlxsw_sp_router_ul_rif_get(struct mlxsw_sp *mlxsw_sp, u32 ul_tb_id,
7798			       u16 *ul_rif_index)
7799{
7800	struct mlxsw_sp_rif *ul_rif;
7801	int err = 0;
7802
7803	mutex_lock(&mlxsw_sp->router->lock);
7804	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7805	if (IS_ERR(ul_rif)) {
7806		err = PTR_ERR(ul_rif);
7807		goto out;
7808	}
7809	*ul_rif_index = ul_rif->rif_index;
7810out:
7811	mutex_unlock(&mlxsw_sp->router->lock);
7812	return err;
7813}
7814
7815void mlxsw_sp_router_ul_rif_put(struct mlxsw_sp *mlxsw_sp, u16 ul_rif_index)
7816{
7817	struct mlxsw_sp_rif *ul_rif;
7818
7819	mutex_lock(&mlxsw_sp->router->lock);
7820	ul_rif = mlxsw_sp->router->rifs[ul_rif_index];
7821	if (WARN_ON(!ul_rif))
7822		goto out;
7823
7824	mlxsw_sp_ul_rif_put(ul_rif);
7825out:
7826	mutex_unlock(&mlxsw_sp->router->lock);
7827}
7828
7829static int
7830mlxsw_sp2_rif_ipip_lb_configure(struct mlxsw_sp_rif *rif)
7831{
7832	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7833	u32 ul_tb_id = mlxsw_sp_ipip_dev_ul_tb_id(rif->dev);
7834	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7835	struct mlxsw_sp_rif *ul_rif;
7836	int err;
7837
7838	ul_rif = mlxsw_sp_ul_rif_get(mlxsw_sp, ul_tb_id, NULL);
7839	if (IS_ERR(ul_rif))
7840		return PTR_ERR(ul_rif);
7841
7842	err = mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, ul_rif->rif_index, true);
7843	if (err)
7844		goto err_loopback_op;
7845
7846	lb_rif->ul_vr_id = 0;
7847	lb_rif->ul_rif_id = ul_rif->rif_index;
7848
7849	return 0;
7850
7851err_loopback_op:
7852	mlxsw_sp_ul_rif_put(ul_rif);
7853	return err;
7854}
7855
7856static void mlxsw_sp2_rif_ipip_lb_deconfigure(struct mlxsw_sp_rif *rif)
7857{
7858	struct mlxsw_sp_rif_ipip_lb *lb_rif = mlxsw_sp_rif_ipip_lb_rif(rif);
7859	struct mlxsw_sp *mlxsw_sp = rif->mlxsw_sp;
7860	struct mlxsw_sp_rif *ul_rif;
7861
7862	ul_rif = mlxsw_sp_rif_by_index(mlxsw_sp, lb_rif->ul_rif_id);
7863	mlxsw_sp_rif_ipip_lb_op(lb_rif, 0, lb_rif->ul_rif_id, false);
7864	mlxsw_sp_ul_rif_put(ul_rif);
7865}
7866
7867static const struct mlxsw_sp_rif_ops mlxsw_sp2_rif_ipip_lb_ops = {
7868	.type			= MLXSW_SP_RIF_TYPE_IPIP_LB,
7869	.rif_size		= sizeof(struct mlxsw_sp_rif_ipip_lb),
7870	.setup                  = mlxsw_sp_rif_ipip_lb_setup,
7871	.configure		= mlxsw_sp2_rif_ipip_lb_configure,
7872	.deconfigure		= mlxsw_sp2_rif_ipip_lb_deconfigure,
7873};
7874
7875const struct mlxsw_sp_rif_ops *mlxsw_sp2_rif_ops_arr[] = {
7876	[MLXSW_SP_RIF_TYPE_SUBPORT]	= &mlxsw_sp_rif_subport_ops,
7877	[MLXSW_SP_RIF_TYPE_VLAN]	= &mlxsw_sp_rif_vlan_emu_ops,
7878	[MLXSW_SP_RIF_TYPE_FID]		= &mlxsw_sp_rif_fid_ops,
7879	[MLXSW_SP_RIF_TYPE_IPIP_LB]	= &mlxsw_sp2_rif_ipip_lb_ops,
7880};
7881
7882static int mlxsw_sp_rifs_init(struct mlxsw_sp *mlxsw_sp)
7883{
7884	u64 max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
7885
7886	mlxsw_sp->router->rifs = kcalloc(max_rifs,
7887					 sizeof(struct mlxsw_sp_rif *),
7888					 GFP_KERNEL);
7889	if (!mlxsw_sp->router->rifs)
7890		return -ENOMEM;
7891
7892	return 0;
7893}
7894
7895static void mlxsw_sp_rifs_fini(struct mlxsw_sp *mlxsw_sp)
7896{
7897	int i;
7898
7899	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
7900		WARN_ON_ONCE(mlxsw_sp->router->rifs[i]);
7901
7902	kfree(mlxsw_sp->router->rifs);
7903}
7904
7905static int
7906mlxsw_sp_ipip_config_tigcr(struct mlxsw_sp *mlxsw_sp)
7907{
7908	char tigcr_pl[MLXSW_REG_TIGCR_LEN];
7909
7910	mlxsw_reg_tigcr_pack(tigcr_pl, true, 0);
7911	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(tigcr), tigcr_pl);
7912}
7913
7914static int mlxsw_sp_ipips_init(struct mlxsw_sp *mlxsw_sp)
7915{
7916	int err;
7917
7918	mlxsw_sp->router->ipip_ops_arr = mlxsw_sp_ipip_ops_arr;
7919	INIT_LIST_HEAD(&mlxsw_sp->router->ipip_list);
7920
7921	err = mlxsw_sp_ipip_ecn_encap_init(mlxsw_sp);
7922	if (err)
7923		return err;
7924	err = mlxsw_sp_ipip_ecn_decap_init(mlxsw_sp);
7925	if (err)
7926		return err;
7927
7928	return mlxsw_sp_ipip_config_tigcr(mlxsw_sp);
7929}
7930
7931static void mlxsw_sp_ipips_fini(struct mlxsw_sp *mlxsw_sp)
7932{
7933	WARN_ON(!list_empty(&mlxsw_sp->router->ipip_list));
7934}
7935
7936static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
7937{
7938	struct mlxsw_sp_router *router;
7939
7940	/* Flush pending FIB notifications and then flush the device's
7941	 * table before requesting another dump. The FIB notification
7942	 * block is unregistered, so no need to take RTNL.
7943	 */
7944	mlxsw_core_flush_owq();
7945	router = container_of(nb, struct mlxsw_sp_router, fib_nb);
7946	mlxsw_sp_router_fib_flush(router->mlxsw_sp);
7947}
7948
7949#ifdef CONFIG_IP_ROUTE_MULTIPATH
7950static void mlxsw_sp_mp_hash_header_set(char *recr2_pl, int header)
7951{
7952	mlxsw_reg_recr2_outer_header_enables_set(recr2_pl, header, true);
7953}
7954
7955static void mlxsw_sp_mp_hash_field_set(char *recr2_pl, int field)
7956{
7957	mlxsw_reg_recr2_outer_header_fields_enable_set(recr2_pl, field, true);
7958}
7959
7960static void mlxsw_sp_mp4_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
7961{
7962	struct net *net = mlxsw_sp_net(mlxsw_sp);
7963	bool only_l3 = !net->ipv4.sysctl_fib_multipath_hash_policy;
7964
7965	mlxsw_sp_mp_hash_header_set(recr2_pl,
7966				    MLXSW_REG_RECR2_IPV4_EN_NOT_TCP_NOT_UDP);
7967	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV4_EN_TCP_UDP);
7968	mlxsw_reg_recr2_ipv4_sip_enable(recr2_pl);
7969	mlxsw_reg_recr2_ipv4_dip_enable(recr2_pl);
7970	if (only_l3)
7971		return;
7972	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_EN_IPV4);
7973	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV4_PROTOCOL);
7974	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_SPORT);
7975	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_TCP_UDP_DPORT);
7976}
7977
7978static void mlxsw_sp_mp6_hash_init(struct mlxsw_sp *mlxsw_sp, char *recr2_pl)
7979{
7980	bool only_l3 = !ip6_multipath_hash_policy(mlxsw_sp_net(mlxsw_sp));
7981
7982	mlxsw_sp_mp_hash_header_set(recr2_pl,
7983				    MLXSW_REG_RECR2_IPV6_EN_NOT_TCP_NOT_UDP);
7984	mlxsw_sp_mp_hash_header_set(recr2_pl, MLXSW_REG_RECR2_IPV6_EN_TCP_UDP);
7985	mlxsw_reg_recr2_ipv6_sip_enable(recr2_pl);
7986	mlxsw_reg_recr2_ipv6_dip_enable(recr2_pl);
7987	mlxsw_sp_mp_hash_field_set(recr2_pl, MLXSW_REG_RECR2_IPV6_NEXT_HEADER);
7988	if (only_l3) {
7989		mlxsw_sp_mp_hash_field_set(recr2_pl,
7990					   MLXSW_REG_RECR2_IPV6_FLOW_LABEL);
7991	} else {
7992		mlxsw_sp_mp_hash_header_set(recr2_pl,
7993					    MLXSW_REG_RECR2_TCP_UDP_EN_IPV6);
7994		mlxsw_sp_mp_hash_field_set(recr2_pl,
7995					   MLXSW_REG_RECR2_TCP_UDP_SPORT);
7996		mlxsw_sp_mp_hash_field_set(recr2_pl,
7997					   MLXSW_REG_RECR2_TCP_UDP_DPORT);
7998	}
7999}
8000
8001static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
8002{
8003	char recr2_pl[MLXSW_REG_RECR2_LEN];
8004	u32 seed;
8005
8006	seed = jhash(mlxsw_sp->base_mac, sizeof(mlxsw_sp->base_mac), 0);
8007	mlxsw_reg_recr2_pack(recr2_pl, seed);
8008	mlxsw_sp_mp4_hash_init(mlxsw_sp, recr2_pl);
8009	mlxsw_sp_mp6_hash_init(mlxsw_sp, recr2_pl);
8010
8011	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(recr2), recr2_pl);
8012}
8013#else
8014static int mlxsw_sp_mp_hash_init(struct mlxsw_sp *mlxsw_sp)
8015{
8016	return 0;
8017}
8018#endif
8019
8020static int mlxsw_sp_dscp_init(struct mlxsw_sp *mlxsw_sp)
8021{
8022	char rdpm_pl[MLXSW_REG_RDPM_LEN];
8023	unsigned int i;
8024
8025	MLXSW_REG_ZERO(rdpm, rdpm_pl);
8026
8027	/* HW is determining switch priority based on DSCP-bits, but the
8028	 * kernel is still doing that based on the ToS. Since there's a
8029	 * mismatch in bits we need to make sure to translate the right
8030	 * value ToS would observe, skipping the 2 least-significant ECN bits.
8031	 */
8032	for (i = 0; i < MLXSW_REG_RDPM_DSCP_ENTRY_REC_MAX_COUNT; i++)
8033		mlxsw_reg_rdpm_pack(rdpm_pl, i, rt_tos2priority(i << 2));
8034
8035	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rdpm), rdpm_pl);
8036}
8037
8038static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
8039{
8040	struct net *net = mlxsw_sp_net(mlxsw_sp);
8041	char rgcr_pl[MLXSW_REG_RGCR_LEN];
8042	u64 max_rifs;
8043	bool usp;
8044
8045	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
8046		return -EIO;
8047	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
8048	usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
8049
8050	mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
8051	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
8052	mlxsw_reg_rgcr_usp_set(rgcr_pl, usp);
8053	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
8054}
8055
8056static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8057{
8058	char rgcr_pl[MLXSW_REG_RGCR_LEN];
8059
8060	mlxsw_reg_rgcr_pack(rgcr_pl, false, false);
8061	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
8062}
8063
8064int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp,
8065			 struct netlink_ext_ack *extack)
8066{
8067	struct mlxsw_sp_router *router;
8068	int err;
8069
8070	router = kzalloc(sizeof(*mlxsw_sp->router), GFP_KERNEL);
8071	if (!router)
8072		return -ENOMEM;
8073	mutex_init(&router->lock);
8074	mlxsw_sp->router = router;
8075	router->mlxsw_sp = mlxsw_sp;
8076
8077	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_neighs_list);
8078	err = __mlxsw_sp_router_init(mlxsw_sp);
8079	if (err)
8080		goto err_router_init;
8081
8082	err = mlxsw_sp_rifs_init(mlxsw_sp);
8083	if (err)
8084		goto err_rifs_init;
8085
8086	err = mlxsw_sp_ipips_init(mlxsw_sp);
8087	if (err)
8088		goto err_ipips_init;
8089
8090	err = rhashtable_init(&mlxsw_sp->router->nexthop_ht,
8091			      &mlxsw_sp_nexthop_ht_params);
8092	if (err)
8093		goto err_nexthop_ht_init;
8094
8095	err = rhashtable_init(&mlxsw_sp->router->nexthop_group_ht,
8096			      &mlxsw_sp_nexthop_group_ht_params);
8097	if (err)
8098		goto err_nexthop_group_ht_init;
8099
8100	INIT_LIST_HEAD(&mlxsw_sp->router->nexthop_list);
8101	err = mlxsw_sp_lpm_init(mlxsw_sp);
8102	if (err)
8103		goto err_lpm_init;
8104
8105	err = mlxsw_sp_mr_init(mlxsw_sp, &mlxsw_sp_mr_tcam_ops);
8106	if (err)
8107		goto err_mr_init;
8108
8109	err = mlxsw_sp_vrs_init(mlxsw_sp);
8110	if (err)
8111		goto err_vrs_init;
8112
8113	err = mlxsw_sp_neigh_init(mlxsw_sp);
8114	if (err)
8115		goto err_neigh_init;
8116
8117	err = mlxsw_sp_mp_hash_init(mlxsw_sp);
8118	if (err)
8119		goto err_mp_hash_init;
8120
8121	err = mlxsw_sp_dscp_init(mlxsw_sp);
8122	if (err)
8123		goto err_dscp_init;
8124
8125	router->inetaddr_nb.notifier_call = mlxsw_sp_inetaddr_event;
8126	err = register_inetaddr_notifier(&router->inetaddr_nb);
8127	if (err)
8128		goto err_register_inetaddr_notifier;
8129
8130	router->inet6addr_nb.notifier_call = mlxsw_sp_inet6addr_event;
8131	err = register_inet6addr_notifier(&router->inet6addr_nb);
8132	if (err)
8133		goto err_register_inet6addr_notifier;
8134
8135	mlxsw_sp->router->netevent_nb.notifier_call =
8136		mlxsw_sp_router_netevent_event;
8137	err = register_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8138	if (err)
8139		goto err_register_netevent_notifier;
8140
8141	mlxsw_sp->router->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
8142	err = register_fib_notifier(mlxsw_sp_net(mlxsw_sp),
8143				    &mlxsw_sp->router->fib_nb,
8144				    mlxsw_sp_router_fib_dump_flush, extack);
8145	if (err)
8146		goto err_register_fib_notifier;
8147
8148	return 0;
8149
8150err_register_fib_notifier:
8151	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8152err_register_netevent_notifier:
8153	unregister_inet6addr_notifier(&router->inet6addr_nb);
8154err_register_inet6addr_notifier:
8155	unregister_inetaddr_notifier(&router->inetaddr_nb);
8156err_register_inetaddr_notifier:
8157	mlxsw_core_flush_owq();
8158err_dscp_init:
8159err_mp_hash_init:
8160	mlxsw_sp_neigh_fini(mlxsw_sp);
8161err_neigh_init:
8162	mlxsw_sp_vrs_fini(mlxsw_sp);
8163err_vrs_init:
8164	mlxsw_sp_mr_fini(mlxsw_sp);
8165err_mr_init:
8166	mlxsw_sp_lpm_fini(mlxsw_sp);
8167err_lpm_init:
8168	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8169err_nexthop_group_ht_init:
8170	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8171err_nexthop_ht_init:
8172	mlxsw_sp_ipips_fini(mlxsw_sp);
8173err_ipips_init:
8174	mlxsw_sp_rifs_fini(mlxsw_sp);
8175err_rifs_init:
8176	__mlxsw_sp_router_fini(mlxsw_sp);
8177err_router_init:
8178	mutex_destroy(&mlxsw_sp->router->lock);
8179	kfree(mlxsw_sp->router);
8180	return err;
8181}
8182
8183void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
8184{
8185	unregister_fib_notifier(mlxsw_sp_net(mlxsw_sp),
8186				&mlxsw_sp->router->fib_nb);
8187	unregister_netevent_notifier(&mlxsw_sp->router->netevent_nb);
8188	unregister_inet6addr_notifier(&mlxsw_sp->router->inet6addr_nb);
8189	unregister_inetaddr_notifier(&mlxsw_sp->router->inetaddr_nb);
8190	mlxsw_core_flush_owq();
8191	mlxsw_sp_neigh_fini(mlxsw_sp);
8192	mlxsw_sp_vrs_fini(mlxsw_sp);
8193	mlxsw_sp_mr_fini(mlxsw_sp);
8194	mlxsw_sp_lpm_fini(mlxsw_sp);
8195	rhashtable_destroy(&mlxsw_sp->router->nexthop_group_ht);
8196	rhashtable_destroy(&mlxsw_sp->router->nexthop_ht);
8197	mlxsw_sp_ipips_fini(mlxsw_sp);
8198	mlxsw_sp_rifs_fini(mlxsw_sp);
8199	__mlxsw_sp_router_fini(mlxsw_sp);
8200	mutex_destroy(&mlxsw_sp->router->lock);
8201	kfree(mlxsw_sp->router);
8202}
8203