1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/* Copyright (c) 2020 Mellanox Technologies Ltd. */
3
4#include <linux/module.h>
5#include <linux/vdpa.h>
6#include <linux/vringh.h>
7#include <uapi/linux/virtio_net.h>
8#include <uapi/linux/virtio_ids.h>
9#include <uapi/linux/vdpa.h>
10#include <linux/virtio_config.h>
11#include <linux/auxiliary_bus.h>
12#include <linux/mlx5/cq.h>
13#include <linux/mlx5/qp.h>
14#include <linux/mlx5/device.h>
15#include <linux/mlx5/driver.h>
16#include <linux/mlx5/vport.h>
17#include <linux/mlx5/fs.h>
18#include <linux/mlx5/mlx5_ifc_vdpa.h>
19#include <linux/mlx5/mpfs.h>
20#include "mlx5_vdpa.h"
21#include "mlx5_vnet.h"
22
23MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
24MODULE_DESCRIPTION("Mellanox VDPA driver");
25MODULE_LICENSE("Dual BSD/GPL");
26
27#define VALID_FEATURES_MASK                                                                        \
28	(BIT_ULL(VIRTIO_NET_F_CSUM) | BIT_ULL(VIRTIO_NET_F_GUEST_CSUM) |                                   \
29	 BIT_ULL(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) | BIT_ULL(VIRTIO_NET_F_MTU) | BIT_ULL(VIRTIO_NET_F_MAC) |   \
30	 BIT_ULL(VIRTIO_NET_F_GUEST_TSO4) | BIT_ULL(VIRTIO_NET_F_GUEST_TSO6) |                             \
31	 BIT_ULL(VIRTIO_NET_F_GUEST_ECN) | BIT_ULL(VIRTIO_NET_F_GUEST_UFO) | BIT_ULL(VIRTIO_NET_F_HOST_TSO4) | \
32	 BIT_ULL(VIRTIO_NET_F_HOST_TSO6) | BIT_ULL(VIRTIO_NET_F_HOST_ECN) | BIT_ULL(VIRTIO_NET_F_HOST_UFO) |   \
33	 BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) |      \
34	 BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_CTRL_VLAN) |                                 \
35	 BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_GUEST_ANNOUNCE) |                      \
36	 BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | BIT_ULL(VIRTIO_NET_F_HASH_REPORT) |  \
37	 BIT_ULL(VIRTIO_NET_F_RSS) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | BIT_ULL(VIRTIO_NET_F_STANDBY) |           \
38	 BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX) | BIT_ULL(VIRTIO_F_NOTIFY_ON_EMPTY) |                          \
39	 BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM) |      \
40	 BIT_ULL(VIRTIO_F_RING_PACKED) | BIT_ULL(VIRTIO_F_ORDER_PLATFORM) | BIT_ULL(VIRTIO_F_SR_IOV))
41
42#define VALID_STATUS_MASK                                                                          \
43	(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK |        \
44	 VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
45
46#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
47
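/* Key used for untagged entries in the MAC/VLAN hash table; it lies outside
 * the valid 12-bit VLAN ID range, so it cannot collide with a real VLAN.
 */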
48#define MLX5V_UNTAGGED 0x1000
49
50struct mlx5_vdpa_cq_buf {
51	struct mlx5_frag_buf_ctrl fbc;
52	struct mlx5_frag_buf frag_buf;
53	int cqe_size;
54	int nent;
55};
56
57struct mlx5_vdpa_cq {
58	struct mlx5_core_cq mcq;
59	struct mlx5_vdpa_cq_buf buf;
60	struct mlx5_db db;
61	int cqe;
62};
63
64struct mlx5_vdpa_umem {
65	struct mlx5_frag_buf_ctrl fbc;
66	struct mlx5_frag_buf frag_buf;
67	int size;
68	u32 id;
69};
70
71struct mlx5_vdpa_qp {
72	struct mlx5_core_qp mqp;
73	struct mlx5_frag_buf frag_buf;
74	struct mlx5_db db;
75	u16 head;
76	bool fw;
77};
78
79struct mlx5_vq_restore_info {
80	u32 num_ent;
81	u64 desc_addr;
82	u64 device_addr;
83	u64 driver_addr;
84	u16 avail_index;
85	u16 used_index;
86	struct msi_map map;
87	bool ready;
88	bool restore;
89};
90
91struct mlx5_vdpa_virtqueue {
92	bool ready;
93	u64 desc_addr;
94	u64 device_addr;
95	u64 driver_addr;
96	u32 num_ent;
97
98	/* Resources for implementing the notification channel from the device
99	 * to the driver. fwqp is the firmware end of an RC connection; the
100	 * other end is vqqp used by the driver. cq is where completions are
101	 * reported.
102	 */
103	struct mlx5_vdpa_cq cq;
104	struct mlx5_vdpa_qp fwqp;
105	struct mlx5_vdpa_qp vqqp;
106
	/* umem resources are required for virtqueue operation. Their use is
	 * internal and they must be provided by the driver.
	 */
110	struct mlx5_vdpa_umem umem1;
111	struct mlx5_vdpa_umem umem2;
112	struct mlx5_vdpa_umem umem3;
113
114	u32 counter_set_id;
115	bool initialized;
116	int index;
117	u32 virtq_id;
118	struct mlx5_vdpa_net *ndev;
119	u16 avail_idx;
120	u16 used_idx;
121	int fw_state;
122	struct msi_map map;
123
124	/* keep last in the struct */
125	struct mlx5_vq_restore_info ri;
126};
127
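/* Indices 0 and 1 are always the first RX/TX virtqueue pair. Without
 * VIRTIO_NET_F_MQ the only other valid index is the control VQ (index 2)
 * when VIRTIO_NET_F_CTRL_VQ is negotiated; with MQ any index up to
 * mvdev->max_idx is valid.
 */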
128static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
129{
130	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ))) {
131		if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
132			return idx < 2;
133		else
134			return idx < 3;
135	}
136
137	return idx <= mvdev->max_idx;
138}
139
140static void free_resources(struct mlx5_vdpa_net *ndev);
141static void init_mvqs(struct mlx5_vdpa_net *ndev);
142static int setup_driver(struct mlx5_vdpa_dev *mvdev);
143static void teardown_driver(struct mlx5_vdpa_net *ndev);
144
145static bool mlx5_vdpa_debug;
146
147#define MLX5_LOG_VIO_FLAG(_feature)                                                                \
148	do {                                                                                       \
149		if (features & BIT_ULL(_feature))                                                  \
150			mlx5_vdpa_info(mvdev, "%s\n", #_feature);                                  \
151	} while (0)
152
153#define MLX5_LOG_VIO_STAT(_status)                                                                 \
154	do {                                                                                       \
155		if (status & (_status))                                                            \
156			mlx5_vdpa_info(mvdev, "%s\n", #_status);                                   \
157	} while (0)
158
159/* TODO: cross-endian support */
160static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
161{
162	return virtio_legacy_is_little_endian() ||
163		(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
164}
165
166static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
167{
168	return __virtio16_to_cpu(mlx5_vdpa_is_little_endian(mvdev), val);
169}
170
171static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
172{
173	return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
174}
175
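/* The control VQ, when present, uses index 2 when VIRTIO_NET_F_MQ is not
 * negotiated, and max_vqs otherwise, placing it right after the data
 * virtqueues.
 */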
176static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
177{
178	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ)))
179		return 2;
180
181	return mvdev->max_vqs;
182}
183
184static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
185{
186	return idx == ctrl_vq_idx(mvdev);
187}
188
189static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
190{
191	if (status & ~VALID_STATUS_MASK)
192		mlx5_vdpa_warn(mvdev, "Warning: there are invalid status bits 0x%x\n",
193			       status & ~VALID_STATUS_MASK);
194
195	if (!mlx5_vdpa_debug)
196		return;
197
	mlx5_vdpa_info(mvdev, "driver status %s\n", set ? "set" : "get");
199	if (set && !status) {
200		mlx5_vdpa_info(mvdev, "driver resets the device\n");
201		return;
202	}
203
204	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_ACKNOWLEDGE);
205	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER);
206	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_DRIVER_OK);
207	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FEATURES_OK);
208	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_NEEDS_RESET);
209	MLX5_LOG_VIO_STAT(VIRTIO_CONFIG_S_FAILED);
210}
211
212static void print_features(struct mlx5_vdpa_dev *mvdev, u64 features, bool set)
213{
214	if (features & ~VALID_FEATURES_MASK)
215		mlx5_vdpa_warn(mvdev, "There are invalid feature bits 0x%llx\n",
216			       features & ~VALID_FEATURES_MASK);
217
218	if (!mlx5_vdpa_debug)
219		return;
220
221	mlx5_vdpa_info(mvdev, "driver %s feature bits:\n", set ? "sets" : "reads");
222	if (!features)
223		mlx5_vdpa_info(mvdev, "all feature bits are cleared\n");
224
225	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CSUM);
226	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_CSUM);
227	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
228	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MTU);
229	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MAC);
230	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO4);
231	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_TSO6);
232	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ECN);
233	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_UFO);
234	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO4);
235	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_TSO6);
236	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_ECN);
237	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HOST_UFO);
238	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MRG_RXBUF);
239	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STATUS);
240	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VQ);
241	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX);
242	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_VLAN);
243	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_RX_EXTRA);
244	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_GUEST_ANNOUNCE);
245	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_MQ);
246	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_CTRL_MAC_ADDR);
247	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_HASH_REPORT);
248	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSS);
249	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_RSC_EXT);
250	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_STANDBY);
251	MLX5_LOG_VIO_FLAG(VIRTIO_NET_F_SPEED_DUPLEX);
252	MLX5_LOG_VIO_FLAG(VIRTIO_F_NOTIFY_ON_EMPTY);
253	MLX5_LOG_VIO_FLAG(VIRTIO_F_ANY_LAYOUT);
254	MLX5_LOG_VIO_FLAG(VIRTIO_F_VERSION_1);
255	MLX5_LOG_VIO_FLAG(VIRTIO_F_ACCESS_PLATFORM);
256	MLX5_LOG_VIO_FLAG(VIRTIO_F_RING_PACKED);
257	MLX5_LOG_VIO_FLAG(VIRTIO_F_ORDER_PLATFORM);
258	MLX5_LOG_VIO_FLAG(VIRTIO_F_SR_IOV);
259}
260
261static int create_tis(struct mlx5_vdpa_net *ndev)
262{
263	struct mlx5_vdpa_dev *mvdev = &ndev->mvdev;
264	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
265	void *tisc;
266	int err;
267
268	tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
269	MLX5_SET(tisc, tisc, transport_domain, ndev->res.tdn);
270	err = mlx5_vdpa_create_tis(mvdev, in, &ndev->res.tisn);
271	if (err)
272		mlx5_vdpa_warn(mvdev, "create TIS (%d)\n", err);
273
274	return err;
275}
276
277static void destroy_tis(struct mlx5_vdpa_net *ndev)
278{
279	mlx5_vdpa_destroy_tis(&ndev->mvdev, ndev->res.tisn);
280}
281
282#define MLX5_VDPA_CQE_SIZE 64
283#define MLX5_VDPA_LOG_CQE_SIZE ilog2(MLX5_VDPA_CQE_SIZE)
284
285static int cq_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf, int nent)
286{
287	struct mlx5_frag_buf *frag_buf = &buf->frag_buf;
288	u8 log_wq_stride = MLX5_VDPA_LOG_CQE_SIZE;
289	u8 log_wq_sz = MLX5_VDPA_LOG_CQE_SIZE;
290	int err;
291
292	err = mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, nent * MLX5_VDPA_CQE_SIZE, frag_buf,
293				       ndev->mvdev.mdev->priv.numa_node);
294	if (err)
295		return err;
296
297	mlx5_init_fbc(frag_buf->frags, log_wq_stride, log_wq_sz, &buf->fbc);
298
299	buf->cqe_size = MLX5_VDPA_CQE_SIZE;
300	buf->nent = nent;
301
302	return 0;
303}
304
305static int umem_frag_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem, int size)
306{
307	struct mlx5_frag_buf *frag_buf = &umem->frag_buf;
308
309	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev, size, frag_buf,
310					ndev->mvdev.mdev->priv.numa_node);
311}
312
313static void cq_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_cq_buf *buf)
314{
315	mlx5_frag_buf_free(ndev->mvdev.mdev, &buf->frag_buf);
316}
317
318static void *get_cqe(struct mlx5_vdpa_cq *vcq, int n)
319{
320	return mlx5_frag_buf_get_wqe(&vcq->buf.fbc, n);
321}
322
323static void cq_frag_buf_init(struct mlx5_vdpa_cq *vcq, struct mlx5_vdpa_cq_buf *buf)
324{
325	struct mlx5_cqe64 *cqe64;
326	void *cqe;
327	int i;
328
329	for (i = 0; i < buf->nent; i++) {
330		cqe = get_cqe(vcq, i);
331		cqe64 = cqe;
332		cqe64->op_own = MLX5_CQE_INVALID << 4;
333	}
334}
335
336static void *get_sw_cqe(struct mlx5_vdpa_cq *cq, int n)
337{
338	struct mlx5_cqe64 *cqe64 = get_cqe(cq, n & (cq->cqe - 1));
339
340	if (likely(get_cqe_opcode(cqe64) != MLX5_CQE_INVALID) &&
341	    !((cqe64->op_own & MLX5_CQE_OWNER_MASK) ^ !!(n & cq->cqe)))
342		return cqe64;
343
344	return NULL;
345}
346
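/* Advance the receive queue head and publish it through the doorbell record
 * so the device can consume the posted receive WQEs.
 */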
347static void rx_post(struct mlx5_vdpa_qp *vqp, int n)
348{
349	vqp->head += n;
350	vqp->db.db[0] = cpu_to_be32(vqp->head);
351}
352
353static void qp_prepare(struct mlx5_vdpa_net *ndev, bool fw, void *in,
354		       struct mlx5_vdpa_virtqueue *mvq, u32 num_ent)
355{
356	struct mlx5_vdpa_qp *vqp;
357	__be64 *pas;
358	void *qpc;
359
360	vqp = fw ? &mvq->fwqp : &mvq->vqqp;
361	MLX5_SET(create_qp_in, in, uid, ndev->mvdev.res.uid);
362	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
363	if (vqp->fw) {
		/* The firmware QP is allocated by the driver for the firmware's
		 * use, so most of the parameters can be skipped; the firmware
		 * chooses them itself.
		 */
368		MLX5_SET(qpc, qpc, rq_type, MLX5_ZERO_LEN_RQ);
369		MLX5_SET(qpc, qpc, no_sq, 1);
370		return;
371	}
372
373	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
374	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
375	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
376	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
377	MLX5_SET(qpc, qpc, uar_page, ndev->mvdev.res.uar->index);
378	MLX5_SET(qpc, qpc, log_page_size, vqp->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
379	MLX5_SET(qpc, qpc, no_sq, 1);
380	MLX5_SET(qpc, qpc, cqn_rcv, mvq->cq.mcq.cqn);
381	MLX5_SET(qpc, qpc, log_rq_size, ilog2(num_ent));
382	MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
383	pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas);
384	mlx5_fill_page_frag_array(&vqp->frag_buf, pas);
385}
386
387static int rq_buf_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp, u32 num_ent)
388{
389	return mlx5_frag_buf_alloc_node(ndev->mvdev.mdev,
390					num_ent * sizeof(struct mlx5_wqe_data_seg), &vqp->frag_buf,
391					ndev->mvdev.mdev->priv.numa_node);
392}
393
394static void rq_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
395{
396	mlx5_frag_buf_free(ndev->mvdev.mdev, &vqp->frag_buf);
397}
398
399static int qp_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
400		     struct mlx5_vdpa_qp *vqp)
401{
402	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
403	int inlen = MLX5_ST_SZ_BYTES(create_qp_in);
404	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
405	void *qpc;
406	void *in;
407	int err;
408
409	if (!vqp->fw) {
410		vqp = &mvq->vqqp;
411		err = rq_buf_alloc(ndev, vqp, mvq->num_ent);
412		if (err)
413			return err;
414
415		err = mlx5_db_alloc(ndev->mvdev.mdev, &vqp->db);
416		if (err)
417			goto err_db;
418		inlen += vqp->frag_buf.npages * sizeof(__be64);
419	}
420
421	in = kzalloc(inlen, GFP_KERNEL);
422	if (!in) {
423		err = -ENOMEM;
424		goto err_kzalloc;
425	}
426
427	qp_prepare(ndev, vqp->fw, in, mvq, mvq->num_ent);
428	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
429	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
430	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
431	MLX5_SET(qpc, qpc, pd, ndev->mvdev.res.pdn);
432	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
433	if (!vqp->fw)
434		MLX5_SET64(qpc, qpc, dbr_addr, vqp->db.dma);
435	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
436	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
437	kfree(in);
438	if (err)
439		goto err_kzalloc;
440
441	vqp->mqp.uid = ndev->mvdev.res.uid;
442	vqp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
443
444	if (!vqp->fw)
445		rx_post(vqp, mvq->num_ent);
446
447	return 0;
448
449err_kzalloc:
450	if (!vqp->fw)
451		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
452err_db:
453	if (!vqp->fw)
454		rq_buf_free(ndev, vqp);
455
456	return err;
457}
458
459static void qp_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_qp *vqp)
460{
461	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
462
463	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
464	MLX5_SET(destroy_qp_in, in, qpn, vqp->mqp.qpn);
465	MLX5_SET(destroy_qp_in, in, uid, ndev->mvdev.res.uid);
466	if (mlx5_cmd_exec_in(ndev->mvdev.mdev, destroy_qp, in))
467		mlx5_vdpa_warn(&ndev->mvdev, "destroy qp 0x%x\n", vqp->mqp.qpn);
468	if (!vqp->fw) {
469		mlx5_db_free(ndev->mvdev.mdev, &vqp->db);
470		rq_buf_free(ndev, vqp);
471	}
472}
473
474static void *next_cqe_sw(struct mlx5_vdpa_cq *cq)
475{
476	return get_sw_cqe(cq, cq->mcq.cons_index);
477}
478
479static int mlx5_vdpa_poll_one(struct mlx5_vdpa_cq *vcq)
480{
481	struct mlx5_cqe64 *cqe64;
482
483	cqe64 = next_cqe_sw(vcq);
484	if (!cqe64)
485		return -EAGAIN;
486
487	vcq->mcq.cons_index++;
488	return 0;
489}
490
491static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
492{
493	struct mlx5_vdpa_net *ndev = mvq->ndev;
494	struct vdpa_callback *event_cb;
495
496	event_cb = &ndev->event_cbs[mvq->index];
497	mlx5_cq_set_ci(&mvq->cq.mcq);
498
	/* Make sure the CQ consumer index update is visible to the hardware
	 * before updating the RX doorbell record.
	 */
502	dma_wmb();
503	rx_post(&mvq->vqqp, num);
504	if (event_cb->callback)
505		event_cb->callback(event_cb->private);
506}
507
508static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
509{
510	struct mlx5_vdpa_virtqueue *mvq = container_of(mcq, struct mlx5_vdpa_virtqueue, cq.mcq);
511	struct mlx5_vdpa_net *ndev = mvq->ndev;
512	void __iomem *uar_page = ndev->mvdev.res.uar->map;
513	int num = 0;
514
515	while (!mlx5_vdpa_poll_one(&mvq->cq)) {
516		num++;
517		if (num > mvq->num_ent / 2) {
			/* If completions keep coming while we poll, we want to
			 * let the hardware know that we consumed them by
			 * updating the doorbell record. We also let the vdpa
			 * core know about this so it can pass it on to the
			 * virtio driver in the guest.
			 */
524			mlx5_vdpa_handle_completions(mvq, num);
525			num = 0;
526		}
527	}
528
529	if (num)
530		mlx5_vdpa_handle_completions(mvq, num);
531
532	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
533}
534
535static int cq_create(struct mlx5_vdpa_net *ndev, u16 idx, u32 num_ent)
536{
537	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
538	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
539	void __iomem *uar_page = ndev->mvdev.res.uar->map;
540	u32 out[MLX5_ST_SZ_DW(create_cq_out)];
541	struct mlx5_vdpa_cq *vcq = &mvq->cq;
542	__be64 *pas;
543	int inlen;
544	void *cqc;
545	void *in;
546	int err;
547	int eqn;
548
549	err = mlx5_db_alloc(mdev, &vcq->db);
550	if (err)
551		return err;
552
553	vcq->mcq.set_ci_db = vcq->db.db;
554	vcq->mcq.arm_db = vcq->db.db + 1;
555	vcq->mcq.cqe_sz = 64;
556
557	err = cq_frag_buf_alloc(ndev, &vcq->buf, num_ent);
558	if (err)
559		goto err_db;
560
561	cq_frag_buf_init(vcq, &vcq->buf);
562
563	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
564		MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * vcq->buf.frag_buf.npages;
565	in = kzalloc(inlen, GFP_KERNEL);
566	if (!in) {
567		err = -ENOMEM;
568		goto err_vzalloc;
569	}
570
571	MLX5_SET(create_cq_in, in, uid, ndev->mvdev.res.uid);
572	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
573	mlx5_fill_page_frag_array(&vcq->buf.frag_buf, pas);
574
575	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
576	MLX5_SET(cqc, cqc, log_page_size, vcq->buf.frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
577
	/* Use vector 0 by default. Consider adding code to choose the least
	 * used vector.
	 */
581	err = mlx5_comp_eqn_get(mdev, 0, &eqn);
582	if (err)
583		goto err_vec;
584
585	cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
586	MLX5_SET(cqc, cqc, log_cq_size, ilog2(num_ent));
587	MLX5_SET(cqc, cqc, uar_page, ndev->mvdev.res.uar->index);
588	MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
589	MLX5_SET64(cqc, cqc, dbr_addr, vcq->db.dma);
590
591	err = mlx5_core_create_cq(mdev, &vcq->mcq, in, inlen, out, sizeof(out));
592	if (err)
593		goto err_vec;
594
595	vcq->mcq.comp = mlx5_vdpa_cq_comp;
596	vcq->cqe = num_ent;
597	vcq->mcq.set_ci_db = vcq->db.db;
598	vcq->mcq.arm_db = vcq->db.db + 1;
599	mlx5_cq_arm(&mvq->cq.mcq, MLX5_CQ_DB_REQ_NOT, uar_page, mvq->cq.mcq.cons_index);
600	kfree(in);
601	return 0;
602
603err_vec:
604	kfree(in);
605err_vzalloc:
606	cq_frag_buf_free(ndev, &vcq->buf);
607err_db:
608	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
609	return err;
610}
611
612static void cq_destroy(struct mlx5_vdpa_net *ndev, u16 idx)
613{
614	struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
615	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
616	struct mlx5_vdpa_cq *vcq = &mvq->cq;
617
618	if (mlx5_core_destroy_cq(mdev, &vcq->mcq)) {
619		mlx5_vdpa_warn(&ndev->mvdev, "destroy CQ 0x%x\n", vcq->mcq.cqn);
620		return;
621	}
622	cq_frag_buf_free(ndev, &vcq->buf);
623	mlx5_db_free(ndev->mvdev.mdev, &vcq->db);
624}
625
626static int read_umem_params(struct mlx5_vdpa_net *ndev)
627{
628	u32 in[MLX5_ST_SZ_DW(query_hca_cap_in)] = {};
629	u16 opmod = (MLX5_CAP_VDPA_EMULATION << 1) | (HCA_CAP_OPMOD_GET_CUR & 0x01);
630	struct mlx5_core_dev *mdev = ndev->mvdev.mdev;
631	int out_size;
632	void *caps;
633	void *out;
634	int err;
635
636	out_size = MLX5_ST_SZ_BYTES(query_hca_cap_out);
637	out = kzalloc(out_size, GFP_KERNEL);
638	if (!out)
639		return -ENOMEM;
640
641	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
642	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
643	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
644	if (err) {
645		mlx5_vdpa_warn(&ndev->mvdev,
646			"Failed reading vdpa umem capabilities with err %d\n", err);
647		goto out;
648	}
649
	caps = MLX5_ADDR_OF(query_hca_cap_out, out, capability);
651
652	ndev->umem_1_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_a);
653	ndev->umem_1_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_1_buffer_param_b);
654
655	ndev->umem_2_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_a);
656	ndev->umem_2_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_2_buffer_param_b);
657
658	ndev->umem_3_buffer_param_a = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_a);
659	ndev->umem_3_buffer_param_b = MLX5_GET(virtio_emulation_cap, caps, umem_3_buffer_param_b);
660
out:
	kfree(out);
	return err;
664}
665
666static void set_umem_size(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num,
667			  struct mlx5_vdpa_umem **umemp)
668{
669	u32 p_a;
670	u32 p_b;
671
672	switch (num) {
673	case 1:
674		p_a = ndev->umem_1_buffer_param_a;
675		p_b = ndev->umem_1_buffer_param_b;
676		*umemp = &mvq->umem1;
677		break;
678	case 2:
679		p_a = ndev->umem_2_buffer_param_a;
680		p_b = ndev->umem_2_buffer_param_b;
681		*umemp = &mvq->umem2;
682		break;
683	case 3:
684		p_a = ndev->umem_3_buffer_param_a;
685		p_b = ndev->umem_3_buffer_param_b;
686		*umemp = &mvq->umem3;
687		break;
688	}
689
690	(*umemp)->size = p_a * mvq->num_ent + p_b;
691}
692
693static void umem_frag_buf_free(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_umem *umem)
694{
695	mlx5_frag_buf_free(ndev->mvdev.mdev, &umem->frag_buf);
696}
697
698static int create_umem(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
699{
700	int inlen;
701	u32 out[MLX5_ST_SZ_DW(create_umem_out)] = {};
702	void *um;
703	void *in;
704	int err;
705	__be64 *pas;
706	struct mlx5_vdpa_umem *umem;
707
708	set_umem_size(ndev, mvq, num, &umem);
709	err = umem_frag_buf_alloc(ndev, umem, umem->size);
710	if (err)
711		return err;
712
713	inlen = MLX5_ST_SZ_BYTES(create_umem_in) + MLX5_ST_SZ_BYTES(mtt) * umem->frag_buf.npages;
714
715	in = kzalloc(inlen, GFP_KERNEL);
716	if (!in) {
717		err = -ENOMEM;
718		goto err_in;
719	}
720
721	MLX5_SET(create_umem_in, in, opcode, MLX5_CMD_OP_CREATE_UMEM);
722	MLX5_SET(create_umem_in, in, uid, ndev->mvdev.res.uid);
723	um = MLX5_ADDR_OF(create_umem_in, in, umem);
724	MLX5_SET(umem, um, log_page_size, umem->frag_buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
725	MLX5_SET64(umem, um, num_of_mtt, umem->frag_buf.npages);
726
727	pas = (__be64 *)MLX5_ADDR_OF(umem, um, mtt[0]);
728	mlx5_fill_page_frag_array_perm(&umem->frag_buf, pas, MLX5_MTT_PERM_RW);
729
730	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
731	if (err) {
732		mlx5_vdpa_warn(&ndev->mvdev, "create umem(%d)\n", err);
733		goto err_cmd;
734	}
735
736	kfree(in);
737	umem->id = MLX5_GET(create_umem_out, out, umem_id);
738
739	return 0;
740
741err_cmd:
742	kfree(in);
743err_in:
744	umem_frag_buf_free(ndev, umem);
745	return err;
746}
747
748static void umem_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int num)
749{
750	u32 in[MLX5_ST_SZ_DW(destroy_umem_in)] = {};
751	u32 out[MLX5_ST_SZ_DW(destroy_umem_out)] = {};
752	struct mlx5_vdpa_umem *umem;
753
754	switch (num) {
755	case 1:
756		umem = &mvq->umem1;
757		break;
758	case 2:
759		umem = &mvq->umem2;
760		break;
761	case 3:
762		umem = &mvq->umem3;
763		break;
764	}
765
766	MLX5_SET(destroy_umem_in, in, opcode, MLX5_CMD_OP_DESTROY_UMEM);
767	MLX5_SET(destroy_umem_in, in, umem_id, umem->id);
768	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
769		return;
770
771	umem_frag_buf_free(ndev, umem);
772}
773
774static int umems_create(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
775{
776	int num;
777	int err;
778
779	for (num = 1; num <= 3; num++) {
780		err = create_umem(ndev, mvq, num);
781		if (err)
782			goto err_umem;
783	}
784	return 0;
785
786err_umem:
787	for (num--; num > 0; num--)
788		umem_destroy(ndev, mvq, num);
789
790	return err;
791}
792
793static void umems_destroy(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
794{
795	int num;
796
797	for (num = 3; num > 0; num--)
798		umem_destroy(ndev, mvq, num);
799}
800
801static int get_queue_type(struct mlx5_vdpa_net *ndev)
802{
803	u32 type_mask;
804
805	type_mask = MLX5_CAP_DEV_VDPA_EMULATION(ndev->mvdev.mdev, virtio_queue_type);
806
807	/* prefer split queue */
808	if (type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)
809		return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_SPLIT;
810
811	WARN_ON(!(type_mask & MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_PACKED));
812
813	return MLX5_VIRTIO_EMULATION_VIRTIO_QUEUE_TYPE_PACKED;
814}
815
816static bool vq_is_tx(u16 idx)
817{
818	return idx % 2;
819}
820
821enum {
822	MLX5_VIRTIO_NET_F_MRG_RXBUF = 2,
823	MLX5_VIRTIO_NET_F_HOST_ECN = 4,
824	MLX5_VIRTIO_NET_F_GUEST_ECN = 6,
825	MLX5_VIRTIO_NET_F_GUEST_TSO6 = 7,
826	MLX5_VIRTIO_NET_F_GUEST_TSO4 = 8,
827	MLX5_VIRTIO_NET_F_GUEST_CSUM = 9,
828	MLX5_VIRTIO_NET_F_CSUM = 10,
829	MLX5_VIRTIO_NET_F_HOST_TSO6 = 11,
830	MLX5_VIRTIO_NET_F_HOST_TSO4 = 12,
831};
832
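/* Translate negotiated virtio-net feature bits into the bit positions the
 * device expects in the virtqueue object's queue_feature_bit_mask fields.
 */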
833static u16 get_features(u64 features)
834{
835	return (!!(features & BIT_ULL(VIRTIO_NET_F_MRG_RXBUF)) << MLX5_VIRTIO_NET_F_MRG_RXBUF) |
836	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_ECN)) << MLX5_VIRTIO_NET_F_HOST_ECN) |
837	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_ECN)) << MLX5_VIRTIO_NET_F_GUEST_ECN) |
838	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO6)) << MLX5_VIRTIO_NET_F_GUEST_TSO6) |
839	       (!!(features & BIT_ULL(VIRTIO_NET_F_GUEST_TSO4)) << MLX5_VIRTIO_NET_F_GUEST_TSO4) |
840	       (!!(features & BIT_ULL(VIRTIO_NET_F_CSUM)) << MLX5_VIRTIO_NET_F_CSUM) |
841	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO6)) << MLX5_VIRTIO_NET_F_HOST_TSO6) |
842	       (!!(features & BIT_ULL(VIRTIO_NET_F_HOST_TSO4)) << MLX5_VIRTIO_NET_F_HOST_TSO4);
843}
844
845static bool counters_supported(const struct mlx5_vdpa_dev *mvdev)
846{
847	return MLX5_CAP_GEN_64(mvdev->mdev, general_obj_types) &
848	       BIT_ULL(MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
849}
850
851static bool msix_mode_supported(struct mlx5_vdpa_dev *mvdev)
852{
853	return MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, event_mode) &
854		(1 << MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE) &&
855		pci_msix_can_alloc_dyn(mvdev->mdev->pdev);
856}
857
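/* Create the VIRTIO_NET_Q object for this virtqueue: allocate its umems,
 * program the ring addresses, size and features, and select the event mode
 * (MSI-X if a dynamic vector was allocated, otherwise QP mode through the
 * firmware QP).
 */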
858static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
859{
860	int inlen = MLX5_ST_SZ_BYTES(create_virtio_net_q_in);
861	u32 out[MLX5_ST_SZ_DW(create_virtio_net_q_out)] = {};
862	void *obj_context;
863	u16 mlx_features;
864	void *cmd_hdr;
865	void *vq_ctx;
866	void *in;
867	int err;
868
869	err = umems_create(ndev, mvq);
870	if (err)
871		return err;
872
873	in = kzalloc(inlen, GFP_KERNEL);
874	if (!in) {
875		err = -ENOMEM;
876		goto err_alloc;
877	}
878
879	mlx_features = get_features(ndev->mvdev.actual_features);
880	cmd_hdr = MLX5_ADDR_OF(create_virtio_net_q_in, in, general_obj_in_cmd_hdr);
881
882	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
883	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
884	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
885
886	obj_context = MLX5_ADDR_OF(create_virtio_net_q_in, in, obj_context);
887	MLX5_SET(virtio_net_q_object, obj_context, hw_available_index, mvq->avail_idx);
888	MLX5_SET(virtio_net_q_object, obj_context, hw_used_index, mvq->used_idx);
889	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_12_3,
890		 mlx_features >> 3);
891	MLX5_SET(virtio_net_q_object, obj_context, queue_feature_bit_mask_2_0,
892		 mlx_features & 7);
893	vq_ctx = MLX5_ADDR_OF(virtio_net_q_object, obj_context, virtio_q_context);
894	MLX5_SET(virtio_q, vq_ctx, virtio_q_type, get_queue_type(ndev));
895
896	if (vq_is_tx(mvq->index))
897		MLX5_SET(virtio_net_q_object, obj_context, tisn_or_qpn, ndev->res.tisn);
898
899	if (mvq->map.virq) {
900		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_MSIX_MODE);
901		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->map.index);
902	} else {
903		MLX5_SET(virtio_q, vq_ctx, event_mode, MLX5_VIRTIO_Q_EVENT_MODE_QP_MODE);
904		MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
905	}
906
907	MLX5_SET(virtio_q, vq_ctx, queue_index, mvq->index);
908	MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
909	MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
910		 !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
911	MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
912	MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
913	MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
914	MLX5_SET(virtio_q, vq_ctx, virtio_q_mkey, ndev->mvdev.mr.mkey);
915	MLX5_SET(virtio_q, vq_ctx, umem_1_id, mvq->umem1.id);
916	MLX5_SET(virtio_q, vq_ctx, umem_1_size, mvq->umem1.size);
917	MLX5_SET(virtio_q, vq_ctx, umem_2_id, mvq->umem2.id);
918	MLX5_SET(virtio_q, vq_ctx, umem_2_size, mvq->umem2.size);
919	MLX5_SET(virtio_q, vq_ctx, umem_3_id, mvq->umem3.id);
920	MLX5_SET(virtio_q, vq_ctx, umem_3_size, mvq->umem3.size);
921	MLX5_SET(virtio_q, vq_ctx, pd, ndev->mvdev.res.pdn);
922	if (counters_supported(&ndev->mvdev))
923		MLX5_SET(virtio_q, vq_ctx, counter_set_id, mvq->counter_set_id);
924
925	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
926	if (err)
927		goto err_cmd;
928
929	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT;
930	kfree(in);
931	mvq->virtq_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
932
933	return 0;
934
935err_cmd:
936	kfree(in);
937err_alloc:
938	umems_destroy(ndev, mvq);
939	return err;
940}
941
942static void destroy_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
943{
944	u32 in[MLX5_ST_SZ_DW(destroy_virtio_net_q_in)] = {};
945	u32 out[MLX5_ST_SZ_DW(destroy_virtio_net_q_out)] = {};
946
947	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.opcode,
948		 MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
949	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_id, mvq->virtq_id);
950	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.uid, ndev->mvdev.res.uid);
951	MLX5_SET(destroy_virtio_net_q_in, in, general_obj_out_cmd_hdr.obj_type,
952		 MLX5_OBJ_TYPE_VIRTIO_NET_Q);
953	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out))) {
954		mlx5_vdpa_warn(&ndev->mvdev, "destroy virtqueue 0x%x\n", mvq->virtq_id);
955		return;
956	}
957	mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
958	umems_destroy(ndev, mvq);
959}
960
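/* Return the QPN being modified (get_qpn) and its remote peer (get_rqpn),
 * depending on whether the firmware side or the driver side of the RC
 * connection is the subject.
 */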
961static u32 get_rqpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
962{
963	return fw ? mvq->vqqp.mqp.qpn : mvq->fwqp.mqp.qpn;
964}
965
966static u32 get_qpn(struct mlx5_vdpa_virtqueue *mvq, bool fw)
967{
968	return fw ? mvq->fwqp.mqp.qpn : mvq->vqqp.mqp.qpn;
969}
970
971static void alloc_inout(struct mlx5_vdpa_net *ndev, int cmd, void **in, int *inlen, void **out,
972			int *outlen, u32 qpn, u32 rqpn)
973{
974	void *qpc;
975	void *pp;
976
977	switch (cmd) {
978	case MLX5_CMD_OP_2RST_QP:
979		*inlen = MLX5_ST_SZ_BYTES(qp_2rst_in);
980		*outlen = MLX5_ST_SZ_BYTES(qp_2rst_out);
981		*in = kzalloc(*inlen, GFP_KERNEL);
982		*out = kzalloc(*outlen, GFP_KERNEL);
983		if (!*in || !*out)
984			goto outerr;
985
986		MLX5_SET(qp_2rst_in, *in, opcode, cmd);
987		MLX5_SET(qp_2rst_in, *in, uid, ndev->mvdev.res.uid);
988		MLX5_SET(qp_2rst_in, *in, qpn, qpn);
989		break;
990	case MLX5_CMD_OP_RST2INIT_QP:
991		*inlen = MLX5_ST_SZ_BYTES(rst2init_qp_in);
992		*outlen = MLX5_ST_SZ_BYTES(rst2init_qp_out);
993		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
995		if (!*in || !*out)
996			goto outerr;
997
998		MLX5_SET(rst2init_qp_in, *in, opcode, cmd);
999		MLX5_SET(rst2init_qp_in, *in, uid, ndev->mvdev.res.uid);
1000		MLX5_SET(rst2init_qp_in, *in, qpn, qpn);
1001		qpc = MLX5_ADDR_OF(rst2init_qp_in, *in, qpc);
1002		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1003		MLX5_SET(qpc, qpc, rwe, 1);
1004		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1005		MLX5_SET(ads, pp, vhca_port_num, 1);
1006		break;
1007	case MLX5_CMD_OP_INIT2RTR_QP:
1008		*inlen = MLX5_ST_SZ_BYTES(init2rtr_qp_in);
1009		*outlen = MLX5_ST_SZ_BYTES(init2rtr_qp_out);
1010		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
1012		if (!*in || !*out)
1013			goto outerr;
1014
1015		MLX5_SET(init2rtr_qp_in, *in, opcode, cmd);
1016		MLX5_SET(init2rtr_qp_in, *in, uid, ndev->mvdev.res.uid);
1017		MLX5_SET(init2rtr_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(init2rtr_qp_in, *in, qpc);
1019		MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_256_BYTES);
1020		MLX5_SET(qpc, qpc, log_msg_max, 30);
1021		MLX5_SET(qpc, qpc, remote_qpn, rqpn);
1022		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1023		MLX5_SET(ads, pp, fl, 1);
1024		break;
1025	case MLX5_CMD_OP_RTR2RTS_QP:
1026		*inlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_in);
1027		*outlen = MLX5_ST_SZ_BYTES(rtr2rts_qp_out);
1028		*in = kzalloc(*inlen, GFP_KERNEL);
		*out = kzalloc(*outlen, GFP_KERNEL);
1030		if (!*in || !*out)
1031			goto outerr;
1032
1033		MLX5_SET(rtr2rts_qp_in, *in, opcode, cmd);
1034		MLX5_SET(rtr2rts_qp_in, *in, uid, ndev->mvdev.res.uid);
1035		MLX5_SET(rtr2rts_qp_in, *in, qpn, qpn);
		qpc = MLX5_ADDR_OF(rtr2rts_qp_in, *in, qpc);
1037		pp = MLX5_ADDR_OF(qpc, qpc, primary_address_path);
1038		MLX5_SET(ads, pp, ack_timeout, 14);
1039		MLX5_SET(qpc, qpc, retry_count, 7);
1040		MLX5_SET(qpc, qpc, rnr_retry, 7);
1041		break;
1042	default:
1043		goto outerr_nullify;
1044	}
1045
1046	return;
1047
1048outerr:
1049	kfree(*in);
1050	kfree(*out);
1051outerr_nullify:
1052	*in = NULL;
1053	*out = NULL;
1054}
1055
1056static void free_inout(void *in, void *out)
1057{
1058	kfree(in);
1059	kfree(out);
1060}
1061
/* Two QPs are used by each virtqueue. One is used by the driver and one by
 * firmware. The fw argument indicates whether the QP being modified is the
 * one used by firmware.
 */
1066static int modify_qp(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, bool fw, int cmd)
1067{
1068	int outlen;
1069	int inlen;
1070	void *out;
1071	void *in;
1072	int err;
1073
1074	alloc_inout(ndev, cmd, &in, &inlen, &out, &outlen, get_qpn(mvq, fw), get_rqpn(mvq, fw));
1075	if (!in || !out)
1076		return -ENOMEM;
1077
1078	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, outlen);
1079	free_inout(in, out);
1080	return err;
1081}
1082
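/* Reset both QPs, then drive them through INIT and RTR, finally moving the
 * firmware QP to RTS. This establishes the RC connection used as the
 * notification channel described in struct mlx5_vdpa_virtqueue.
 */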
1083static int connect_qps(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1084{
1085	int err;
1086
1087	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_2RST_QP);
1088	if (err)
1089		return err;
1090
1091	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_2RST_QP);
1092	if (err)
1093		return err;
1094
1095	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_RST2INIT_QP);
1096	if (err)
1097		return err;
1098
1099	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_RST2INIT_QP);
1100	if (err)
1101		return err;
1102
1103	err = modify_qp(ndev, mvq, true, MLX5_CMD_OP_INIT2RTR_QP);
1104	if (err)
1105		return err;
1106
1107	err = modify_qp(ndev, mvq, false, MLX5_CMD_OP_INIT2RTR_QP);
1108	if (err)
1109		return err;
1110
1111	return modify_qp(ndev, mvq, true, MLX5_CMD_OP_RTR2RTS_QP);
1112}
1113
1114struct mlx5_virtq_attr {
1115	u8 state;
1116	u16 available_index;
1117	u16 used_index;
1118};
1119
1120static int query_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
1121			   struct mlx5_virtq_attr *attr)
1122{
1123	int outlen = MLX5_ST_SZ_BYTES(query_virtio_net_q_out);
1124	u32 in[MLX5_ST_SZ_DW(query_virtio_net_q_in)] = {};
1125	void *out;
1126	void *obj_context;
1127	void *cmd_hdr;
1128	int err;
1129
1130	out = kzalloc(outlen, GFP_KERNEL);
1131	if (!out)
1132		return -ENOMEM;
1133
1134	cmd_hdr = MLX5_ADDR_OF(query_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1135
1136	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
1137	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1138	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1139	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1140	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, outlen);
1141	if (err)
1142		goto err_cmd;
1143
1144	obj_context = MLX5_ADDR_OF(query_virtio_net_q_out, out, obj_context);
1145	memset(attr, 0, sizeof(*attr));
1146	attr->state = MLX5_GET(virtio_net_q_object, obj_context, state);
1147	attr->available_index = MLX5_GET(virtio_net_q_object, obj_context, hw_available_index);
1148	attr->used_index = MLX5_GET(virtio_net_q_object, obj_context, hw_used_index);
1149	kfree(out);
1150	return 0;
1151
1152err_cmd:
1153	kfree(out);
1154	return err;
1155}
1156
1157static bool is_valid_state_change(int oldstate, int newstate)
1158{
1159	switch (oldstate) {
1160	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_INIT:
1161		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY;
1162	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY:
1163		return newstate == MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND;
1164	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND:
1165	case MLX5_VIRTIO_NET_Q_OBJECT_STATE_ERR:
1166	default:
1167		return false;
1168	}
1169}
1170
1171static int modify_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq, int state)
1172{
1173	int inlen = MLX5_ST_SZ_BYTES(modify_virtio_net_q_in);
1174	u32 out[MLX5_ST_SZ_DW(modify_virtio_net_q_out)] = {};
1175	void *obj_context;
1176	void *cmd_hdr;
1177	void *in;
1178	int err;
1179
1180	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_NONE)
1181		return 0;
1182
1183	if (!is_valid_state_change(mvq->fw_state, state))
1184		return -EINVAL;
1185
1186	in = kzalloc(inlen, GFP_KERNEL);
1187	if (!in)
1188		return -ENOMEM;
1189
1190	cmd_hdr = MLX5_ADDR_OF(modify_virtio_net_q_in, in, general_obj_in_cmd_hdr);
1191
1192	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_MODIFY_GENERAL_OBJECT);
1193	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_NET_Q);
1194	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->virtq_id);
1195	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1196
1197	obj_context = MLX5_ADDR_OF(modify_virtio_net_q_in, in, obj_context);
1198	MLX5_SET64(virtio_net_q_object, obj_context, modify_field_select,
1199		   MLX5_VIRTQ_MODIFY_MASK_STATE);
1200	MLX5_SET(virtio_net_q_object, obj_context, state, state);
1201	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, inlen, out, sizeof(out));
1202	kfree(in);
1203	if (!err)
1204		mvq->fw_state = state;
1205
1206	return err;
1207}
1208
1209static int counter_set_alloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1210{
1211	u32 in[MLX5_ST_SZ_DW(create_virtio_q_counters_in)] = {};
1212	u32 out[MLX5_ST_SZ_DW(create_virtio_q_counters_out)] = {};
1213	void *cmd_hdr;
1214	int err;
1215
1216	if (!counters_supported(&ndev->mvdev))
1217		return 0;
1218
1219	cmd_hdr = MLX5_ADDR_OF(create_virtio_q_counters_in, in, hdr);
1220
1221	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
1222	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1223	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
1224
1225	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
1226	if (err)
1227		return err;
1228
1229	mvq->counter_set_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
1230
1231	return 0;
1232}
1233
1234static void counter_set_dealloc(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1235{
1236	u32 in[MLX5_ST_SZ_DW(destroy_virtio_q_counters_in)] = {};
1237	u32 out[MLX5_ST_SZ_DW(destroy_virtio_q_counters_out)] = {};
1238
1239	if (!counters_supported(&ndev->mvdev))
1240		return;
1241
1242	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
1243	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_id, mvq->counter_set_id);
1244	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.uid, ndev->mvdev.res.uid);
1245	MLX5_SET(destroy_virtio_q_counters_in, in, hdr.obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
1246	if (mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out)))
1247		mlx5_vdpa_warn(&ndev->mvdev, "dealloc counter set 0x%x\n", mvq->counter_set_id);
1248}
1249
1250static irqreturn_t mlx5_vdpa_int_handler(int irq, void *priv)
1251{
1252	struct vdpa_callback *cb = priv;
1253
1254	if (cb->callback)
1255		return cb->callback(cb->private);
1256
1257	return IRQ_HANDLED;
1258}
1259
1260static void alloc_vector(struct mlx5_vdpa_net *ndev,
1261			 struct mlx5_vdpa_virtqueue *mvq)
1262{
1263	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1264	struct mlx5_vdpa_irq_pool_entry *ent;
1265	int err;
1266	int i;
1267
1268	for (i = 0; i < irqp->num_ent; i++) {
1269		ent = &irqp->entries[i];
1270		if (!ent->used) {
1271			snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
1272				 dev_name(&ndev->mvdev.vdev.dev), mvq->index);
1273			ent->dev_id = &ndev->event_cbs[mvq->index];
1274			err = request_irq(ent->map.virq, mlx5_vdpa_int_handler, 0,
1275					  ent->name, ent->dev_id);
1276			if (err)
1277				return;
1278
1279			ent->used = true;
1280			mvq->map = ent->map;
1281			return;
1282		}
1283	}
1284}
1285
1286static void dealloc_vector(struct mlx5_vdpa_net *ndev,
1287			   struct mlx5_vdpa_virtqueue *mvq)
1288{
1289	struct mlx5_vdpa_irq_pool *irqp = &ndev->irqp;
1290	int i;
1291
1292	for (i = 0; i < irqp->num_ent; i++)
1293		if (mvq->map.virq == irqp->entries[i].map.virq) {
1294			free_irq(mvq->map.virq, irqp->entries[i].dev_id);
1295			irqp->entries[i].used = false;
1296			return;
1297		}
1298}
1299
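/* Bring up all resources a virtqueue needs, in order: completion queue,
 * firmware and driver QPs, the RC connection between them, a counter set,
 * an MSI-X vector (best effort) and finally the virtqueue object itself,
 * which is moved to the ready state if requested.
 */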
1300static int setup_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1301{
1302	u16 idx = mvq->index;
1303	int err;
1304
1305	if (!mvq->num_ent)
1306		return 0;
1307
1308	if (mvq->initialized)
1309		return 0;
1310
1311	err = cq_create(ndev, idx, mvq->num_ent);
1312	if (err)
1313		return err;
1314
1315	err = qp_create(ndev, mvq, &mvq->fwqp);
1316	if (err)
1317		goto err_fwqp;
1318
1319	err = qp_create(ndev, mvq, &mvq->vqqp);
1320	if (err)
1321		goto err_vqqp;
1322
1323	err = connect_qps(ndev, mvq);
1324	if (err)
1325		goto err_connect;
1326
1327	err = counter_set_alloc(ndev, mvq);
1328	if (err)
1329		goto err_connect;
1330
1331	alloc_vector(ndev, mvq);
1332	err = create_virtqueue(ndev, mvq);
1333	if (err)
1334		goto err_vq;
1335
1336	if (mvq->ready) {
1337		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
1338		if (err) {
1339			mlx5_vdpa_warn(&ndev->mvdev, "failed to modify to ready vq idx %d(%d)\n",
1340				       idx, err);
1341			goto err_modify;
1342		}
1343	}
1344
1345	mvq->initialized = true;
1346	return 0;
1347
1348err_modify:
1349	destroy_virtqueue(ndev, mvq);
1350err_vq:
1351	dealloc_vector(ndev, mvq);
1352	counter_set_dealloc(ndev, mvq);
1353err_connect:
1354	qp_destroy(ndev, &mvq->vqqp);
1355err_vqqp:
1356	qp_destroy(ndev, &mvq->fwqp);
1357err_fwqp:
1358	cq_destroy(ndev, idx);
1359	return err;
1360}
1361
1362static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1363{
1364	struct mlx5_virtq_attr attr;
1365
1366	if (!mvq->initialized)
1367		return;
1368
1369	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
1370		return;
1371
1372	if (modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_SUSPEND))
1373		mlx5_vdpa_warn(&ndev->mvdev, "modify to suspend failed\n");
1374
1375	if (query_virtqueue(ndev, mvq, &attr)) {
1376		mlx5_vdpa_warn(&ndev->mvdev, "failed to query virtqueue\n");
1377		return;
1378	}
1379	mvq->avail_idx = attr.available_index;
1380	mvq->used_idx = attr.used_index;
1381}
1382
1383static void suspend_vqs(struct mlx5_vdpa_net *ndev)
1384{
1385	int i;
1386
1387	for (i = 0; i < ndev->mvdev.max_vqs; i++)
1388		suspend_vq(ndev, &ndev->vqs[i]);
1389}
1390
1391static void teardown_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
1392{
1393	if (!mvq->initialized)
1394		return;
1395
1396	suspend_vq(ndev, mvq);
1397	destroy_virtqueue(ndev, mvq);
1398	dealloc_vector(ndev, mvq);
1399	counter_set_dealloc(ndev, mvq);
1400	qp_destroy(ndev, &mvq->vqqp);
1401	qp_destroy(ndev, &mvq->fwqp);
1402	cq_destroy(ndev, mvq->index);
1403	mvq->initialized = false;
1404}
1405
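/* Create the RQ table used for RSS. Only receive virtqueues (even indices)
 * are listed; the actual size is the number of active VQ pairs rounded up to
 * a power of two, wrapping around the active queues when padding is needed.
 */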
1406static int create_rqt(struct mlx5_vdpa_net *ndev)
1407{
1408	int rqt_table_size = roundup_pow_of_two(ndev->rqt_size);
1409	int act_sz = roundup_pow_of_two(ndev->cur_num_vqs / 2);
1410	__be32 *list;
1411	void *rqtc;
1412	int inlen;
1413	void *in;
1414	int i, j;
1415	int err;
1416
1417	inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + rqt_table_size * MLX5_ST_SZ_BYTES(rq_num);
1418	in = kzalloc(inlen, GFP_KERNEL);
1419	if (!in)
1420		return -ENOMEM;
1421
1422	MLX5_SET(create_rqt_in, in, uid, ndev->mvdev.res.uid);
1423	rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
1424
1425	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1426	MLX5_SET(rqtc, rqtc, rqt_max_size, rqt_table_size);
1427	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
1428	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1429		list[i] = cpu_to_be32(ndev->vqs[j % ndev->cur_num_vqs].virtq_id);
1430
1431	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1432	err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
1433	kfree(in);
1434	if (err)
1435		return err;
1436
1437	return 0;
1438}
1439
1440#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
1441
1442static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
1443{
1444	int act_sz = roundup_pow_of_two(num / 2);
1445	__be32 *list;
1446	void *rqtc;
1447	int inlen;
1448	void *in;
1449	int i, j;
1450	int err;
1451
1452	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + act_sz * MLX5_ST_SZ_BYTES(rq_num);
1453	in = kzalloc(inlen, GFP_KERNEL);
1454	if (!in)
1455		return -ENOMEM;
1456
1457	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
1458	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
1459	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
1460	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
1461
1462	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
	for (i = 0, j = 0; i < act_sz; i++, j += 2)
1464		list[i] = cpu_to_be32(ndev->vqs[j % num].virtq_id);
1465
1466	MLX5_SET(rqtc, rqtc, rqt_actual_size, act_sz);
1467	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
1468	kfree(in);
1469	if (err)
1470		return err;
1471
1472	return 0;
1473}
1474
1475static void destroy_rqt(struct mlx5_vdpa_net *ndev)
1476{
1477	mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
1478}
1479
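/* Create a TIR that spreads received packets over the RQ table using a
 * symmetric Toeplitz hash of the outer IPv4 addresses and TCP ports.
 */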
1480static int create_tir(struct mlx5_vdpa_net *ndev)
1481{
1482#define HASH_IP_L4PORTS                                                                            \
1483	(MLX5_HASH_FIELD_SEL_SRC_IP | MLX5_HASH_FIELD_SEL_DST_IP | MLX5_HASH_FIELD_SEL_L4_SPORT |  \
1484	 MLX5_HASH_FIELD_SEL_L4_DPORT)
1485	static const u8 rx_hash_toeplitz_key[] = { 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7,
1486						   0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94,
1487						   0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1,
1488						   0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59,
1489						   0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a };
1490	void *rss_key;
1491	void *outer;
1492	void *tirc;
1493	void *in;
1494	int err;
1495
1496	in = kzalloc(MLX5_ST_SZ_BYTES(create_tir_in), GFP_KERNEL);
1497	if (!in)
1498		return -ENOMEM;
1499
1500	MLX5_SET(create_tir_in, in, uid, ndev->mvdev.res.uid);
1501	tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
1502	MLX5_SET(tirc, tirc, disp_type, MLX5_TIRC_DISP_TYPE_INDIRECT);
1503
1504	MLX5_SET(tirc, tirc, rx_hash_symmetric, 1);
1505	MLX5_SET(tirc, tirc, rx_hash_fn, MLX5_RX_HASH_FN_TOEPLITZ);
1506	rss_key = MLX5_ADDR_OF(tirc, tirc, rx_hash_toeplitz_key);
1507	memcpy(rss_key, rx_hash_toeplitz_key, sizeof(rx_hash_toeplitz_key));
1508
1509	outer = MLX5_ADDR_OF(tirc, tirc, rx_hash_field_selector_outer);
1510	MLX5_SET(rx_hash_field_select, outer, l3_prot_type, MLX5_L3_PROT_TYPE_IPV4);
1511	MLX5_SET(rx_hash_field_select, outer, l4_prot_type, MLX5_L4_PROT_TYPE_TCP);
1512	MLX5_SET(rx_hash_field_select, outer, selected_fields, HASH_IP_L4PORTS);
1513
1514	MLX5_SET(tirc, tirc, indirect_table, ndev->res.rqtn);
1515	MLX5_SET(tirc, tirc, transport_domain, ndev->res.tdn);
1516
1517	err = mlx5_vdpa_create_tir(&ndev->mvdev, in, &ndev->res.tirn);
1518	kfree(in);
1519	if (err)
1520		return err;
1521
1522	mlx5_vdpa_add_tirn(ndev);
1523	return err;
1524}
1525
1526static void destroy_tir(struct mlx5_vdpa_net *ndev)
1527{
1528	mlx5_vdpa_remove_tirn(ndev);
1529	mlx5_vdpa_destroy_tir(&ndev->mvdev, ndev->res.tirn);
1530}
1531
1532#define MAX_STEERING_ENT 0x8000
1533#define MAX_STEERING_GROUPS 2
1534
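/* With CONFIG_MLX5_VDPA_STEERING_DEBUG each steering rule carries a second
 * destination pointing at a flow counter (see add_steering_counters()).
 */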
1535#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1536       #define NUM_DESTS 2
1537#else
1538       #define NUM_DESTS 1
1539#endif
1540
1541static int add_steering_counters(struct mlx5_vdpa_net *ndev,
1542				 struct macvlan_node *node,
1543				 struct mlx5_flow_act *flow_act,
1544				 struct mlx5_flow_destination *dests)
1545{
1546#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1547	int err;
1548
1549	node->ucast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1550	if (IS_ERR(node->ucast_counter.counter))
1551		return PTR_ERR(node->ucast_counter.counter);
1552
1553	node->mcast_counter.counter = mlx5_fc_create(ndev->mvdev.mdev, false);
1554	if (IS_ERR(node->mcast_counter.counter)) {
1555		err = PTR_ERR(node->mcast_counter.counter);
1556		goto err_mcast_counter;
1557	}
1558
1559	dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
1560	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
1561	return 0;
1562
1563err_mcast_counter:
1564	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1565	return err;
1566#else
1567	return 0;
1568#endif
1569}
1570
1571static void remove_steering_counters(struct mlx5_vdpa_net *ndev,
1572				     struct macvlan_node *node)
1573{
1574#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1575	mlx5_fc_destroy(ndev->mvdev.mdev, node->mcast_counter.counter);
1576	mlx5_fc_destroy(ndev->mvdev.mdev, node->ucast_counter.counter);
1577#endif
1578}
1579
1580static int mlx5_vdpa_add_mac_vlan_rules(struct mlx5_vdpa_net *ndev, u8 *mac,
1581					struct macvlan_node *node)
1582{
1583	struct mlx5_flow_destination dests[NUM_DESTS] = {};
1584	struct mlx5_flow_act flow_act = {};
1585	struct mlx5_flow_spec *spec;
1586	void *headers_c;
1587	void *headers_v;
1588	u8 *dmac_c;
1589	u8 *dmac_v;
1590	int err;
1591	u16 vid;
1592
1593	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
1594	if (!spec)
1595		return -ENOMEM;
1596
1597	vid = key2vid(node->macvlan);
1598	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
1599	headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
1600	headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
1601	dmac_c = MLX5_ADDR_OF(fte_match_param, headers_c, outer_headers.dmac_47_16);
1602	dmac_v = MLX5_ADDR_OF(fte_match_param, headers_v, outer_headers.dmac_47_16);
1603	eth_broadcast_addr(dmac_c);
1604	ether_addr_copy(dmac_v, mac);
1605	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)) {
1606		MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
1607		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, first_vid);
1608	}
1609	if (node->tagged) {
1610		MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 1);
1611		MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, vid);
1612	}
1613	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
1614	dests[0].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
1615	dests[0].tir_num = ndev->res.tirn;
1616	err = add_steering_counters(ndev, node, &flow_act, dests);
1617	if (err)
1618		goto out_free;
1619
1620#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1621	dests[1].counter_id = mlx5_fc_id(node->ucast_counter.counter);
1622#endif
1623	node->ucast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1624	if (IS_ERR(node->ucast_rule)) {
1625		err = PTR_ERR(node->ucast_rule);
1626		goto err_ucast;
1627	}
1628
1629#if defined(CONFIG_MLX5_VDPA_STEERING_DEBUG)
1630	dests[1].counter_id = mlx5_fc_id(node->mcast_counter.counter);
1631#endif
1632
1633	memset(dmac_c, 0, ETH_ALEN);
1634	memset(dmac_v, 0, ETH_ALEN);
1635	dmac_c[0] = 1;
1636	dmac_v[0] = 1;
1637	node->mcast_rule = mlx5_add_flow_rules(ndev->rxft, spec, &flow_act, dests, NUM_DESTS);
1638	if (IS_ERR(node->mcast_rule)) {
1639		err = PTR_ERR(node->mcast_rule);
1640		goto err_mcast;
1641	}
1642	kvfree(spec);
1643	mlx5_vdpa_add_rx_counters(ndev, node);
1644	return 0;
1645
1646err_mcast:
1647	mlx5_del_flow_rules(node->ucast_rule);
1648err_ucast:
1649	remove_steering_counters(ndev, node);
1650out_free:
1651	kvfree(spec);
1652	return err;
1653}
1654
1655static void mlx5_vdpa_del_mac_vlan_rules(struct mlx5_vdpa_net *ndev,
1656					 struct macvlan_node *node)
1657{
1658	mlx5_vdpa_remove_rx_counters(ndev, node);
1659	mlx5_del_flow_rules(node->ucast_rule);
1660	mlx5_del_flow_rules(node->mcast_rule);
1661}
1662
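/* Pack the VLAN ID (or MLX5V_UNTAGGED) and the MAC address into the 64-bit
 * key used by the MAC/VLAN hash table.
 */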
1663static u64 search_val(u8 *mac, u16 vlan, bool tagged)
1664{
1665	u64 val;
1666
1667	if (!tagged)
1668		vlan = MLX5V_UNTAGGED;
1669
1670	val = (u64)vlan << 48 |
1671	      (u64)mac[0] << 40 |
1672	      (u64)mac[1] << 32 |
1673	      (u64)mac[2] << 24 |
1674	      (u64)mac[3] << 16 |
1675	      (u64)mac[4] << 8 |
1676	      (u64)mac[5];
1677
1678	return val;
1679}
1680
1681static struct macvlan_node *mac_vlan_lookup(struct mlx5_vdpa_net *ndev, u64 value)
1682{
1683	struct macvlan_node *pos;
1684	u32 idx;
1685
	idx = hash_64(value, 8); /* index into macvlan_hash (MLX5V_MACVLAN_SIZE entries) */
1687	hlist_for_each_entry(pos, &ndev->macvlan_hash[idx], hlist) {
1688		if (pos->macvlan == value)
1689			return pos;
1690	}
1691	return NULL;
1692}
1693
1694static int mac_vlan_add(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vid, bool tagged)
1695{
1696	struct macvlan_node *ptr;
1697	u64 val;
1698	u32 idx;
1699	int err;
1700
1701	val = search_val(mac, vid, tagged);
1702	if (mac_vlan_lookup(ndev, val))
1703		return -EEXIST;
1704
1705	ptr = kzalloc(sizeof(*ptr), GFP_KERNEL);
1706	if (!ptr)
1707		return -ENOMEM;
1708
1709	ptr->tagged = tagged;
1710	ptr->macvlan = val;
1711	ptr->ndev = ndev;
1712	err = mlx5_vdpa_add_mac_vlan_rules(ndev, ndev->config.mac, ptr);
1713	if (err)
1714		goto err_add;
1715
1716	idx = hash_64(val, 8);
1717	hlist_add_head(&ptr->hlist, &ndev->macvlan_hash[idx]);
1718	return 0;
1719
1720err_add:
1721	kfree(ptr);
1722	return err;
1723}
1724
1725static void mac_vlan_del(struct mlx5_vdpa_net *ndev, u8 *mac, u16 vlan, bool tagged)
1726{
1727	struct macvlan_node *ptr;
1728
1729	ptr = mac_vlan_lookup(ndev, search_val(mac, vlan, tagged));
1730	if (!ptr)
1731		return;
1732
1733	hlist_del(&ptr->hlist);
1734	mlx5_vdpa_del_mac_vlan_rules(ndev, ptr);
1735	remove_steering_counters(ndev, ptr);
1736	kfree(ptr);
1737}
1738
1739static void clear_mac_vlan_table(struct mlx5_vdpa_net *ndev)
1740{
1741	struct macvlan_node *pos;
1742	struct hlist_node *n;
1743	int i;
1744
1745	for (i = 0; i < MLX5V_MACVLAN_SIZE; i++) {
1746		hlist_for_each_entry_safe(pos, n, &ndev->macvlan_hash[i], hlist) {
1747			hlist_del(&pos->hlist);
1748			mlx5_vdpa_del_mac_vlan_rules(ndev, pos);
1749			remove_steering_counters(ndev, pos);
1750			kfree(pos);
1751		}
1752	}
1753}
1754
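/* Create the receive flow table in the bypass namespace and install the
 * default rule steering untagged traffic for the device MAC to the TIR.
 */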
1755static int setup_steering(struct mlx5_vdpa_net *ndev)
1756{
1757	struct mlx5_flow_table_attr ft_attr = {};
1758	struct mlx5_flow_namespace *ns;
1759	int err;
1760
1761	ft_attr.max_fte = MAX_STEERING_ENT;
1762	ft_attr.autogroup.max_num_groups = MAX_STEERING_GROUPS;
1763
1764	ns = mlx5_get_flow_namespace(ndev->mvdev.mdev, MLX5_FLOW_NAMESPACE_BYPASS);
1765	if (!ns) {
1766		mlx5_vdpa_warn(&ndev->mvdev, "failed to get flow namespace\n");
1767		return -EOPNOTSUPP;
1768	}
1769
1770	ndev->rxft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
1771	if (IS_ERR(ndev->rxft)) {
1772		mlx5_vdpa_warn(&ndev->mvdev, "failed to create flow table\n");
1773		return PTR_ERR(ndev->rxft);
1774	}
1775	mlx5_vdpa_add_rx_flow_table(ndev);
1776
1777	err = mac_vlan_add(ndev, ndev->config.mac, 0, false);
1778	if (err)
1779		goto err_add;
1780
1781	return 0;
1782
1783err_add:
1784	mlx5_vdpa_remove_rx_flow_table(ndev);
1785	mlx5_destroy_flow_table(ndev->rxft);
1786	return err;
1787}
1788
1789static void teardown_steering(struct mlx5_vdpa_net *ndev)
1790{
1791	clear_mac_vlan_table(ndev);
1792	mlx5_vdpa_remove_rx_flow_table(ndev);
1793	mlx5_destroy_flow_table(ndev->rxft);
1794}
1795
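/* Control virtqueue command format, as defined by the virtio spec: the guest
 * places a struct virtio_net_ctrl_hdr (a class byte and a command byte)
 * followed by the command-specific payload in the readable descriptors, and
 * the device writes back a single virtio_net_ctrl_ack byte (VIRTIO_NET_OK or
 * VIRTIO_NET_ERR). For VIRTIO_NET_CTRL_MAC_ADDR_SET the payload is the new
 * 6-byte MAC address, which is pulled from cvq->riov below.
 */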
1796static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1797{
1798	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1799	struct mlx5_control_vq *cvq = &mvdev->cvq;
1800	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1801	struct mlx5_core_dev *pfmdev;
1802	size_t read;
1803	u8 mac[ETH_ALEN], mac_back[ETH_ALEN];
1804
1805	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
1806	switch (cmd) {
1807	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
1808		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
1809		if (read != ETH_ALEN)
1810			break;
1811
1812		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
1813			status = VIRTIO_NET_OK;
1814			break;
1815		}
1816
1817		if (is_zero_ether_addr(mac))
1818			break;
1819
1820		if (!is_zero_ether_addr(ndev->config.mac)) {
1821			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1822				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
1823					       ndev->config.mac);
1824				break;
1825			}
1826		}
1827
1828		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
1829			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
1830				       mac);
1831			break;
1832		}
1833
1834		/* Back up the original MAC address so that it can be restored if
1835		 * adding the forward rules fails.
1836		 */
1837		memcpy(mac_back, ndev->config.mac, ETH_ALEN);
1838
1839		memcpy(ndev->config.mac, mac, ETH_ALEN);
1840
1841		/* Recreate the flow table entry so that packets to the new MAC are forwarded back.
1842		 */
1843		mac_vlan_del(ndev, mac_back, 0, false);
1844
1845		if (mac_vlan_add(ndev, ndev->config.mac, 0, false)) {
1846			mlx5_vdpa_warn(mvdev, "failed to insert forward rules, try to restore\n");
1847
1848			/* This path should rarely be taken, but double check anyway. */
1849			if (is_zero_ether_addr(mac_back)) {
1850				mlx5_vdpa_warn(mvdev, "restore mac failed: Original MAC is zero\n");
1851				break;
1852			}
1853
1854			/* Try to restore the original MAC address in the MPFS table, and
1855			 * try to restore the forward rule entry.
1856			 */
1857			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
1858				mlx5_vdpa_warn(mvdev, "restore mac failed: delete MAC %pM from MPFS table failed\n",
1859					       ndev->config.mac);
1860			}
1861
1862			if (mlx5_mpfs_add_mac(pfmdev, mac_back)) {
1863				mlx5_vdpa_warn(mvdev, "restore mac failed: insert old MAC %pM into MPFS table failed\n",
1864					       mac_back);
1865			}
1866
1867			memcpy(ndev->config.mac, mac_back, ETH_ALEN);
1868
1869			if (mac_vlan_add(ndev, ndev->config.mac, 0, false))
1870				mlx5_vdpa_warn(mvdev, "restore forward rules failed: insert forward rules failed\n");
1871
1872			break;
1873		}
1874
1875		status = VIRTIO_NET_OK;
1876		break;
1877
1878	default:
1879		break;
1880	}
1881
1882	return status;
1883}
1884
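/* change_num_qps() adjusts the number of active data virtqueues to
 * 2 * newqps. When shrinking, the RQT is reduced first and the excess VQs are
 * torn down afterwards; when growing, the new VQs are set up first and the
 * RQT is expanded last. For example, going from 1 to 4 queue pairs creates
 * VQs 2..7 and then modifies the RQT to spread traffic over four receive
 * queues.
 */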
1885static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
1886{
1887	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1888	int cur_qps = ndev->cur_num_vqs / 2;
1889	int err;
1890	int i;
1891
1892	if (cur_qps > newqps) {
1893		err = modify_rqt(ndev, 2 * newqps);
1894		if (err)
1895			return err;
1896
1897		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
1898			teardown_vq(ndev, &ndev->vqs[i]);
1899
1900		ndev->cur_num_vqs = 2 * newqps;
1901	} else {
1902		ndev->cur_num_vqs = 2 * newqps;
1903		for (i = cur_qps * 2; i < 2 * newqps; i++) {
1904			err = setup_vq(ndev, &ndev->vqs[i]);
1905			if (err)
1906				goto clean_added;
1907		}
1908		err = modify_rqt(ndev, 2 * newqps);
1909		if (err)
1910			goto clean_added;
1911	}
1912	return 0;
1913
1914clean_added:
1915	for (--i; i >= 2 * cur_qps; --i)
1916		teardown_vq(ndev, &ndev->vqs[i]);
1917
1918	ndev->cur_num_vqs = 2 * cur_qps;
1919
1920	return err;
1921}
1922
1923static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1924{
1925	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1926	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1927	struct mlx5_control_vq *cvq = &mvdev->cvq;
1928	struct virtio_net_ctrl_mq mq;
1929	size_t read;
1930	u16 newqps;
1931
1932	switch (cmd) {
1933	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
1934		/* This mq feature check aligns with pre-existing userspace
1935		 * implementation.
1936		 *
1937		 * Without it, an untrusted driver could fake a multiqueue config
1938		 * request down to a non-mq device, which may cause the kernel to
1939		 * panic due to uninitialized resources for the extra vqs. Even with
1940		 * a well-behaved guest driver, it is not expected to allow
1941		 * changing the number of vqs on a non-mq device.
1942		 */
1943		if (!MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ))
1944			break;
1945
1946		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
1947		if (read != sizeof(mq))
1948			break;
1949
1950		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
1951		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
1952		    newqps > ndev->rqt_size)
1953			break;
1954
1955		if (ndev->cur_num_vqs == 2 * newqps) {
1956			status = VIRTIO_NET_OK;
1957			break;
1958		}
1959
1960		if (!change_num_qps(mvdev, newqps))
1961			status = VIRTIO_NET_OK;
1962
1963		break;
1964	default:
1965		break;
1966	}
1967
1968	return status;
1969}
1970
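/* VLAN filtering: when the guest registers or removes a VLAN ID (for example
 * after creating a VLAN sub-interface on top of the virtio-net device), it
 * sends VIRTIO_NET_CTRL_VLAN_ADD/DEL with the 16-bit VLAN ID as payload. Each
 * added VLAN gets a dedicated tagged steering entry for the device MAC via
 * mac_vlan_add().
 */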
1971static virtio_net_ctrl_ack handle_ctrl_vlan(struct mlx5_vdpa_dev *mvdev, u8 cmd)
1972{
1973	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
1974	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
1975	struct mlx5_control_vq *cvq = &mvdev->cvq;
1976	__virtio16 vlan;
1977	size_t read;
1978	u16 id;
1979
1980	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VLAN)))
1981		return status;
1982
1983	switch (cmd) {
1984	case VIRTIO_NET_CTRL_VLAN_ADD:
1985		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1986		if (read != sizeof(vlan))
1987			break;
1988
1989		id = mlx5vdpa16_to_cpu(mvdev, vlan);
1990		if (mac_vlan_add(ndev, ndev->config.mac, id, true))
1991			break;
1992
1993		status = VIRTIO_NET_OK;
1994		break;
1995	case VIRTIO_NET_CTRL_VLAN_DEL:
1996		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &vlan, sizeof(vlan));
1997		if (read != sizeof(vlan))
1998			break;
1999
2000		id = mlx5vdpa16_to_cpu(mvdev, vlan);
2001		mac_vlan_del(ndev, ndev->config.mac, id, true);
2002		status = VIRTIO_NET_OK;
2003		break;
2004	default:
2005		break;
2006	}
2007
2008	return status;
2009}
2010
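/* The control VQ is emulated in software on top of vringh. Each invocation of
 * this handler processes at most one command: it fetches a descriptor chain,
 * reads the control header, dispatches to the class handler, pushes the
 * status byte back and completes the descriptor. After a command is handled
 * the work is re-queued, so the next command runs in a fresh invocation and
 * reslock is released in between.
 */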
2011static void mlx5_cvq_kick_handler(struct work_struct *work)
2012{
2013	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
2014	struct virtio_net_ctrl_hdr ctrl;
2015	struct mlx5_vdpa_wq_ent *wqent;
2016	struct mlx5_vdpa_dev *mvdev;
2017	struct mlx5_control_vq *cvq;
2018	struct mlx5_vdpa_net *ndev;
2019	size_t read, write;
2020	int err;
2021
2022	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2023	mvdev = wqent->mvdev;
2024	ndev = to_mlx5_vdpa_ndev(mvdev);
2025	cvq = &mvdev->cvq;
2026
2027	down_write(&ndev->reslock);
2028
2029	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
2030		goto out;
2031
2032	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
2033		goto out;
2034
2035	if (!cvq->ready)
2036		goto out;
2037
2038	while (true) {
2039		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
2040					   GFP_ATOMIC);
2041		if (err <= 0)
2042			break;
2043
2044		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
2045		if (read != sizeof(ctrl))
2046			break;
2047
2048		cvq->received_desc++;
2049		switch (ctrl.class) {
2050		case VIRTIO_NET_CTRL_MAC:
2051			status = handle_ctrl_mac(mvdev, ctrl.cmd);
2052			break;
2053		case VIRTIO_NET_CTRL_MQ:
2054			status = handle_ctrl_mq(mvdev, ctrl.cmd);
2055			break;
2056		case VIRTIO_NET_CTRL_VLAN:
2057			status = handle_ctrl_vlan(mvdev, ctrl.cmd);
2058			break;
2059		default:
2060			break;
2061		}
2062
2063		/* Make sure data is written before advancing index */
2064		smp_wmb();
2065
2066		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
2067		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
2068		vringh_kiov_cleanup(&cvq->riov);
2069		vringh_kiov_cleanup(&cvq->wiov);
2070
2071		if (vringh_need_notify_iotlb(&cvq->vring))
2072			vringh_notify(&cvq->vring);
2073
2074		cvq->completed_desc++;
2075		queue_work(mvdev->wq, &wqent->work);
2076		break;
2077	}
2078
2079out:
2080	up_write(&ndev->reslock);
2081}
2082
2083static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
2084{
2085	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2086	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2087	struct mlx5_vdpa_virtqueue *mvq;
2088
2089	if (!is_index_valid(mvdev, idx))
2090		return;
2091
2092	if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
2093		if (!mvdev->wq || !mvdev->cvq.ready)
2094			return;
2095
2096		queue_work(mvdev->wq, &ndev->cvq_ent.work);
2097		return;
2098	}
2099
2100	mvq = &ndev->vqs[idx];
2101	if (unlikely(!mvq->ready))
2102		return;
2103
2104	iowrite16(idx, ndev->mvdev.res.kick_addr);
2105}
2106
2107static int mlx5_vdpa_set_vq_address(struct vdpa_device *vdev, u16 idx, u64 desc_area,
2108				    u64 driver_area, u64 device_area)
2109{
2110	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2111	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2112	struct mlx5_vdpa_virtqueue *mvq;
2113
2114	if (!is_index_valid(mvdev, idx))
2115		return -EINVAL;
2116
2117	if (is_ctrl_vq_idx(mvdev, idx)) {
2118		mvdev->cvq.desc_addr = desc_area;
2119		mvdev->cvq.device_addr = device_area;
2120		mvdev->cvq.driver_addr = driver_area;
2121		return 0;
2122	}
2123
2124	mvq = &ndev->vqs[idx];
2125	mvq->desc_addr = desc_area;
2126	mvq->device_addr = device_area;
2127	mvq->driver_addr = driver_area;
2128	return 0;
2129}
2130
2131static void mlx5_vdpa_set_vq_num(struct vdpa_device *vdev, u16 idx, u32 num)
2132{
2133	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2134	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2135	struct mlx5_vdpa_virtqueue *mvq;
2136
2137	if (!is_index_valid(mvdev, idx))
2138		return;
2139
2140	if (is_ctrl_vq_idx(mvdev, idx)) {
2141		struct mlx5_control_vq *cvq = &mvdev->cvq;
2142
2143		cvq->vring.vring.num = num;
2144		return;
2145	}
2146
2147	mvq = &ndev->vqs[idx];
2148	mvq->num_ent = num;
2149}
2150
2151static void mlx5_vdpa_set_vq_cb(struct vdpa_device *vdev, u16 idx, struct vdpa_callback *cb)
2152{
2153	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2154	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2155
2156	ndev->event_cbs[idx] = *cb;
2157	if (is_ctrl_vq_idx(mvdev, idx))
2158		mvdev->cvq.event_cb = *cb;
2159}
2160
2161static void mlx5_cvq_notify(struct vringh *vring)
2162{
2163	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
2164
2165	if (!cvq->event_cb.callback)
2166		return;
2167
2168	cvq->event_cb.callback(cvq->event_cb.private);
2169}
2170
2171static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
2172{
2173	struct mlx5_control_vq *cvq = &mvdev->cvq;
2174
2175	cvq->ready = ready;
2176	if (!ready)
2177		return;
2178
2179	cvq->vring.notify = mlx5_cvq_notify;
2180}
2181
2182static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
2183{
2184	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2185	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2186	struct mlx5_vdpa_virtqueue *mvq;
2187	int err;
2188
2189	if (!mvdev->actual_features)
2190		return;
2191
2192	if (!is_index_valid(mvdev, idx))
2193		return;
2194
2195	if (is_ctrl_vq_idx(mvdev, idx)) {
2196		set_cvq_ready(mvdev, ready);
2197		return;
2198	}
2199
2200	mvq = &ndev->vqs[idx];
2201	if (!ready) {
2202		suspend_vq(ndev, mvq);
2203	} else {
2204		err = modify_virtqueue(ndev, mvq, MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY);
2205		if (err) {
2206			mlx5_vdpa_warn(mvdev, "modify VQ %d to ready failed (%d)\n", idx, err);
2207			ready = false;
2208		}
2209	}
2210
2212	mvq->ready = ready;
2213}
2214
2215static bool mlx5_vdpa_get_vq_ready(struct vdpa_device *vdev, u16 idx)
2216{
2217	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2218	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2219
2220	if (!is_index_valid(mvdev, idx))
2221		return false;
2222
2223	if (is_ctrl_vq_idx(mvdev, idx))
2224		return mvdev->cvq.ready;
2225
2226	return ndev->vqs[idx].ready;
2227}
2228
2229static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
2230				  const struct vdpa_vq_state *state)
2231{
2232	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2233	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2234	struct mlx5_vdpa_virtqueue *mvq;
2235
2236	if (!is_index_valid(mvdev, idx))
2237		return -EINVAL;
2238
2239	if (is_ctrl_vq_idx(mvdev, idx)) {
2240		mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
2241		return 0;
2242	}
2243
2244	mvq = &ndev->vqs[idx];
2245	if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
2246		mlx5_vdpa_warn(mvdev, "can't modify available index\n");
2247		return -EINVAL;
2248	}
2249
2250	mvq->used_idx = state->split.avail_index;
2251	mvq->avail_idx = state->split.avail_index;
2252	return 0;
2253}
2254
2255static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa_vq_state *state)
2256{
2257	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2258	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2259	struct mlx5_vdpa_virtqueue *mvq;
2260	struct mlx5_virtq_attr attr;
2261	int err;
2262
2263	if (!is_index_valid(mvdev, idx))
2264		return -EINVAL;
2265
2266	if (is_ctrl_vq_idx(mvdev, idx)) {
2267		state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
2268		return 0;
2269	}
2270
2271	mvq = &ndev->vqs[idx];
2272	/* If the virtq object was destroyed, use the value saved at
2273	 * the last minute of suspend_vq. This caters for userspace
2274	 * that cares about emulating the index after vq is stopped.
2275	 */
2276	if (!mvq->initialized) {
2277		/* Firmware returns a wrong value for the available index.
2278		 * Since both values should be identical, we take the value of
2279		 * used_idx which is reported correctly.
2280		 */
2281		state->split.avail_index = mvq->used_idx;
2282		return 0;
2283	}
2284
2285	err = query_virtqueue(ndev, mvq, &attr);
2286	if (err) {
2287		mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
2288		return err;
2289	}
2290	state->split.avail_index = attr.used_index;
2291	return 0;
2292}
2293
2294static u32 mlx5_vdpa_get_vq_align(struct vdpa_device *vdev)
2295{
2296	return PAGE_SIZE;
2297}
2298
2299static u32 mlx5_vdpa_get_vq_group(struct vdpa_device *vdev, u16 idx)
2300{
2301	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2302
2303	if (is_ctrl_vq_idx(mvdev, idx))
2304		return MLX5_VDPA_CVQ_GROUP;
2305
2306	return MLX5_VDPA_DATAVQ_GROUP;
2307}
2308
2309static u64 mlx_to_virtio_features(u16 dev_features)
2310{
2311	u64 result = 0;
2312
2313	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_MRG_RXBUF))
2314		result |= BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
2315	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_ECN))
2316		result |= BIT_ULL(VIRTIO_NET_F_HOST_ECN);
2317	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_ECN))
2318		result |= BIT_ULL(VIRTIO_NET_F_GUEST_ECN);
2319	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO6))
2320		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO6);
2321	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_TSO4))
2322		result |= BIT_ULL(VIRTIO_NET_F_GUEST_TSO4);
2323	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_GUEST_CSUM))
2324		result |= BIT_ULL(VIRTIO_NET_F_GUEST_CSUM);
2325	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_CSUM))
2326		result |= BIT_ULL(VIRTIO_NET_F_CSUM);
2327	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO6))
2328		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO6);
2329	if (dev_features & BIT_ULL(MLX5_VIRTIO_NET_F_HOST_TSO4))
2330		result |= BIT_ULL(VIRTIO_NET_F_HOST_TSO4);
2331
2332	return result;
2333}
2334
2335static u64 get_supported_features(struct mlx5_core_dev *mdev)
2336{
2337	u64 mlx_vdpa_features = 0;
2338	u16 dev_features;
2339
2340	dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mdev, device_features_bits_mask);
2341	mlx_vdpa_features |= mlx_to_virtio_features(dev_features);
2342	if (MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_version_1_0))
2343		mlx_vdpa_features |= BIT_ULL(VIRTIO_F_VERSION_1);
2344	mlx_vdpa_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
2345	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
2346	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
2347	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MQ);
2348	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_STATUS);
2349	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MTU);
2350	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VLAN);
2351	mlx_vdpa_features |= BIT_ULL(VIRTIO_NET_F_MAC);
2352
2353	return mlx_vdpa_features;
2354}
2355
2356static u64 mlx5_vdpa_get_device_features(struct vdpa_device *vdev)
2357{
2358	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2359	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2360
2361	print_features(mvdev, ndev->mvdev.mlx_features, false);
2362	return ndev->mvdev.mlx_features;
2363}
2364
2365static int verify_driver_features(struct mlx5_vdpa_dev *mvdev, u64 features)
2366{
2367	/* Minimum features to expect */
2368	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
2369		return -EOPNOTSUPP;
2370
2371	/* Double check features combination sent down by the driver.
2372	 * Fail invalid features due to absence of the depended feature.
2373	 *
2374	 * Per VIRTIO v1.1 specification, section 5.1.3.1 Feature bit
2375	 * requirements: "VIRTIO_NET_F_MQ Requires VIRTIO_NET_F_CTRL_VQ".
2376	 * By failing the invalid features sent down by untrusted drivers,
2377	 * we're assured the assumption made upon is_index_valid() and
2378	 * is_ctrl_vq_idx() will not be compromised.
2379	 */
2380	if ((features & (BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) ==
2381	    BIT_ULL(VIRTIO_NET_F_MQ))
2382		return -EINVAL;
2383
2384	return 0;
2385}
2386
2387static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
2388{
2389	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2390	int err;
2391	int i;
2392
2393	for (i = 0; i < mvdev->max_vqs; i++) {
2394		err = setup_vq(ndev, &ndev->vqs[i]);
2395		if (err)
2396			goto err_vq;
2397	}
2398
2399	return 0;
2400
2401err_vq:
2402	for (--i; i >= 0; i--)
2403		teardown_vq(ndev, &ndev->vqs[i]);
2404
2405	return err;
2406}
2407
2408static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
2409{
2410	struct mlx5_vdpa_virtqueue *mvq;
2411	int i;
2412
2413	for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
2414		mvq = &ndev->vqs[i];
2415		if (!mvq->initialized)
2416			continue;
2417
2418		teardown_vq(ndev, mvq);
2419	}
2420}
2421
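/* Maximum virtqueue index, derived from the negotiated features:
 * - CTRL_VQ + MQ: data VQs occupy indices 0..max_vqs - 1 and the CVQ sits
 *   right above them, so max_idx = max_vqs.
 * - CTRL_VQ only: data VQs are 0 and 1 and the CVQ is 2, so max_idx = 2.
 * - no CTRL_VQ: only the rx/tx pair exists, so max_idx = 1.
 */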
2422static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
2423{
2424	if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
2425		if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
2426			/* MQ supported. CVQ index is right above the last data virtqueue's */
2427			mvdev->max_idx = mvdev->max_vqs;
2428		} else {
2429			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1;
2430			 * CVQ gets index 2.
2431			 */
2432			mvdev->max_idx = 2;
2433		}
2434	} else {
2435		/* Two data virtqueues only: one for rx and one for tx */
2436		mvdev->max_idx = 1;
2437	}
2438}
2439
2440static u8 query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
2441{
2442	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
2443	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
2444	int err;
2445
2446	MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE);
2447	MLX5_SET(query_vport_state_in, in, op_mod, opmod);
2448	MLX5_SET(query_vport_state_in, in, vport_number, vport);
2449	if (vport)
2450		MLX5_SET(query_vport_state_in, in, other_vport, 1);
2451
2452	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
2453	if (err)
2454		return 0;
2455
2456	return MLX5_GET(query_vport_state_out, out, state);
2457}
2458
2459static bool get_link_state(struct mlx5_vdpa_dev *mvdev)
2460{
2461	if (query_vport_state(mvdev->mdev, MLX5_VPORT_STATE_OP_MOD_VNIC_VPORT, 0) ==
2462	    VPORT_STATE_UP)
2463		return true;
2464
2465	return false;
2466}
2467
2468static void update_carrier(struct work_struct *work)
2469{
2470	struct mlx5_vdpa_wq_ent *wqent;
2471	struct mlx5_vdpa_dev *mvdev;
2472	struct mlx5_vdpa_net *ndev;
2473
2474	wqent = container_of(work, struct mlx5_vdpa_wq_ent, work);
2475	mvdev = wqent->mvdev;
2476	ndev = to_mlx5_vdpa_ndev(mvdev);
2477	if (get_link_state(mvdev))
2478		ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
2479	else
2480		ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
2481
2482	if (ndev->config_cb.callback)
2483		ndev->config_cb.callback(ndev->config_cb.private);
2484
2485	kfree(wqent);
2486}
2487
2488static int queue_link_work(struct mlx5_vdpa_net *ndev)
2489{
2490	struct mlx5_vdpa_wq_ent *wqent;
2491
2492	wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
2493	if (!wqent)
2494		return -ENOMEM;
2495
2496	wqent->mvdev = &ndev->mvdev;
2497	INIT_WORK(&wqent->work, update_carrier);
2498	queue_work(ndev->mvdev.wq, &wqent->work);
2499	return 0;
2500}
2501
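/* Link state propagation: a PORT_CHANGE event from mlx5 core queues
 * update_carrier() on the vdpa workqueue, which re-reads the vport state,
 * updates VIRTIO_NET_S_LINK_UP in the config space and invokes the config
 * change callback so that the virtio driver re-reads the status field.
 */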
2502static int event_handler(struct notifier_block *nb, unsigned long event, void *param)
2503{
2504	struct mlx5_vdpa_net *ndev = container_of(nb, struct mlx5_vdpa_net, nb);
2505	struct mlx5_eqe *eqe = param;
2506	int ret = NOTIFY_DONE;
2507
2508	if (event == MLX5_EVENT_TYPE_PORT_CHANGE) {
2509		switch (eqe->sub_type) {
2510		case MLX5_PORT_CHANGE_SUBTYPE_DOWN:
2511		case MLX5_PORT_CHANGE_SUBTYPE_ACTIVE:
2512			if (queue_link_work(ndev))
2513				return NOTIFY_DONE;
2514
2515			ret = NOTIFY_OK;
2516			break;
2517		default:
2518			return NOTIFY_DONE;
2519		}
2520		return ret;
2521	}
2522	return ret;
2523}
2524
2525static void register_link_notifier(struct mlx5_vdpa_net *ndev)
2526{
2527	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_STATUS)))
2528		return;
2529
2530	ndev->nb.notifier_call = event_handler;
2531	mlx5_notifier_register(ndev->mvdev.mdev, &ndev->nb);
2532	ndev->nb_registered = true;
2533	queue_link_work(ndev);
2534}
2535
2536static void unregister_link_notifier(struct mlx5_vdpa_net *ndev)
2537{
2538	if (!ndev->nb_registered)
2539		return;
2540
2541	ndev->nb_registered = false;
2542	mlx5_notifier_unregister(ndev->mvdev.mdev, &ndev->nb);
2543	if (ndev->mvdev.wq)
2544		flush_workqueue(ndev->mvdev.wq);
2545}
2546
2547static int mlx5_vdpa_set_driver_features(struct vdpa_device *vdev, u64 features)
2548{
2549	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2550	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2551	int err;
2552
2553	print_features(mvdev, features, true);
2554
2555	err = verify_driver_features(mvdev, features);
2556	if (err)
2557		return err;
2558
2559	ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
2560	if (ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_MQ))
2561		ndev->rqt_size = mlx5vdpa16_to_cpu(mvdev, ndev->config.max_virtqueue_pairs);
2562	else
2563		ndev->rqt_size = 1;
2564
2565	/* Device must start with 1 queue pair, as per VIRTIO v1.2 spec, section
2566	 * 5.1.6.5.5 "Device operation in multiqueue mode":
2567	 *
2568	 * Multiqueue is disabled by default.
2569	 * The driver enables multiqueue by sending a command using class
2570	 * VIRTIO_NET_CTRL_MQ. The command selects the mode of multiqueue
2571	 * operation, as follows: ...
2572	 */
2573	ndev->cur_num_vqs = 2;
2574
2575	update_cvq_info(mvdev);
2576	return err;
2577}
2578
2579static void mlx5_vdpa_set_config_cb(struct vdpa_device *vdev, struct vdpa_callback *cb)
2580{
2581	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2582	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2583
2584	ndev->config_cb = *cb;
2585}
2586
2587#define MLX5_VDPA_MAX_VQ_ENTRIES 256
2588static u16 mlx5_vdpa_get_vq_num_max(struct vdpa_device *vdev)
2589{
2590	return MLX5_VDPA_MAX_VQ_ENTRIES;
2591}
2592
2593static u32 mlx5_vdpa_get_device_id(struct vdpa_device *vdev)
2594{
2595	return VIRTIO_ID_NET;
2596}
2597
2598static u32 mlx5_vdpa_get_vendor_id(struct vdpa_device *vdev)
2599{
2600	return PCI_VENDOR_ID_MELLANOX;
2601}
2602
2603static u8 mlx5_vdpa_get_status(struct vdpa_device *vdev)
2604{
2605	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2606	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2607
2608	print_status(mvdev, ndev->mvdev.status, false);
2609	return ndev->mvdev.status;
2610}
2611
2612static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
2613{
2614	struct mlx5_vq_restore_info *ri = &mvq->ri;
2615	struct mlx5_virtq_attr attr = {};
2616	int err;
2617
2618	if (mvq->initialized) {
2619		err = query_virtqueue(ndev, mvq, &attr);
2620		if (err)
2621			return err;
2622	}
2623
2624	ri->avail_index = attr.available_index;
2625	ri->used_index = attr.used_index;
2626	ri->ready = mvq->ready;
2627	ri->num_ent = mvq->num_ent;
2628	ri->desc_addr = mvq->desc_addr;
2629	ri->device_addr = mvq->device_addr;
2630	ri->driver_addr = mvq->driver_addr;
2631	ri->map = mvq->map;
2632	ri->restore = true;
2633	return 0;
2634}
2635
2636static int save_channels_info(struct mlx5_vdpa_net *ndev)
2637{
2638	int i;
2639
2640	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2641		memset(&ndev->vqs[i].ri, 0, sizeof(ndev->vqs[i].ri));
2642		save_channel_info(ndev, &ndev->vqs[i]);
2643	}
2644	return 0;
2645}
2646
2647static void mlx5_clear_vqs(struct mlx5_vdpa_net *ndev)
2648{
2649	int i;
2650
2651	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2652		memset(&ndev->vqs[i], 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
2653}
2654
2655static void restore_channels_info(struct mlx5_vdpa_net *ndev)
2656{
2657	struct mlx5_vdpa_virtqueue *mvq;
2658	struct mlx5_vq_restore_info *ri;
2659	int i;
2660
2661	mlx5_clear_vqs(ndev);
2662	init_mvqs(ndev);
2663	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
2664		mvq = &ndev->vqs[i];
2665		ri = &mvq->ri;
2666		if (!ri->restore)
2667			continue;
2668
2669		mvq->avail_idx = ri->avail_index;
2670		mvq->used_idx = ri->used_index;
2671		mvq->ready = ri->ready;
2672		mvq->num_ent = ri->num_ent;
2673		mvq->desc_addr = ri->desc_addr;
2674		mvq->device_addr = ri->device_addr;
2675		mvq->driver_addr = ri->driver_addr;
2676		mvq->map = ri->map;
2677	}
2678}
2679
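/* Changing the memory map of a live device requires recreating the hardware
 * objects that reference the old MR: suspend the VQs, save their state, tear
 * down the driver resources, replace the MR for the given ASID, then restore
 * the VQ state and bring the driver back up. If the device is not DRIVER_OK
 * or is suspended, only the MR is replaced and setup is deferred.
 */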
2680static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev,
2681				struct vhost_iotlb *iotlb, unsigned int asid)
2682{
2683	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2684	int err;
2685
2686	suspend_vqs(ndev);
2687	err = save_channels_info(ndev);
2688	if (err)
2689		goto err_mr;
2690
2691	teardown_driver(ndev);
2692	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2693	err = mlx5_vdpa_create_mr(mvdev, iotlb, asid);
2694	if (err)
2695		goto err_mr;
2696
2697	if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK) || mvdev->suspended)
2698		goto err_mr;
2699
2700	restore_channels_info(ndev);
2701	err = setup_driver(mvdev);
2702	if (err)
2703		goto err_setup;
2704
2705	return 0;
2706
2707err_setup:
2708	mlx5_vdpa_destroy_mr_asid(mvdev, asid);
2709err_mr:
2710	return err;
2711}
2712
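/* Datapath bring-up order: virtqueues first, then the RQT that spreads
 * receive traffic across them, then the TIR the steering rules point to, and
 * finally the steering table itself. teardown_driver() releases these in the
 * reverse order.
 */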
2713/* reslock must be held for this function */
2714static int setup_driver(struct mlx5_vdpa_dev *mvdev)
2715{
2716	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2717	int err;
2718
2719	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2720
2721	if (ndev->setup) {
2722		mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
2723		err = 0;
2724		goto out;
2725	}
2726	mlx5_vdpa_add_debugfs(ndev);
2727
2728	err = read_umem_params(ndev);
2729	if (err)
2730		goto err_setup;
2731
2732	err = setup_virtqueues(mvdev);
2733	if (err) {
2734		mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
2735		goto err_setup;
2736	}
2737
2738	err = create_rqt(ndev);
2739	if (err) {
2740		mlx5_vdpa_warn(mvdev, "create_rqt\n");
2741		goto err_rqt;
2742	}
2743
2744	err = create_tir(ndev);
2745	if (err) {
2746		mlx5_vdpa_warn(mvdev, "create_tir\n");
2747		goto err_tir;
2748	}
2749
2750	err = setup_steering(ndev);
2751	if (err) {
2752		mlx5_vdpa_warn(mvdev, "setup_steering\n");
2753		goto err_fwd;
2754	}
2755	ndev->setup = true;
2756
2757	return 0;
2758
2759err_fwd:
2760	destroy_tir(ndev);
2761err_tir:
2762	destroy_rqt(ndev);
2763err_rqt:
2764	teardown_virtqueues(ndev);
2765err_setup:
2766	mlx5_vdpa_remove_debugfs(ndev);
2767out:
2768	return err;
2769}
2770
2771/* reslock must be held for this function */
2772static void teardown_driver(struct mlx5_vdpa_net *ndev)
2773{
2774
2775	WARN_ON(!rwsem_is_locked(&ndev->reslock));
2776
2777	if (!ndev->setup)
2778		return;
2779
2780	mlx5_vdpa_remove_debugfs(ndev);
2781	teardown_steering(ndev);
2782	destroy_tir(ndev);
2783	destroy_rqt(ndev);
2784	teardown_virtqueues(ndev);
2785	ndev->setup = false;
2786}
2787
2788static void clear_vqs_ready(struct mlx5_vdpa_net *ndev)
2789{
2790	int i;
2791
2792	for (i = 0; i < ndev->mvdev.max_vqs; i++)
2793		ndev->vqs[i].ready = false;
2794
2795	ndev->mvdev.cvq.ready = false;
2796}
2797
2798static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
2799{
2800	struct mlx5_control_vq *cvq = &mvdev->cvq;
2801	int err = 0;
2802
2803	if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
2804		u16 idx = cvq->vring.last_avail_idx;
2805
2806		err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
2807					cvq->vring.vring.num, false,
2808					(struct vring_desc *)(uintptr_t)cvq->desc_addr,
2809					(struct vring_avail *)(uintptr_t)cvq->driver_addr,
2810					(struct vring_used *)(uintptr_t)cvq->device_addr);
2811
2812		if (!err)
2813			cvq->vring.last_avail_idx = cvq->vring.last_used_idx = idx;
2814	}
2815	return err;
2816}
2817
2818static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
2819{
2820	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2821	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2822	int err;
2823
2824	print_status(mvdev, status, true);
2825
2826	down_write(&ndev->reslock);
2827
2828	if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
2829		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
2830			err = setup_cvq_vring(mvdev);
2831			if (err) {
2832				mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
2833				goto err_setup;
2834			}
2835			register_link_notifier(ndev);
2836			err = setup_driver(mvdev);
2837			if (err) {
2838				mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
2839				goto err_driver;
2840			}
2841		} else {
2842			mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n");
2843			goto err_clear;
2844		}
2845	}
2846
2847	ndev->mvdev.status = status;
2848	up_write(&ndev->reslock);
2849	return;
2850
2851err_driver:
2852	unregister_link_notifier(ndev);
2853err_setup:
2854	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2855	ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
2856err_clear:
2857	up_write(&ndev->reslock);
2858}
2859
2860static void init_group_to_asid_map(struct mlx5_vdpa_dev *mvdev)
2861{
2862	int i;
2863
2864	/* By default all groups are mapped to ASID 0 */
2865	for (i = 0; i < MLX5_VDPA_NUMVQ_GROUPS; i++)
2866		mvdev->group2asid[i] = 0;
2867}
2868
2869static int mlx5_vdpa_reset(struct vdpa_device *vdev)
2870{
2871	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2872	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2873
2874	print_status(mvdev, 0, true);
2875	mlx5_vdpa_info(mvdev, "performing device reset\n");
2876
2877	down_write(&ndev->reslock);
2878	unregister_link_notifier(ndev);
2879	teardown_driver(ndev);
2880	clear_vqs_ready(ndev);
2881	mlx5_vdpa_destroy_mr(&ndev->mvdev);
2882	ndev->mvdev.status = 0;
2883	ndev->mvdev.suspended = false;
2884	ndev->cur_num_vqs = 0;
2885	ndev->mvdev.cvq.received_desc = 0;
2886	ndev->mvdev.cvq.completed_desc = 0;
2887	memset(ndev->event_cbs, 0, sizeof(*ndev->event_cbs) * (mvdev->max_vqs + 1));
2888	ndev->mvdev.actual_features = 0;
2889	init_group_to_asid_map(mvdev);
2890	++mvdev->generation;
2891
2892	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
2893		if (mlx5_vdpa_create_mr(mvdev, NULL, 0))
2894			mlx5_vdpa_warn(mvdev, "create MR failed\n");
2895	}
2896	up_write(&ndev->reslock);
2897
2898	return 0;
2899}
2900
2901static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
2902{
2903	return sizeof(struct virtio_net_config);
2904}
2905
2906static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
2907				 unsigned int len)
2908{
2909	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2910	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2911
2912	if (offset + len <= sizeof(struct virtio_net_config))
2913		memcpy(buf, (u8 *)&ndev->config + offset, len);
2914}
2915
2916static void mlx5_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset, const void *buf,
2917				 unsigned int len)
2918{
2919	/* not supported */
2920}
2921
2922static u32 mlx5_vdpa_get_generation(struct vdpa_device *vdev)
2923{
2924	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2925
2926	return mvdev->generation;
2927}
2928
2929static int set_map_data(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
2930			unsigned int asid)
2931{
2932	bool change_map;
2933	int err;
2934
2935	err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map, asid);
2936	if (err) {
2937		mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err);
2938		return err;
2939	}
2940
2941	if (change_map)
2942		err = mlx5_vdpa_change_map(mvdev, iotlb, asid);
2943
2944	return err;
2945}
2946
2947static int mlx5_vdpa_set_map(struct vdpa_device *vdev, unsigned int asid,
2948			     struct vhost_iotlb *iotlb)
2949{
2950	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2951	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
2952	int err = -EINVAL;
2953
2954	down_write(&ndev->reslock);
2955	err = set_map_data(mvdev, iotlb, asid);
2956	up_write(&ndev->reslock);
2957	return err;
2958}
2959
2960static struct device *mlx5_get_vq_dma_dev(struct vdpa_device *vdev, u16 idx)
2961{
2962	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2963
2964	if (is_ctrl_vq_idx(mvdev, idx))
2965		return &vdev->dev;
2966
2967	return mvdev->vdev.dma_dev;
2968}
2969
2970static void free_irqs(struct mlx5_vdpa_net *ndev)
2971{
2972	struct mlx5_vdpa_irq_pool_entry *ent;
2973	int i;
2974
2975	if (!msix_mode_supported(&ndev->mvdev))
2976		return;
2977
2978	if (!ndev->irqp.entries)
2979		return;
2980
2981	for (i = ndev->irqp.num_ent - 1; i >= 0; i--) {
2982		ent = ndev->irqp.entries + i;
2983		if (ent->map.virq)
2984			pci_msix_free_irq(ndev->mvdev.mdev->pdev, ent->map);
2985	}
2986	kfree(ndev->irqp.entries);
2987}
2988
2989static void mlx5_vdpa_free(struct vdpa_device *vdev)
2990{
2991	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
2992	struct mlx5_core_dev *pfmdev;
2993	struct mlx5_vdpa_net *ndev;
2994
2995	ndev = to_mlx5_vdpa_ndev(mvdev);
2996
2997	free_resources(ndev);
2998	mlx5_vdpa_destroy_mr(mvdev);
2999	if (!is_zero_ether_addr(ndev->config.mac)) {
3000		pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
3001		mlx5_mpfs_del_mac(pfmdev, ndev->config.mac);
3002	}
3003	mlx5_vdpa_free_resources(&ndev->mvdev);
3004	free_irqs(ndev);
3005	kfree(ndev->event_cbs);
3006	kfree(ndev->vqs);
3007}
3008
3009static struct vdpa_notification_area mlx5_get_vq_notification(struct vdpa_device *vdev, u16 idx)
3010{
3011	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3012	struct vdpa_notification_area ret = {};
3013	struct mlx5_vdpa_net *ndev;
3014	phys_addr_t addr;
3015
3016	if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
3017		return ret;
3018
3019	/* If the SF BAR size is smaller than PAGE_SIZE, do not use direct
3020	 * notification to avoid the risk of mapping pages that contain the BARs
3021	 * of more than one SF.
3022	 */
3023	if (MLX5_CAP_GEN(mvdev->mdev, log_min_sf_size) + 12 < PAGE_SHIFT)
3024		return ret;
3025
3026	ndev = to_mlx5_vdpa_ndev(mvdev);
3027	addr = (phys_addr_t)ndev->mvdev.res.phys_kick_addr;
3028	ret.addr = addr;
3029	ret.size = PAGE_SIZE;
3030	return ret;
3031}
3032
3033static int mlx5_get_vq_irq(struct vdpa_device *vdev, u16 idx)
3034{
3035	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3036	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3037	struct mlx5_vdpa_virtqueue *mvq;
3038
3039	if (!is_index_valid(mvdev, idx))
3040		return -EINVAL;
3041
3042	if (is_ctrl_vq_idx(mvdev, idx))
3043		return -EOPNOTSUPP;
3044
3045	mvq = &ndev->vqs[idx];
3046	if (!mvq->map.virq)
3047		return -EOPNOTSUPP;
3048
3049	return mvq->map.virq;
3050}
3051
3052static u64 mlx5_vdpa_get_driver_features(struct vdpa_device *vdev)
3053{
3054	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3055
3056	return mvdev->actual_features;
3057}
3058
3059static int counter_set_query(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq,
3060			     u64 *received_desc, u64 *completed_desc)
3061{
3062	u32 in[MLX5_ST_SZ_DW(query_virtio_q_counters_in)] = {};
3063	u32 out[MLX5_ST_SZ_DW(query_virtio_q_counters_out)] = {};
3064	void *cmd_hdr;
3065	void *ctx;
3066	int err;
3067
3068	if (!counters_supported(&ndev->mvdev))
3069		return -EOPNOTSUPP;
3070
3071	if (mvq->fw_state != MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY)
3072		return -EAGAIN;
3073
3074	cmd_hdr = MLX5_ADDR_OF(query_virtio_q_counters_in, in, hdr);
3075
3076	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, opcode, MLX5_CMD_OP_QUERY_GENERAL_OBJECT);
3077	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_type, MLX5_OBJ_TYPE_VIRTIO_Q_COUNTERS);
3078	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, uid, ndev->mvdev.res.uid);
3079	MLX5_SET(general_obj_in_cmd_hdr, cmd_hdr, obj_id, mvq->counter_set_id);
3080
3081	err = mlx5_cmd_exec(ndev->mvdev.mdev, in, sizeof(in), out, sizeof(out));
3082	if (err)
3083		return err;
3084
3085	ctx = MLX5_ADDR_OF(query_virtio_q_counters_out, out, counters);
3086	*received_desc = MLX5_GET64(virtio_q_counters, ctx, received_desc);
3087	*completed_desc = MLX5_GET64(virtio_q_counters, ctx, completed_desc);
3088	return 0;
3089}
3090
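/* Vendor statistics are reported per virtqueue as a received_desc and
 * completed_desc pair. Data VQs are read from the hardware virtio_q counter
 * objects; the software-emulated CVQ reports its own counters. Userspace can
 * retrieve them with the iproute2 vdpa tool, e.g.
 * "vdpa dev vstats show <dev> qidx <n>".
 */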
3091static int mlx5_vdpa_get_vendor_vq_stats(struct vdpa_device *vdev, u16 idx,
3092					 struct sk_buff *msg,
3093					 struct netlink_ext_ack *extack)
3094{
3095	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3096	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3097	struct mlx5_vdpa_virtqueue *mvq;
3098	struct mlx5_control_vq *cvq;
3099	u64 received_desc;
3100	u64 completed_desc;
3101	int err = 0;
3102
3103	down_read(&ndev->reslock);
3104	if (!is_index_valid(mvdev, idx)) {
3105		NL_SET_ERR_MSG_MOD(extack, "virtqueue index is not valid");
3106		err = -EINVAL;
3107		goto out_err;
3108	}
3109
3110	if (idx == ctrl_vq_idx(mvdev)) {
3111		cvq = &mvdev->cvq;
3112		received_desc = cvq->received_desc;
3113		completed_desc = cvq->completed_desc;
3114		goto out;
3115	}
3116
3117	mvq = &ndev->vqs[idx];
3118	err = counter_set_query(ndev, mvq, &received_desc, &completed_desc);
3119	if (err) {
3120		NL_SET_ERR_MSG_MOD(extack, "failed to query hardware");
3121		goto out_err;
3122	}
3123
3124out:
3125	err = -EMSGSIZE;
3126	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "received_desc"))
3127		goto out_err;
3128
3129	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, received_desc,
3130			      VDPA_ATTR_PAD))
3131		goto out_err;
3132
3133	if (nla_put_string(msg, VDPA_ATTR_DEV_VENDOR_ATTR_NAME, "completed_desc"))
3134		goto out_err;
3135
3136	if (nla_put_u64_64bit(msg, VDPA_ATTR_DEV_VENDOR_ATTR_VALUE, completed_desc,
3137			      VDPA_ATTR_PAD))
3138		goto out_err;
3139
3140	err = 0;
3141out_err:
3142	up_read(&ndev->reslock);
3143	return err;
3144}
3145
3146static void mlx5_vdpa_cvq_suspend(struct mlx5_vdpa_dev *mvdev)
3147{
3148	struct mlx5_control_vq *cvq;
3149
3150	if (!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
3151		return;
3152
3153	cvq = &mvdev->cvq;
3154	cvq->ready = false;
3155}
3156
3157static int mlx5_vdpa_suspend(struct vdpa_device *vdev)
3158{
3159	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3160	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3161	struct mlx5_vdpa_virtqueue *mvq;
3162	int i;
3163
3164	mlx5_vdpa_info(mvdev, "suspending device\n");
3165
3166	down_write(&ndev->reslock);
3167	unregister_link_notifier(ndev);
3168	for (i = 0; i < ndev->cur_num_vqs; i++) {
3169		mvq = &ndev->vqs[i];
3170		suspend_vq(ndev, mvq);
3171	}
3172	mlx5_vdpa_cvq_suspend(mvdev);
3173	mvdev->suspended = true;
3174	up_write(&ndev->reslock);
3175	return 0;
3176}
3177
3178static int mlx5_set_group_asid(struct vdpa_device *vdev, u32 group,
3179			       unsigned int asid)
3180{
3181	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
3182
3183	if (group >= MLX5_VDPA_NUMVQ_GROUPS)
3184		return -EINVAL;
3185
3186	mvdev->group2asid[group] = asid;
3187	return 0;
3188}
3189
3190static const struct vdpa_config_ops mlx5_vdpa_ops = {
3191	.set_vq_address = mlx5_vdpa_set_vq_address,
3192	.set_vq_num = mlx5_vdpa_set_vq_num,
3193	.kick_vq = mlx5_vdpa_kick_vq,
3194	.set_vq_cb = mlx5_vdpa_set_vq_cb,
3195	.set_vq_ready = mlx5_vdpa_set_vq_ready,
3196	.get_vq_ready = mlx5_vdpa_get_vq_ready,
3197	.set_vq_state = mlx5_vdpa_set_vq_state,
3198	.get_vq_state = mlx5_vdpa_get_vq_state,
3199	.get_vendor_vq_stats = mlx5_vdpa_get_vendor_vq_stats,
3200	.get_vq_notification = mlx5_get_vq_notification,
3201	.get_vq_irq = mlx5_get_vq_irq,
3202	.get_vq_align = mlx5_vdpa_get_vq_align,
3203	.get_vq_group = mlx5_vdpa_get_vq_group,
3204	.get_device_features = mlx5_vdpa_get_device_features,
3205	.set_driver_features = mlx5_vdpa_set_driver_features,
3206	.get_driver_features = mlx5_vdpa_get_driver_features,
3207	.set_config_cb = mlx5_vdpa_set_config_cb,
3208	.get_vq_num_max = mlx5_vdpa_get_vq_num_max,
3209	.get_device_id = mlx5_vdpa_get_device_id,
3210	.get_vendor_id = mlx5_vdpa_get_vendor_id,
3211	.get_status = mlx5_vdpa_get_status,
3212	.set_status = mlx5_vdpa_set_status,
3213	.reset = mlx5_vdpa_reset,
3214	.get_config_size = mlx5_vdpa_get_config_size,
3215	.get_config = mlx5_vdpa_get_config,
3216	.set_config = mlx5_vdpa_set_config,
3217	.get_generation = mlx5_vdpa_get_generation,
3218	.set_map = mlx5_vdpa_set_map,
3219	.set_group_asid = mlx5_set_group_asid,
3220	.get_vq_dma_dev = mlx5_get_vq_dma_dev,
3221	.free = mlx5_vdpa_free,
3222	.suspend = mlx5_vdpa_suspend,
3223};
3224
3225static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
3226{
3227	u16 hw_mtu;
3228	int err;
3229
3230	err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
3231	if (err)
3232		return err;
3233
3234	*mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
3235	return 0;
3236}
3237
3238static int alloc_resources(struct mlx5_vdpa_net *ndev)
3239{
3240	struct mlx5_vdpa_net_resources *res = &ndev->res;
3241	int err;
3242
3243	if (res->valid) {
3244		mlx5_vdpa_warn(&ndev->mvdev, "resources already allocated\n");
3245		return -EEXIST;
3246	}
3247
3248	err = mlx5_vdpa_alloc_transport_domain(&ndev->mvdev, &res->tdn);
3249	if (err)
3250		return err;
3251
3252	err = create_tis(ndev);
3253	if (err)
3254		goto err_tis;
3255
3256	res->valid = true;
3257
3258	return 0;
3259
3260err_tis:
3261	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3262	return err;
3263}
3264
3265static void free_resources(struct mlx5_vdpa_net *ndev)
3266{
3267	struct mlx5_vdpa_net_resources *res = &ndev->res;
3268
3269	if (!res->valid)
3270		return;
3271
3272	destroy_tis(ndev);
3273	mlx5_vdpa_dealloc_transport_domain(&ndev->mvdev, res->tdn);
3274	res->valid = false;
3275}
3276
3277static void init_mvqs(struct mlx5_vdpa_net *ndev)
3278{
3279	struct mlx5_vdpa_virtqueue *mvq;
3280	int i;
3281
3282	for (i = 0; i < ndev->mvdev.max_vqs; ++i) {
3283		mvq = &ndev->vqs[i];
3284		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3285		mvq->index = i;
3286		mvq->ndev = ndev;
3287		mvq->fwqp.fw = true;
3288		mvq->fw_state = MLX5_VIRTIO_NET_Q_OBJECT_NONE;
3289	}
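	/* The loop below never executes at present: the loop above already
	 * initializes all max_vqs entries.
	 */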
3290	for (; i < ndev->mvdev.max_vqs; i++) {
3291		mvq = &ndev->vqs[i];
3292		memset(mvq, 0, offsetof(struct mlx5_vdpa_virtqueue, ri));
3293		mvq->index = i;
3294		mvq->ndev = ndev;
3295	}
3296}
3297
3298struct mlx5_vdpa_mgmtdev {
3299	struct vdpa_mgmt_dev mgtdev;
3300	struct mlx5_adev *madev;
3301	struct mlx5_vdpa_net *ndev;
3302};
3303
3304static int config_func_mtu(struct mlx5_core_dev *mdev, u16 mtu)
3305{
3306	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
3307	void *in;
3308	int err;
3309
3310	in = kvzalloc(inlen, GFP_KERNEL);
3311	if (!in)
3312		return -ENOMEM;
3313
3314	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
3315	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu,
3316		 mtu + MLX5V_ETH_HARD_MTU);
3317	MLX5_SET(modify_nic_vport_context_in, in, opcode,
3318		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
3319
3320	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
3321
3322	kvfree(in);
3323	return err;
3324}
3325
3326static void allocate_irqs(struct mlx5_vdpa_net *ndev)
3327{
3328	struct mlx5_vdpa_irq_pool_entry *ent;
3329	int i;
3330
3331	if (!msix_mode_supported(&ndev->mvdev))
3332		return;
3333
3334	if (!ndev->mvdev.mdev->pdev)
3335		return;
3336
3337	ndev->irqp.entries = kcalloc(ndev->mvdev.max_vqs, sizeof(*ndev->irqp.entries), GFP_KERNEL);
3338	if (!ndev->irqp.entries)
3339		return;
3340
3342	for (i = 0; i < ndev->mvdev.max_vqs; i++) {
3343		ent = ndev->irqp.entries + i;
3344		snprintf(ent->name, MLX5_VDPA_IRQ_NAME_LEN, "%s-vq-%d",
3345			 dev_name(&ndev->mvdev.vdev.dev), i);
3346		ent->map = pci_msix_alloc_irq_at(ndev->mvdev.mdev->pdev, MSI_ANY_INDEX, NULL);
3347		if (!ent->map.virq)
3348			return;
3349
3350		ndev->irqp.num_ent++;
3351	}
3352}
3353
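/* Device creation entry point, invoked when userspace adds a vdpa device on
 * this management device, typically with the iproute2 vdpa tool (the name,
 * address and values below are illustrative):
 *
 *   vdpa dev add name vdpa0 mgmtdev pci/0000:3b:00.2 \
 *           mac 00:11:22:33:44:55 max_vqp 4 mtu 1500
 *
 * add_config carries the attributes supplied on that command line
 * (VDPA_ATTR_DEV_NET_CFG_MACADDR, _MAX_VQP, _MTU and, optionally,
 * VDPA_ATTR_DEV_FEATURES for feature provisioning).
 */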
3354static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name,
3355			     const struct vdpa_dev_set_config *add_config)
3356{
3357	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3358	struct virtio_net_config *config;
3359	struct mlx5_core_dev *pfmdev;
3360	struct mlx5_vdpa_dev *mvdev;
3361	struct mlx5_vdpa_net *ndev;
3362	struct mlx5_core_dev *mdev;
3363	u64 device_features;
3364	u32 max_vqs;
3365	u16 mtu;
3366	int err;
3367
3368	if (mgtdev->ndev)
3369		return -ENOSPC;
3370
3371	mdev = mgtdev->madev->mdev;
3372	device_features = mgtdev->mgtdev.supported_features;
3373	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) {
3374		if (add_config->device_features & ~device_features) {
3375			dev_warn(mdev->device,
3376				 "The provisioned features 0x%llx are not supported by this device with features 0x%llx\n",
3377				 add_config->device_features, device_features);
3378			return -EINVAL;
3379		}
3380		device_features &= add_config->device_features;
3381	} else {
3382		device_features &= ~BIT_ULL(VIRTIO_NET_F_MRG_RXBUF);
3383	}
3384	if (!(device_features & BIT_ULL(VIRTIO_F_VERSION_1) &&
3385	      device_features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM))) {
3386		dev_warn(mdev->device,
3387			 "Must provision minimum features 0x%llx for this device",
3388			 BIT_ULL(VIRTIO_F_VERSION_1) | BIT_ULL(VIRTIO_F_ACCESS_PLATFORM));
3389		return -EOPNOTSUPP;
3390	}
3391
3392	if (!(MLX5_CAP_DEV_VDPA_EMULATION(mdev, virtio_queue_type) &
3393	    MLX5_VIRTIO_EMULATION_CAP_VIRTIO_QUEUE_TYPE_SPLIT)) {
3394		dev_warn(mdev->device, "missing support for split virtqueues\n");
3395		return -EOPNOTSUPP;
3396	}
3397
3398	max_vqs = min_t(int, MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues),
3399			1 << MLX5_CAP_GEN(mdev, log_max_rqt_size));
3400	if (max_vqs < 2) {
3401		dev_warn(mdev->device,
3402			 "%d virtqueues are supported. At least 2 are required\n",
3403			 max_vqs);
3404		return -EAGAIN;
3405	}
3406
3407	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP)) {
3408		if (add_config->net.max_vq_pairs > max_vqs / 2)
3409			return -EINVAL;
3410		max_vqs = min_t(u32, max_vqs, 2 * add_config->net.max_vq_pairs);
3411	} else {
3412		max_vqs = 2;
3413	}
3414
3415	ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
3416				 MLX5_VDPA_NUMVQ_GROUPS, MLX5_VDPA_NUM_AS, name, false);
3417	if (IS_ERR(ndev))
3418		return PTR_ERR(ndev);
3419
3420	ndev->mvdev.max_vqs = max_vqs;
3421	mvdev = &ndev->mvdev;
3422	mvdev->mdev = mdev;
3423
3424	ndev->vqs = kcalloc(max_vqs, sizeof(*ndev->vqs), GFP_KERNEL);
3425	ndev->event_cbs = kcalloc(max_vqs + 1, sizeof(*ndev->event_cbs), GFP_KERNEL);
3426	if (!ndev->vqs || !ndev->event_cbs) {
3427		err = -ENOMEM;
3428		goto err_alloc;
3429	}
3430
3431	init_mvqs(ndev);
3432	allocate_irqs(ndev);
3433	init_rwsem(&ndev->reslock);
3434	config = &ndev->config;
3435
3436	if (add_config->mask & BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU)) {
3437		err = config_func_mtu(mdev, add_config->net.mtu);
3438		if (err)
3439			goto err_alloc;
3440	}
3441
3442	if (device_features & BIT_ULL(VIRTIO_NET_F_MTU)) {
3443		err = query_mtu(mdev, &mtu);
3444		if (err)
3445			goto err_alloc;
3446
3447		ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, mtu);
3448	}
3449
3450	if (device_features & BIT_ULL(VIRTIO_NET_F_STATUS)) {
3451		if (get_link_state(mvdev))
3452			ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
3453		else
3454			ndev->config.status &= cpu_to_mlx5vdpa16(mvdev, ~VIRTIO_NET_S_LINK_UP);
3455	}
3456
3457	if (add_config->mask & (1 << VDPA_ATTR_DEV_NET_CFG_MACADDR)) {
3458		memcpy(ndev->config.mac, add_config->net.mac, ETH_ALEN);
3459	/* Don't bother setting a MAC address in the config if _F_MAC is not going to be provisioned */
3460	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0 ||
3461		   device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3462		err = mlx5_query_nic_vport_mac_address(mdev, 0, 0, config->mac);
3463		if (err)
3464			goto err_alloc;
3465	}
3466
3467	if (!is_zero_ether_addr(config->mac)) {
3468		pfmdev = pci_get_drvdata(pci_physfn(mdev->pdev));
3469		err = mlx5_mpfs_add_mac(pfmdev, config->mac);
3470		if (err)
3471			goto err_alloc;
3472	} else if ((add_config->mask & BIT_ULL(VDPA_ATTR_DEV_FEATURES)) == 0) {
3473		/*
3474		 * We used to clear the _F_MAC feature bit when a zero
3475		 * MAC address was seen and device features were not
3476		 * explicitly provisioned. Keep that behaviour so old
3477		 * scripts do not break.
3478		 */
3479		device_features &= ~BIT_ULL(VIRTIO_NET_F_MAC);
3480	} else if (device_features & BIT_ULL(VIRTIO_NET_F_MAC)) {
3481		/* Don't provision zero mac address for _F_MAC */
3482		mlx5_vdpa_warn(&ndev->mvdev,
3483			       "No mac address provisioned?\n");
3484		err = -EINVAL;
3485		goto err_alloc;
3486	}
3487
3488	if (device_features & BIT_ULL(VIRTIO_NET_F_MQ))
3489		config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, max_vqs / 2);
3490
3491	ndev->mvdev.mlx_features = device_features;
3492	mvdev->vdev.dma_dev = &mdev->pdev->dev;
3493	err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
3494	if (err)
3495		goto err_mpfs;
3496
3497	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
3498		err = mlx5_vdpa_create_mr(mvdev, NULL, 0);
3499		if (err)
3500			goto err_res;
3501	}
3502
3503	err = alloc_resources(ndev);
3504	if (err)
3505		goto err_mr;
3506
3507	ndev->cvq_ent.mvdev = mvdev;
3508	INIT_WORK(&ndev->cvq_ent.work, mlx5_cvq_kick_handler);
3509	mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_wq");
3510	if (!mvdev->wq) {
3511		err = -ENOMEM;
3512		goto err_res2;
3513	}
3514
3515	mvdev->vdev.mdev = &mgtdev->mgtdev;
3516	err = _vdpa_register_device(&mvdev->vdev, max_vqs + 1);
3517	if (err)
3518		goto err_reg;
3519
3520	mgtdev->ndev = ndev;
3521	return 0;
3522
3523err_reg:
3524	destroy_workqueue(mvdev->wq);
3525err_res2:
3526	free_resources(ndev);
3527err_mr:
3528	mlx5_vdpa_destroy_mr(mvdev);
3529err_res:
3530	mlx5_vdpa_free_resources(&ndev->mvdev);
3531err_mpfs:
3532	if (!is_zero_ether_addr(config->mac))
3533		mlx5_mpfs_del_mac(pfmdev, config->mac);
3534err_alloc:
3535	put_device(&mvdev->vdev.dev);
3536	return err;
3537}
3538
3539static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
3540{
3541	struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
3542	struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
3543	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
3544	struct workqueue_struct *wq;
3545
3546	unregister_link_notifier(ndev);
3547	_vdpa_unregister_device(dev);
3548	wq = mvdev->wq;
3549	mvdev->wq = NULL;
3550	destroy_workqueue(wq);
3551	mgtdev->ndev = NULL;
3552}
3553
3554static const struct vdpa_mgmtdev_ops mdev_ops = {
3555	.dev_add = mlx5_vdpa_dev_add,
3556	.dev_del = mlx5_vdpa_dev_del,
3557};
3558
3559static struct virtio_device_id id_table[] = {
3560	{ VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
3561	{ 0 },
3562};
3563
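/* The driver binds to the "mlx5_core.vnet" auxiliary device exposed by the
 * mlx5 core driver and registers a vdpa management device for it; the
 * management device then appears in "vdpa mgmtdev show" and accepts
 * "vdpa dev add" requests handled by mlx5_vdpa_dev_add() above.
 */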
3564static int mlx5v_probe(struct auxiliary_device *adev,
3565		       const struct auxiliary_device_id *id)
3566
3567{
3568	struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
3569	struct mlx5_core_dev *mdev = madev->mdev;
3570	struct mlx5_vdpa_mgmtdev *mgtdev;
3571	int err;
3572
3573	mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
3574	if (!mgtdev)
3575		return -ENOMEM;
3576
3577	mgtdev->mgtdev.ops = &mdev_ops;
3578	mgtdev->mgtdev.device = mdev->device;
3579	mgtdev->mgtdev.id_table = id_table;
3580	mgtdev->mgtdev.config_attr_mask = BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MACADDR) |
3581					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MAX_VQP) |
3582					  BIT_ULL(VDPA_ATTR_DEV_NET_CFG_MTU) |
3583					  BIT_ULL(VDPA_ATTR_DEV_FEATURES);
3584	mgtdev->mgtdev.max_supported_vqs =
3585		MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues) + 1;
3586	mgtdev->mgtdev.supported_features = get_supported_features(mdev);
3587	mgtdev->madev = madev;
3588
3589	err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
3590	if (err)
3591		goto reg_err;
3592
3593	auxiliary_set_drvdata(adev, mgtdev);
3594
3595	return 0;
3596
3597reg_err:
3598	kfree(mgtdev);
3599	return err;
3600}
3601
3602static void mlx5v_remove(struct auxiliary_device *adev)
3603{
3604	struct mlx5_vdpa_mgmtdev *mgtdev;
3605
3606	mgtdev = auxiliary_get_drvdata(adev);
3607	vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
3608	kfree(mgtdev);
3609}
3610
3611static const struct auxiliary_device_id mlx5v_id_table[] = {
3612	{ .name = MLX5_ADEV_NAME ".vnet", },
3613	{},
3614};
3615
3616MODULE_DEVICE_TABLE(auxiliary, mlx5v_id_table);
3617
3618static struct auxiliary_driver mlx5v_driver = {
3619	.name = "vnet",
3620	.probe = mlx5v_probe,
3621	.remove = mlx5v_remove,
3622	.id_table = mlx5v_id_table,
3623};
3624
3625module_auxiliary_driver(mlx5v_driver);
3626