1// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2/*
3 * Copyright 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
4 */
5
6#include <linux/dma-buf.h>
7#include <linux/dma-resv.h>
8#include <linux/vmalloc.h>
9#include <linux/log2.h>
10
11#include <rdma/ib_addr.h>
12#include <rdma/ib_umem.h>
13#include <rdma/ib_user_verbs.h>
14#include <rdma/ib_verbs.h>
15#include <rdma/uverbs_ioctl.h>
16
17#include "efa.h"
18#include "efa_io_defs.h"
19
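/*
 * Mapping types handed out to userspace as rdma_user_mmap entries:
 * EFA_MMAP_DMA_PAGE is host memory that is inserted page by page with
 * vm_insert_page(), while EFA_MMAP_IO_WC and EFA_MMAP_IO_NC are device
 * BAR regions mapped write-combined or non-cached, respectively (see
 * __efa_mmap()).
 */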
20enum {
21	EFA_MMAP_DMA_PAGE = 0,
22	EFA_MMAP_IO_WC,
23	EFA_MMAP_IO_NC,
24};
25
26#define EFA_AENQ_ENABLED_GROUPS \
27	(BIT(EFA_ADMIN_FATAL_ERROR) | BIT(EFA_ADMIN_WARNING) | \
28	 BIT(EFA_ADMIN_NOTIFICATION) | BIT(EFA_ADMIN_KEEP_ALIVE))
29
30struct efa_user_mmap_entry {
31	struct rdma_user_mmap_entry rdma_entry;
32	u64 address;
33	u8 mmap_flag;
34};
35
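/*
 * Device and port statistics are declared once through the X-macro lists
 * below and expanded twice: EFA_STATS_ENUM turns each entry into an enum
 * value and EFA_STATS_STR into the matching rdma_stat_desc name, e.g.
 * op(EFA_SUBMITTED_CMDS, "submitted_cmds") becomes EFA_SUBMITTED_CMDS in
 * enum efa_hw_device_stats and [EFA_SUBMITTED_CMDS].name = "submitted_cmds"
 * in efa_device_stats_descs[]. This keeps the enum and the string table in
 * sync by construction.
 */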
36#define EFA_DEFINE_DEVICE_STATS(op) \
37	op(EFA_SUBMITTED_CMDS, "submitted_cmds") \
38	op(EFA_COMPLETED_CMDS, "completed_cmds") \
39	op(EFA_CMDS_ERR, "cmds_err") \
40	op(EFA_NO_COMPLETION_CMDS, "no_completion_cmds") \
41	op(EFA_KEEP_ALIVE_RCVD, "keep_alive_rcvd") \
42	op(EFA_ALLOC_PD_ERR, "alloc_pd_err") \
43	op(EFA_CREATE_QP_ERR, "create_qp_err") \
44	op(EFA_CREATE_CQ_ERR, "create_cq_err") \
45	op(EFA_REG_MR_ERR, "reg_mr_err") \
46	op(EFA_ALLOC_UCONTEXT_ERR, "alloc_ucontext_err") \
47	op(EFA_CREATE_AH_ERR, "create_ah_err") \
48	op(EFA_MMAP_ERR, "mmap_err")
49
50#define EFA_DEFINE_PORT_STATS(op) \
51	op(EFA_TX_BYTES, "tx_bytes") \
52	op(EFA_TX_PKTS, "tx_pkts") \
53	op(EFA_RX_BYTES, "rx_bytes") \
54	op(EFA_RX_PKTS, "rx_pkts") \
55	op(EFA_RX_DROPS, "rx_drops") \
56	op(EFA_SEND_BYTES, "send_bytes") \
57	op(EFA_SEND_WRS, "send_wrs") \
58	op(EFA_RECV_BYTES, "recv_bytes") \
59	op(EFA_RECV_WRS, "recv_wrs") \
60	op(EFA_RDMA_READ_WRS, "rdma_read_wrs") \
61	op(EFA_RDMA_READ_BYTES, "rdma_read_bytes") \
62	op(EFA_RDMA_READ_WR_ERR, "rdma_read_wr_err") \
63	op(EFA_RDMA_READ_RESP_BYTES, "rdma_read_resp_bytes") \
64	op(EFA_RDMA_WRITE_WRS, "rdma_write_wrs") \
65	op(EFA_RDMA_WRITE_BYTES, "rdma_write_bytes") \
66	op(EFA_RDMA_WRITE_WR_ERR, "rdma_write_wr_err") \
67	op(EFA_RDMA_WRITE_RECV_BYTES, "rdma_write_recv_bytes")
68
69#define EFA_STATS_ENUM(ename, name) ename,
70#define EFA_STATS_STR(ename, nam) \
71	[ename].name = nam,
72
73enum efa_hw_device_stats {
74	EFA_DEFINE_DEVICE_STATS(EFA_STATS_ENUM)
75};
76
77static const struct rdma_stat_desc efa_device_stats_descs[] = {
78	EFA_DEFINE_DEVICE_STATS(EFA_STATS_STR)
79};
80
81enum efa_hw_port_stats {
82	EFA_DEFINE_PORT_STATS(EFA_STATS_ENUM)
83};
84
85static const struct rdma_stat_desc efa_port_stats_descs[] = {
86	EFA_DEFINE_PORT_STATS(EFA_STATS_STR)
87};
88
89#define EFA_CHUNK_PAYLOAD_SHIFT       12
90#define EFA_CHUNK_PAYLOAD_SIZE        BIT(EFA_CHUNK_PAYLOAD_SHIFT)
91#define EFA_CHUNK_PAYLOAD_PTR_SIZE    8
92
93#define EFA_CHUNK_SHIFT               12
94#define EFA_CHUNK_SIZE                BIT(EFA_CHUNK_SHIFT)
95#define EFA_CHUNK_PTR_SIZE            sizeof(struct efa_com_ctrl_buff_info)
96
97#define EFA_PTRS_PER_CHUNK \
98	((EFA_CHUNK_SIZE - EFA_CHUNK_PTR_SIZE) / EFA_CHUNK_PAYLOAD_PTR_SIZE)
99
100#define EFA_CHUNK_USED_SIZE \
101	((EFA_PTRS_PER_CHUNK * EFA_CHUNK_PAYLOAD_PTR_SIZE) + EFA_CHUNK_PTR_SIZE)
102
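/*
 * An indirect PBL is carved into EFA_CHUNK_SIZE (4KB) chunks. Each chunk
 * stores up to EFA_PTRS_PER_CHUNK 8-byte page DMA addresses followed by a
 * single struct efa_com_ctrl_buff_info holding the DMA address and length
 * of the next chunk, so only EFA_CHUNK_USED_SIZE bytes of every chunk are
 * actually used.
 */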
103struct pbl_chunk {
104	dma_addr_t dma_addr;
105	u64 *buf;
106	u32 length;
107};
108
109struct pbl_chunk_list {
110	struct pbl_chunk *chunks;
111	unsigned int size;
112};
113
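/*
 * A page buffer list is handed to the device in one of two forms: if the
 * pbl buffer came back physically contiguous it is DMA-mapped as a single
 * region (phys.continuous), otherwise the vmalloc'ed buffer is described
 * through a chunk list built from its pages (phys.indirect).
 */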
114struct pbl_context {
115	union {
116		struct {
117			dma_addr_t dma_addr;
118		} continuous;
119		struct {
120			u32 pbl_buf_size_in_pages;
121			struct scatterlist *sgl;
122			int sg_dma_cnt;
123			struct pbl_chunk_list chunk_list;
124		} indirect;
125	} phys;
126	u64 *pbl_buf;
127	u32 pbl_buf_size_in_bytes;
128	u8 physically_continuous;
129};
130
131static inline struct efa_dev *to_edev(struct ib_device *ibdev)
132{
133	return container_of(ibdev, struct efa_dev, ibdev);
134}
135
136static inline struct efa_ucontext *to_eucontext(struct ib_ucontext *ibucontext)
137{
138	return container_of(ibucontext, struct efa_ucontext, ibucontext);
139}
140
141static inline struct efa_pd *to_epd(struct ib_pd *ibpd)
142{
143	return container_of(ibpd, struct efa_pd, ibpd);
144}
145
146static inline struct efa_mr *to_emr(struct ib_mr *ibmr)
147{
148	return container_of(ibmr, struct efa_mr, ibmr);
149}
150
151static inline struct efa_qp *to_eqp(struct ib_qp *ibqp)
152{
153	return container_of(ibqp, struct efa_qp, ibqp);
154}
155
156static inline struct efa_cq *to_ecq(struct ib_cq *ibcq)
157{
158	return container_of(ibcq, struct efa_cq, ibcq);
159}
160
161static inline struct efa_ah *to_eah(struct ib_ah *ibah)
162{
163	return container_of(ibah, struct efa_ah, ibah);
164}
165
166static inline struct efa_user_mmap_entry *
167to_emmap(struct rdma_user_mmap_entry *rdma_entry)
168{
169	return container_of(rdma_entry, struct efa_user_mmap_entry, rdma_entry);
170}
171
172#define EFA_DEV_CAP(dev, cap) \
173	((dev)->dev_attr.device_caps & \
174	 EFA_ADMIN_FEATURE_DEVICE_ATTR_DESC_##cap##_MASK)
175
176#define is_reserved_cleared(reserved) \
177	!memchr_inv(reserved, 0, sizeof(reserved))
178
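/*
 * Allocate a zeroed, physically contiguous buffer with alloc_pages_exact()
 * and DMA-map it in the requested direction. RQ rings and CQ buffers
 * allocated here are later exposed to userspace page by page through
 * EFA_MMAP_DMA_PAGE entries.
 */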
179static void *efa_zalloc_mapped(struct efa_dev *dev, dma_addr_t *dma_addr,
180			       size_t size, enum dma_data_direction dir)
181{
182	void *addr;
183
184	addr = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
185	if (!addr)
186		return NULL;
187
188	*dma_addr = dma_map_single(&dev->pdev->dev, addr, size, dir);
189	if (dma_mapping_error(&dev->pdev->dev, *dma_addr)) {
190		ibdev_err(&dev->ibdev, "Failed to map DMA address\n");
191		free_pages_exact(addr, size);
192		return NULL;
193	}
194
195	return addr;
196}
197
198static void efa_free_mapped(struct efa_dev *dev, void *cpu_addr,
199			    dma_addr_t dma_addr,
200			    size_t size, enum dma_data_direction dir)
201{
202	dma_unmap_single(&dev->pdev->dev, dma_addr, size, dir);
203	free_pages_exact(cpu_addr, size);
204}
205
206int efa_query_device(struct ib_device *ibdev,
207		     struct ib_device_attr *props,
208		     struct ib_udata *udata)
209{
210	struct efa_com_get_device_attr_result *dev_attr;
211	struct efa_ibv_ex_query_device_resp resp = {};
212	struct efa_dev *dev = to_edev(ibdev);
213	int err;
214
215	if (udata && udata->inlen &&
216	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
217		ibdev_dbg(ibdev,
218			  "Incompatible ABI params, udata not cleared\n");
219		return -EINVAL;
220	}
221
222	dev_attr = &dev->dev_attr;
223
224	memset(props, 0, sizeof(*props));
225	props->max_mr_size = dev_attr->max_mr_pages * PAGE_SIZE;
226	props->page_size_cap = dev_attr->page_size_cap;
227	props->vendor_id = dev->pdev->vendor;
228	props->vendor_part_id = dev->pdev->device;
229	props->hw_ver = dev->pdev->subsystem_device;
230	props->max_qp = dev_attr->max_qp;
231	props->max_cq = dev_attr->max_cq;
232	props->max_pd = dev_attr->max_pd;
233	props->max_mr = dev_attr->max_mr;
234	props->max_ah = dev_attr->max_ah;
235	props->max_cqe = dev_attr->max_cq_depth;
236	props->max_qp_wr = min_t(u32, dev_attr->max_sq_depth,
237				 dev_attr->max_rq_depth);
238	props->max_send_sge = dev_attr->max_sq_sge;
239	props->max_recv_sge = dev_attr->max_rq_sge;
240	props->max_sge_rd = dev_attr->max_wr_rdma_sge;
241	props->max_pkeys = 1;
242
243	if (udata && udata->outlen) {
244		resp.max_sq_sge = dev_attr->max_sq_sge;
245		resp.max_rq_sge = dev_attr->max_rq_sge;
246		resp.max_sq_wr = dev_attr->max_sq_depth;
247		resp.max_rq_wr = dev_attr->max_rq_depth;
248		resp.max_rdma_size = dev_attr->max_rdma_size;
249
250		resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_WITH_SGID;
251		if (EFA_DEV_CAP(dev, RDMA_READ))
252			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_READ;
253
254		if (EFA_DEV_CAP(dev, RNR_RETRY))
255			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RNR_RETRY;
256
257		if (EFA_DEV_CAP(dev, DATA_POLLING_128))
258			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_DATA_POLLING_128;
259
260		if (EFA_DEV_CAP(dev, RDMA_WRITE))
261			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_RDMA_WRITE;
262
263		if (dev->neqs)
264			resp.device_caps |= EFA_QUERY_DEVICE_CAPS_CQ_NOTIFICATIONS;
265
266		err = ib_copy_to_udata(udata, &resp,
267				       min(sizeof(resp), udata->outlen));
268		if (err) {
269			ibdev_dbg(ibdev,
270				  "Failed to copy udata for query_device\n");
271			return err;
272		}
273	}
274
275	return 0;
276}
277
278int efa_query_port(struct ib_device *ibdev, u32 port,
279		   struct ib_port_attr *props)
280{
281	struct efa_dev *dev = to_edev(ibdev);
282
283	props->lmc = 1;
284
285	props->state = IB_PORT_ACTIVE;
286	props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
287	props->gid_tbl_len = 1;
288	props->pkey_tbl_len = 1;
289	props->active_speed = IB_SPEED_EDR;
290	props->active_width = IB_WIDTH_4X;
291	props->max_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
292	props->active_mtu = ib_mtu_int_to_enum(dev->dev_attr.mtu);
293	props->max_msg_sz = dev->dev_attr.mtu;
294	props->max_vl_num = 1;
295
296	return 0;
297}
298
299int efa_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
300		 int qp_attr_mask,
301		 struct ib_qp_init_attr *qp_init_attr)
302{
303	struct efa_dev *dev = to_edev(ibqp->device);
304	struct efa_com_query_qp_params params = {};
305	struct efa_com_query_qp_result result;
306	struct efa_qp *qp = to_eqp(ibqp);
307	int err;
308
309#define EFA_QUERY_QP_SUPP_MASK \
310	(IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT | \
311	 IB_QP_QKEY | IB_QP_SQ_PSN | IB_QP_CAP | IB_QP_RNR_RETRY)
312
313	if (qp_attr_mask & ~EFA_QUERY_QP_SUPP_MASK) {
314		ibdev_dbg(&dev->ibdev,
315			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
316			  qp_attr_mask, EFA_QUERY_QP_SUPP_MASK);
317		return -EOPNOTSUPP;
318	}
319
320	memset(qp_attr, 0, sizeof(*qp_attr));
321	memset(qp_init_attr, 0, sizeof(*qp_init_attr));
322
323	params.qp_handle = qp->qp_handle;
324	err = efa_com_query_qp(&dev->edev, &params, &result);
325	if (err)
326		return err;
327
328	qp_attr->qp_state = result.qp_state;
329	qp_attr->qkey = result.qkey;
330	qp_attr->sq_psn = result.sq_psn;
331	qp_attr->sq_draining = result.sq_draining;
332	qp_attr->port_num = 1;
333	qp_attr->rnr_retry = result.rnr_retry;
334
335	qp_attr->cap.max_send_wr = qp->max_send_wr;
336	qp_attr->cap.max_recv_wr = qp->max_recv_wr;
337	qp_attr->cap.max_send_sge = qp->max_send_sge;
338	qp_attr->cap.max_recv_sge = qp->max_recv_sge;
339	qp_attr->cap.max_inline_data = qp->max_inline_data;
340
341	qp_init_attr->qp_type = ibqp->qp_type;
342	qp_init_attr->recv_cq = ibqp->recv_cq;
343	qp_init_attr->send_cq = ibqp->send_cq;
344	qp_init_attr->qp_context = ibqp->qp_context;
345	qp_init_attr->cap = qp_attr->cap;
346
347	return 0;
348}
349
350int efa_query_gid(struct ib_device *ibdev, u32 port, int index,
351		  union ib_gid *gid)
352{
353	struct efa_dev *dev = to_edev(ibdev);
354
355	memcpy(gid->raw, dev->dev_attr.addr, sizeof(dev->dev_attr.addr));
356
357	return 0;
358}
359
360int efa_query_pkey(struct ib_device *ibdev, u32 port, u16 index,
361		   u16 *pkey)
362{
363	if (index > 0)
364		return -EINVAL;
365
366	*pkey = 0xffff;
367	return 0;
368}
369
370static int efa_pd_dealloc(struct efa_dev *dev, u16 pdn)
371{
372	struct efa_com_dealloc_pd_params params = {
373		.pdn = pdn,
374	};
375
376	return efa_com_dealloc_pd(&dev->edev, &params);
377}
378
379int efa_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
380{
381	struct efa_dev *dev = to_edev(ibpd->device);
382	struct efa_ibv_alloc_pd_resp resp = {};
383	struct efa_com_alloc_pd_result result;
384	struct efa_pd *pd = to_epd(ibpd);
385	int err;
386
387	if (udata->inlen &&
388	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
389		ibdev_dbg(&dev->ibdev,
390			  "Incompatible ABI params, udata not cleared\n");
391		err = -EINVAL;
392		goto err_out;
393	}
394
395	err = efa_com_alloc_pd(&dev->edev, &result);
396	if (err)
397		goto err_out;
398
399	pd->pdn = result.pdn;
400	resp.pdn = result.pdn;
401
402	if (udata->outlen) {
403		err = ib_copy_to_udata(udata, &resp,
404				       min(sizeof(resp), udata->outlen));
405		if (err) {
406			ibdev_dbg(&dev->ibdev,
407				  "Failed to copy udata for alloc_pd\n");
408			goto err_dealloc_pd;
409		}
410	}
411
412	ibdev_dbg(&dev->ibdev, "Allocated pd[%d]\n", pd->pdn);
413
414	return 0;
415
416err_dealloc_pd:
417	efa_pd_dealloc(dev, result.pdn);
418err_out:
419	atomic64_inc(&dev->stats.alloc_pd_err);
420	return err;
421}
422
423int efa_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
424{
425	struct efa_dev *dev = to_edev(ibpd->device);
426	struct efa_pd *pd = to_epd(ibpd);
427
428	ibdev_dbg(&dev->ibdev, "Dealloc pd[%d]\n", pd->pdn);
429	efa_pd_dealloc(dev, pd->pdn);
430	return 0;
431}
432
433static int efa_destroy_qp_handle(struct efa_dev *dev, u32 qp_handle)
434{
435	struct efa_com_destroy_qp_params params = { .qp_handle = qp_handle };
436
437	return efa_com_destroy_qp(&dev->edev, &params);
438}
439
440static void efa_qp_user_mmap_entries_remove(struct efa_qp *qp)
441{
442	rdma_user_mmap_entry_remove(qp->rq_mmap_entry);
443	rdma_user_mmap_entry_remove(qp->rq_db_mmap_entry);
444	rdma_user_mmap_entry_remove(qp->llq_desc_mmap_entry);
445	rdma_user_mmap_entry_remove(qp->sq_db_mmap_entry);
446}
447
448int efa_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
449{
450	struct efa_dev *dev = to_edev(ibqp->pd->device);
451	struct efa_qp *qp = to_eqp(ibqp);
452	int err;
453
454	ibdev_dbg(&dev->ibdev, "Destroy qp[%u]\n", ibqp->qp_num);
455
456	err = efa_destroy_qp_handle(dev, qp->qp_handle);
457	if (err)
458		return err;
459
460	efa_qp_user_mmap_entries_remove(qp);
461
462	if (qp->rq_cpu_addr) {
463		ibdev_dbg(&dev->ibdev,
464			  "qp->cpu_addr[0x%p] freed: size[%lu], dma[%pad]\n",
465			  qp->rq_cpu_addr, qp->rq_size,
466			  &qp->rq_dma_addr);
467		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
468				qp->rq_size, DMA_TO_DEVICE);
469	}
470
471	return 0;
472}
473
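/*
 * Allocate an efa_user_mmap_entry describing an address/mmap_flag pair,
 * register it with the rdma_user_mmap framework and return the opaque
 * offset userspace has to pass to mmap(). Returns NULL on any failure.
 */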
474static struct rdma_user_mmap_entry*
475efa_user_mmap_entry_insert(struct ib_ucontext *ucontext,
476			   u64 address, size_t length,
477			   u8 mmap_flag, u64 *offset)
478{
479	struct efa_user_mmap_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
480	int err;
481
482	if (!entry)
483		return NULL;
484
485	entry->address = address;
486	entry->mmap_flag = mmap_flag;
487
488	err = rdma_user_mmap_entry_insert(ucontext, &entry->rdma_entry,
489					  length);
490	if (err) {
491		kfree(entry);
492		return NULL;
493	}
494	*offset = rdma_user_mmap_get_offset(&entry->rdma_entry);
495
496	return &entry->rdma_entry;
497}
498
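/*
 * Create the mmap entries a userspace QP needs: the SQ doorbell and LLQ
 * descriptor windows in the device BARs and, when the QP has a receive
 * queue, the RQ doorbell and the RQ ring itself. The BAR offsets returned
 * to userspace are reduced to their in-page part since the mmap entries
 * are page aligned.
 */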
499static int qp_mmap_entries_setup(struct efa_qp *qp,
500				 struct efa_dev *dev,
501				 struct efa_ucontext *ucontext,
502				 struct efa_com_create_qp_params *params,
503				 struct efa_ibv_create_qp_resp *resp)
504{
505	size_t length;
506	u64 address;
507
508	address = dev->db_bar_addr + resp->sq_db_offset;
509	qp->sq_db_mmap_entry =
510		efa_user_mmap_entry_insert(&ucontext->ibucontext,
511					   address,
512					   PAGE_SIZE, EFA_MMAP_IO_NC,
513					   &resp->sq_db_mmap_key);
514	if (!qp->sq_db_mmap_entry)
515		return -ENOMEM;
516
517	resp->sq_db_offset &= ~PAGE_MASK;
518
519	address = dev->mem_bar_addr + resp->llq_desc_offset;
520	length = PAGE_ALIGN(params->sq_ring_size_in_bytes +
521			    (resp->llq_desc_offset & ~PAGE_MASK));
522
523	qp->llq_desc_mmap_entry =
524		efa_user_mmap_entry_insert(&ucontext->ibucontext,
525					   address, length,
526					   EFA_MMAP_IO_WC,
527					   &resp->llq_desc_mmap_key);
528	if (!qp->llq_desc_mmap_entry)
529		goto err_remove_mmap;
530
531	resp->llq_desc_offset &= ~PAGE_MASK;
532
533	if (qp->rq_size) {
534		address = dev->db_bar_addr + resp->rq_db_offset;
535
536		qp->rq_db_mmap_entry =
537			efa_user_mmap_entry_insert(&ucontext->ibucontext,
538						   address, PAGE_SIZE,
539						   EFA_MMAP_IO_NC,
540						   &resp->rq_db_mmap_key);
541		if (!qp->rq_db_mmap_entry)
542			goto err_remove_mmap;
543
544		resp->rq_db_offset &= ~PAGE_MASK;
545
546		address = virt_to_phys(qp->rq_cpu_addr);
547		qp->rq_mmap_entry =
548			efa_user_mmap_entry_insert(&ucontext->ibucontext,
549						   address, qp->rq_size,
550						   EFA_MMAP_DMA_PAGE,
551						   &resp->rq_mmap_key);
552		if (!qp->rq_mmap_entry)
553			goto err_remove_mmap;
554
555		resp->rq_mmap_size = qp->rq_size;
556	}
557
558	return 0;
559
560err_remove_mmap:
561	efa_qp_user_mmap_entries_remove(qp);
562
563	return -ENOMEM;
564}
565
566static int efa_qp_validate_cap(struct efa_dev *dev,
567			       struct ib_qp_init_attr *init_attr)
568{
569	if (init_attr->cap.max_send_wr > dev->dev_attr.max_sq_depth) {
570		ibdev_dbg(&dev->ibdev,
571			  "qp: requested send wr[%u] exceeds the max[%u]\n",
572			  init_attr->cap.max_send_wr,
573			  dev->dev_attr.max_sq_depth);
574		return -EINVAL;
575	}
576	if (init_attr->cap.max_recv_wr > dev->dev_attr.max_rq_depth) {
577		ibdev_dbg(&dev->ibdev,
578			  "qp: requested receive wr[%u] exceeds the max[%u]\n",
579			  init_attr->cap.max_recv_wr,
580			  dev->dev_attr.max_rq_depth);
581		return -EINVAL;
582	}
583	if (init_attr->cap.max_send_sge > dev->dev_attr.max_sq_sge) {
584		ibdev_dbg(&dev->ibdev,
585			  "qp: requested sge send[%u] exceeds the max[%u]\n",
586			  init_attr->cap.max_send_sge, dev->dev_attr.max_sq_sge);
587		return -EINVAL;
588	}
589	if (init_attr->cap.max_recv_sge > dev->dev_attr.max_rq_sge) {
590		ibdev_dbg(&dev->ibdev,
591			  "qp: requested sge recv[%u] exceeds the max[%u]\n",
592			  init_attr->cap.max_recv_sge, dev->dev_attr.max_rq_sge);
593		return -EINVAL;
594	}
595	if (init_attr->cap.max_inline_data > dev->dev_attr.inline_buf_size) {
596		ibdev_dbg(&dev->ibdev,
597			  "qp: requested inline data[%u] exceeds the max[%u]\n",
598			  init_attr->cap.max_inline_data,
599			  dev->dev_attr.inline_buf_size);
600		return -EINVAL;
601	}
602
603	return 0;
604}
605
606static int efa_qp_validate_attr(struct efa_dev *dev,
607				struct ib_qp_init_attr *init_attr)
608{
609	if (init_attr->qp_type != IB_QPT_DRIVER &&
610	    init_attr->qp_type != IB_QPT_UD) {
611		ibdev_dbg(&dev->ibdev,
612			  "Unsupported qp type %d\n", init_attr->qp_type);
613		return -EOPNOTSUPP;
614	}
615
616	if (init_attr->srq) {
617		ibdev_dbg(&dev->ibdev, "SRQ is not supported\n");
618		return -EOPNOTSUPP;
619	}
620
621	if (init_attr->create_flags) {
622		ibdev_dbg(&dev->ibdev, "Unsupported create flags\n");
623		return -EOPNOTSUPP;
624	}
625
626	return 0;
627}
628
629int efa_create_qp(struct ib_qp *ibqp, struct ib_qp_init_attr *init_attr,
630		  struct ib_udata *udata)
631{
632	struct efa_com_create_qp_params create_qp_params = {};
633	struct efa_com_create_qp_result create_qp_resp;
634	struct efa_dev *dev = to_edev(ibqp->device);
635	struct efa_ibv_create_qp_resp resp = {};
636	struct efa_ibv_create_qp cmd = {};
637	struct efa_qp *qp = to_eqp(ibqp);
638	struct efa_ucontext *ucontext;
639	int err;
640
641	ucontext = rdma_udata_to_drv_context(udata, struct efa_ucontext,
642					     ibucontext);
643
644	err = efa_qp_validate_cap(dev, init_attr);
645	if (err)
646		goto err_out;
647
648	err = efa_qp_validate_attr(dev, init_attr);
649	if (err)
650		goto err_out;
651
652	if (offsetofend(typeof(cmd), driver_qp_type) > udata->inlen) {
653		ibdev_dbg(&dev->ibdev,
654			  "Incompatible ABI params, no input udata\n");
655		err = -EINVAL;
656		goto err_out;
657	}
658
659	if (udata->inlen > sizeof(cmd) &&
660	    !ib_is_udata_cleared(udata, sizeof(cmd),
661				 udata->inlen - sizeof(cmd))) {
662		ibdev_dbg(&dev->ibdev,
663			  "Incompatible ABI params, unknown fields in udata\n");
664		err = -EINVAL;
665		goto err_out;
666	}
667
668	err = ib_copy_from_udata(&cmd, udata,
669				 min(sizeof(cmd), udata->inlen));
670	if (err) {
671		ibdev_dbg(&dev->ibdev,
672			  "Cannot copy udata for create_qp\n");
673		goto err_out;
674	}
675
676	if (cmd.comp_mask) {
677		ibdev_dbg(&dev->ibdev,
678			  "Incompatible ABI params, unknown fields in udata\n");
679		err = -EINVAL;
680		goto err_out;
681	}
682
683	create_qp_params.uarn = ucontext->uarn;
684	create_qp_params.pd = to_epd(ibqp->pd)->pdn;
685
686	if (init_attr->qp_type == IB_QPT_UD) {
687		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_UD;
688	} else if (cmd.driver_qp_type == EFA_QP_DRIVER_TYPE_SRD) {
689		create_qp_params.qp_type = EFA_ADMIN_QP_TYPE_SRD;
690	} else {
691		ibdev_dbg(&dev->ibdev,
692			  "Unsupported qp type %d driver qp type %d\n",
693			  init_attr->qp_type, cmd.driver_qp_type);
694		err = -EOPNOTSUPP;
695		goto err_out;
696	}
697
698	ibdev_dbg(&dev->ibdev, "Create QP: qp type %d driver qp type %#x\n",
699		  init_attr->qp_type, cmd.driver_qp_type);
700	create_qp_params.send_cq_idx = to_ecq(init_attr->send_cq)->cq_idx;
701	create_qp_params.recv_cq_idx = to_ecq(init_attr->recv_cq)->cq_idx;
702	create_qp_params.sq_depth = init_attr->cap.max_send_wr;
703	create_qp_params.sq_ring_size_in_bytes = cmd.sq_ring_size;
704
705	create_qp_params.rq_depth = init_attr->cap.max_recv_wr;
706	create_qp_params.rq_ring_size_in_bytes = cmd.rq_ring_size;
707	qp->rq_size = PAGE_ALIGN(create_qp_params.rq_ring_size_in_bytes);
708	if (qp->rq_size) {
709		qp->rq_cpu_addr = efa_zalloc_mapped(dev, &qp->rq_dma_addr,
710						    qp->rq_size, DMA_TO_DEVICE);
711		if (!qp->rq_cpu_addr) {
712			err = -ENOMEM;
713			goto err_out;
714		}
715
716		ibdev_dbg(&dev->ibdev,
717			  "qp->cpu_addr[0x%p] allocated: size[%lu], dma[%pad]\n",
718			  qp->rq_cpu_addr, qp->rq_size, &qp->rq_dma_addr);
719		create_qp_params.rq_base_addr = qp->rq_dma_addr;
720	}
721
722	err = efa_com_create_qp(&dev->edev, &create_qp_params,
723				&create_qp_resp);
724	if (err)
725		goto err_free_mapped;
726
727	resp.sq_db_offset = create_qp_resp.sq_db_offset;
728	resp.rq_db_offset = create_qp_resp.rq_db_offset;
729	resp.llq_desc_offset = create_qp_resp.llq_descriptors_offset;
730	resp.send_sub_cq_idx = create_qp_resp.send_sub_cq_idx;
731	resp.recv_sub_cq_idx = create_qp_resp.recv_sub_cq_idx;
732
733	err = qp_mmap_entries_setup(qp, dev, ucontext, &create_qp_params,
734				    &resp);
735	if (err)
736		goto err_destroy_qp;
737
738	qp->qp_handle = create_qp_resp.qp_handle;
739	qp->ibqp.qp_num = create_qp_resp.qp_num;
740	qp->max_send_wr = init_attr->cap.max_send_wr;
741	qp->max_recv_wr = init_attr->cap.max_recv_wr;
742	qp->max_send_sge = init_attr->cap.max_send_sge;
743	qp->max_recv_sge = init_attr->cap.max_recv_sge;
744	qp->max_inline_data = init_attr->cap.max_inline_data;
745
746	if (udata->outlen) {
747		err = ib_copy_to_udata(udata, &resp,
748				       min(sizeof(resp), udata->outlen));
749		if (err) {
750			ibdev_dbg(&dev->ibdev,
751				  "Failed to copy udata for qp[%u]\n",
752				  create_qp_resp.qp_num);
753			goto err_remove_mmap_entries;
754		}
755	}
756
757	ibdev_dbg(&dev->ibdev, "Created qp[%d]\n", qp->ibqp.qp_num);
758
759	return 0;
760
761err_remove_mmap_entries:
762	efa_qp_user_mmap_entries_remove(qp);
763err_destroy_qp:
764	efa_destroy_qp_handle(dev, create_qp_resp.qp_handle);
765err_free_mapped:
766	if (qp->rq_size)
767		efa_free_mapped(dev, qp->rq_cpu_addr, qp->rq_dma_addr,
768				qp->rq_size, DMA_TO_DEVICE);
769err_out:
770	atomic64_inc(&dev->stats.create_qp_err);
771	return err;
772}
773
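/*
 * Allowed state transitions and their required/optional attribute masks
 * for SRD (IB_QPT_DRIVER) QPs. This plays the role that the core
 * ib_modify_qp_is_ok() check plays for UD QPs below, with SRD-specific
 * extras such as accepting IB_QP_RNR_RETRY on the RTR->RTS transition.
 */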
774static const struct {
775	int			valid;
776	enum ib_qp_attr_mask	req_param;
777	enum ib_qp_attr_mask	opt_param;
778} srd_qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
779	[IB_QPS_RESET] = {
780		[IB_QPS_RESET] = { .valid = 1 },
781		[IB_QPS_INIT]  = {
782			.valid = 1,
783			.req_param = IB_QP_PKEY_INDEX |
784				     IB_QP_PORT |
785				     IB_QP_QKEY,
786		},
787	},
788	[IB_QPS_INIT] = {
789		[IB_QPS_RESET] = { .valid = 1 },
790		[IB_QPS_ERR]   = { .valid = 1 },
791		[IB_QPS_INIT]  = {
792			.valid = 1,
793			.opt_param = IB_QP_PKEY_INDEX |
794				     IB_QP_PORT |
795				     IB_QP_QKEY,
796		},
797		[IB_QPS_RTR]   = {
798			.valid = 1,
799			.opt_param = IB_QP_PKEY_INDEX |
800				     IB_QP_QKEY,
801		},
802	},
803	[IB_QPS_RTR] = {
804		[IB_QPS_RESET] = { .valid = 1 },
805		[IB_QPS_ERR]   = { .valid = 1 },
806		[IB_QPS_RTS]   = {
807			.valid = 1,
808			.req_param = IB_QP_SQ_PSN,
809			.opt_param = IB_QP_CUR_STATE |
810				     IB_QP_QKEY |
811				     IB_QP_RNR_RETRY,
812
813		}
814	},
815	[IB_QPS_RTS] = {
816		[IB_QPS_RESET] = { .valid = 1 },
817		[IB_QPS_ERR]   = { .valid = 1 },
818		[IB_QPS_RTS]   = {
819			.valid = 1,
820			.opt_param = IB_QP_CUR_STATE |
821				     IB_QP_QKEY,
822		},
823		[IB_QPS_SQD] = {
824			.valid = 1,
825			.opt_param = IB_QP_EN_SQD_ASYNC_NOTIFY,
826		},
827	},
828	[IB_QPS_SQD] = {
829		[IB_QPS_RESET] = { .valid = 1 },
830		[IB_QPS_ERR]   = { .valid = 1 },
831		[IB_QPS_RTS]   = {
832			.valid = 1,
833			.opt_param = IB_QP_CUR_STATE |
834				     IB_QP_QKEY,
835		},
836		[IB_QPS_SQD] = {
837			.valid = 1,
838			.opt_param = IB_QP_PKEY_INDEX |
839				     IB_QP_QKEY,
840		}
841	},
842	[IB_QPS_SQE] = {
843		[IB_QPS_RESET] = { .valid = 1 },
844		[IB_QPS_ERR]   = { .valid = 1 },
845		[IB_QPS_RTS]   = {
846			.valid = 1,
847			.opt_param = IB_QP_CUR_STATE |
848				     IB_QP_QKEY,
849		}
850	},
851	[IB_QPS_ERR] = {
852		[IB_QPS_RESET] = { .valid = 1 },
853		[IB_QPS_ERR]   = { .valid = 1 },
854	}
855};
856
857static bool efa_modify_srd_qp_is_ok(enum ib_qp_state cur_state,
858				    enum ib_qp_state next_state,
859				    enum ib_qp_attr_mask mask)
860{
861	enum ib_qp_attr_mask req_param, opt_param;
862
863	if (mask & IB_QP_CUR_STATE  &&
864	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
865	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
866		return false;
867
868	if (!srd_qp_state_table[cur_state][next_state].valid)
869		return false;
870
871	req_param = srd_qp_state_table[cur_state][next_state].req_param;
872	opt_param = srd_qp_state_table[cur_state][next_state].opt_param;
873
874	if ((mask & req_param) != req_param)
875		return false;
876
877	if (mask & ~(req_param | opt_param | IB_QP_STATE))
878		return false;
879
880	return true;
881}
882
883static int efa_modify_qp_validate(struct efa_dev *dev, struct efa_qp *qp,
884				  struct ib_qp_attr *qp_attr, int qp_attr_mask,
885				  enum ib_qp_state cur_state,
886				  enum ib_qp_state new_state)
887{
888	int err;
889
890#define EFA_MODIFY_QP_SUPP_MASK \
891	(IB_QP_STATE | IB_QP_CUR_STATE | IB_QP_EN_SQD_ASYNC_NOTIFY | \
892	 IB_QP_PKEY_INDEX | IB_QP_PORT | IB_QP_QKEY | IB_QP_SQ_PSN | \
893	 IB_QP_RNR_RETRY)
894
895	if (qp_attr_mask & ~EFA_MODIFY_QP_SUPP_MASK) {
896		ibdev_dbg(&dev->ibdev,
897			  "Unsupported qp_attr_mask[%#x] supported[%#x]\n",
898			  qp_attr_mask, EFA_MODIFY_QP_SUPP_MASK);
899		return -EOPNOTSUPP;
900	}
901
902	if (qp->ibqp.qp_type == IB_QPT_DRIVER)
903		err = !efa_modify_srd_qp_is_ok(cur_state, new_state,
904					       qp_attr_mask);
905	else
906		err = !ib_modify_qp_is_ok(cur_state, new_state, IB_QPT_UD,
907					  qp_attr_mask);
908
909	if (err) {
910		ibdev_dbg(&dev->ibdev, "Invalid modify QP parameters\n");
911		return -EINVAL;
912	}
913
914	if ((qp_attr_mask & IB_QP_PORT) && qp_attr->port_num != 1) {
915		ibdev_dbg(&dev->ibdev, "Can't change port num\n");
916		return -EOPNOTSUPP;
917	}
918
919	if ((qp_attr_mask & IB_QP_PKEY_INDEX) && qp_attr->pkey_index) {
920		ibdev_dbg(&dev->ibdev, "Can't change pkey index\n");
921		return -EOPNOTSUPP;
922	}
923
924	return 0;
925}
926
927int efa_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *qp_attr,
928		  int qp_attr_mask, struct ib_udata *udata)
929{
930	struct efa_dev *dev = to_edev(ibqp->device);
931	struct efa_com_modify_qp_params params = {};
932	struct efa_qp *qp = to_eqp(ibqp);
933	enum ib_qp_state cur_state;
934	enum ib_qp_state new_state;
935	int err;
936
937	if (qp_attr_mask & ~IB_QP_ATTR_STANDARD_BITS)
938		return -EOPNOTSUPP;
939
940	if (udata->inlen &&
941	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
942		ibdev_dbg(&dev->ibdev,
943			  "Incompatible ABI params, udata not cleared\n");
944		return -EINVAL;
945	}
946
947	cur_state = qp_attr_mask & IB_QP_CUR_STATE ? qp_attr->cur_qp_state :
948						     qp->state;
949	new_state = qp_attr_mask & IB_QP_STATE ? qp_attr->qp_state : cur_state;
950
951	err = efa_modify_qp_validate(dev, qp, qp_attr, qp_attr_mask, cur_state,
952				     new_state);
953	if (err)
954		return err;
955
956	params.qp_handle = qp->qp_handle;
957
958	if (qp_attr_mask & IB_QP_STATE) {
959		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QP_STATE,
960			1);
961		EFA_SET(&params.modify_mask,
962			EFA_ADMIN_MODIFY_QP_CMD_CUR_QP_STATE, 1);
963		params.cur_qp_state = cur_state;
964		params.qp_state = new_state;
965	}
966
967	if (qp_attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY) {
968		EFA_SET(&params.modify_mask,
969			EFA_ADMIN_MODIFY_QP_CMD_SQ_DRAINED_ASYNC_NOTIFY, 1);
970		params.sq_drained_async_notify = qp_attr->en_sqd_async_notify;
971	}
972
973	if (qp_attr_mask & IB_QP_QKEY) {
974		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_QKEY, 1);
975		params.qkey = qp_attr->qkey;
976	}
977
978	if (qp_attr_mask & IB_QP_SQ_PSN) {
979		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_SQ_PSN, 1);
980		params.sq_psn = qp_attr->sq_psn;
981	}
982
983	if (qp_attr_mask & IB_QP_RNR_RETRY) {
984		EFA_SET(&params.modify_mask, EFA_ADMIN_MODIFY_QP_CMD_RNR_RETRY,
985			1);
986		params.rnr_retry = qp_attr->rnr_retry;
987	}
988
989	err = efa_com_modify_qp(&dev->edev, &params);
990	if (err)
991		return err;
992
993	qp->state = new_state;
994
995	return 0;
996}
997
998static int efa_destroy_cq_idx(struct efa_dev *dev, int cq_idx)
999{
1000	struct efa_com_destroy_cq_params params = { .cq_idx = cq_idx };
1001
1002	return efa_com_destroy_cq(&dev->edev, &params);
1003}
1004
1005static void efa_cq_user_mmap_entries_remove(struct efa_cq *cq)
1006{
1007	rdma_user_mmap_entry_remove(cq->db_mmap_entry);
1008	rdma_user_mmap_entry_remove(cq->mmap_entry);
1009}
1010
1011int efa_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1012{
1013	struct efa_dev *dev = to_edev(ibcq->device);
1014	struct efa_cq *cq = to_ecq(ibcq);
1015
1016	ibdev_dbg(&dev->ibdev,
1017		  "Destroy cq[%d] virt[0x%p] freed: size[%lu], dma[%pad]\n",
1018		  cq->cq_idx, cq->cpu_addr, cq->size, &cq->dma_addr);
1019
1020	efa_destroy_cq_idx(dev, cq->cq_idx);
1021	efa_cq_user_mmap_entries_remove(cq);
1022	if (cq->eq) {
1023		xa_erase(&dev->cqs_xa, cq->cq_idx);
1024		synchronize_irq(cq->eq->irq.irqn);
1025	}
1026	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
1027			DMA_FROM_DEVICE);
1028	return 0;
1029}
1030
1031static struct efa_eq *efa_vec2eq(struct efa_dev *dev, int vec)
1032{
1033	return &dev->eqs[vec];
1034}
1035
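/*
 * Expose the CQ buffer, and the CQ doorbell page when the device reports
 * one, to userspace through mmap entries and return the mmap keys in the
 * create_cq response.
 */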
1036static int cq_mmap_entries_setup(struct efa_dev *dev, struct efa_cq *cq,
1037				 struct efa_ibv_create_cq_resp *resp,
1038				 bool db_valid)
1039{
1040	resp->q_mmap_size = cq->size;
1041	cq->mmap_entry = efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
1042						    virt_to_phys(cq->cpu_addr),
1043						    cq->size, EFA_MMAP_DMA_PAGE,
1044						    &resp->q_mmap_key);
1045	if (!cq->mmap_entry)
1046		return -ENOMEM;
1047
1048	if (db_valid) {
1049		cq->db_mmap_entry =
1050			efa_user_mmap_entry_insert(&cq->ucontext->ibucontext,
1051						   dev->db_bar_addr + resp->db_off,
1052						   PAGE_SIZE, EFA_MMAP_IO_NC,
1053						   &resp->db_mmap_key);
1054		if (!cq->db_mmap_entry) {
1055			rdma_user_mmap_entry_remove(cq->mmap_entry);
1056			return -ENOMEM;
1057		}
1058
1059		resp->db_off &= ~PAGE_MASK;
1060		resp->comp_mask |= EFA_CREATE_CQ_RESP_DB_OFF;
1061	}
1062
1063	return 0;
1064}
1065
1066int efa_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
1067		  struct ib_udata *udata)
1068{
1069	struct efa_ucontext *ucontext = rdma_udata_to_drv_context(
1070		udata, struct efa_ucontext, ibucontext);
1071	struct efa_com_create_cq_params params = {};
1072	struct efa_ibv_create_cq_resp resp = {};
1073	struct efa_com_create_cq_result result;
1074	struct ib_device *ibdev = ibcq->device;
1075	struct efa_dev *dev = to_edev(ibdev);
1076	struct efa_ibv_create_cq cmd = {};
1077	struct efa_cq *cq = to_ecq(ibcq);
1078	int entries = attr->cqe;
1079	bool set_src_addr;
1080	int err;
1081
1082	ibdev_dbg(ibdev, "create_cq entries %d\n", entries);
1083
1084	if (attr->flags)
1085		return -EOPNOTSUPP;
1086
1087	if (entries < 1 || entries > dev->dev_attr.max_cq_depth) {
1088		ibdev_dbg(ibdev,
1089			  "cq: requested entries[%u] non-positive or greater than max[%u]\n",
1090			  entries, dev->dev_attr.max_cq_depth);
1091		err = -EINVAL;
1092		goto err_out;
1093	}
1094
1095	if (offsetofend(typeof(cmd), num_sub_cqs) > udata->inlen) {
1096		ibdev_dbg(ibdev,
1097			  "Incompatible ABI params, no input udata\n");
1098		err = -EINVAL;
1099		goto err_out;
1100	}
1101
1102	if (udata->inlen > sizeof(cmd) &&
1103	    !ib_is_udata_cleared(udata, sizeof(cmd),
1104				 udata->inlen - sizeof(cmd))) {
1105		ibdev_dbg(ibdev,
1106			  "Incompatible ABI params, unknown fields in udata\n");
1107		err = -EINVAL;
1108		goto err_out;
1109	}
1110
1111	err = ib_copy_from_udata(&cmd, udata,
1112				 min(sizeof(cmd), udata->inlen));
1113	if (err) {
1114		ibdev_dbg(ibdev, "Cannot copy udata for create_cq\n");
1115		goto err_out;
1116	}
1117
1118	if (cmd.comp_mask || !is_reserved_cleared(cmd.reserved_58)) {
1119		ibdev_dbg(ibdev,
1120			  "Incompatible ABI params, unknown fields in udata\n");
1121		err = -EINVAL;
1122		goto err_out;
1123	}
1124
1125	set_src_addr = !!(cmd.flags & EFA_CREATE_CQ_WITH_SGID);
1126	if ((cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc_ex)) &&
1127	    (set_src_addr ||
1128	     cmd.cq_entry_size != sizeof(struct efa_io_rx_cdesc))) {
1129		ibdev_dbg(ibdev,
1130			  "Invalid entry size [%u]\n", cmd.cq_entry_size);
1131		err = -EINVAL;
1132		goto err_out;
1133	}
1134
1135	if (cmd.num_sub_cqs != dev->dev_attr.sub_cqs_per_cq) {
1136		ibdev_dbg(ibdev,
1137			  "Invalid number of sub cqs[%u] expected[%u]\n",
1138			  cmd.num_sub_cqs, dev->dev_attr.sub_cqs_per_cq);
1139		err = -EINVAL;
1140		goto err_out;
1141	}
1142
1143	cq->ucontext = ucontext;
1144	cq->size = PAGE_ALIGN(cmd.cq_entry_size * entries * cmd.num_sub_cqs);
1145	cq->cpu_addr = efa_zalloc_mapped(dev, &cq->dma_addr, cq->size,
1146					 DMA_FROM_DEVICE);
1147	if (!cq->cpu_addr) {
1148		err = -ENOMEM;
1149		goto err_out;
1150	}
1151
1152	params.uarn = cq->ucontext->uarn;
1153	params.cq_depth = entries;
1154	params.dma_addr = cq->dma_addr;
1155	params.entry_size_in_bytes = cmd.cq_entry_size;
1156	params.num_sub_cqs = cmd.num_sub_cqs;
1157	params.set_src_addr = set_src_addr;
1158	if (cmd.flags & EFA_CREATE_CQ_WITH_COMPLETION_CHANNEL) {
1159		cq->eq = efa_vec2eq(dev, attr->comp_vector);
1160		params.eqn = cq->eq->eeq.eqn;
1161		params.interrupt_mode_enabled = true;
1162	}
1163
1164	err = efa_com_create_cq(&dev->edev, &params, &result);
1165	if (err)
1166		goto err_free_mapped;
1167
1168	resp.db_off = result.db_off;
1169	resp.cq_idx = result.cq_idx;
1170	cq->cq_idx = result.cq_idx;
1171	cq->ibcq.cqe = result.actual_depth;
1172	WARN_ON_ONCE(entries != result.actual_depth);
1173
1174	err = cq_mmap_entries_setup(dev, cq, &resp, result.db_valid);
1175	if (err) {
1176		ibdev_dbg(ibdev, "Could not setup cq[%u] mmap entries\n",
1177			  cq->cq_idx);
1178		goto err_destroy_cq;
1179	}
1180
1181	if (cq->eq) {
1182		err = xa_err(xa_store(&dev->cqs_xa, cq->cq_idx, cq, GFP_KERNEL));
1183		if (err) {
1184			ibdev_dbg(ibdev, "Failed to store cq[%u] in xarray\n",
1185				  cq->cq_idx);
1186			goto err_remove_mmap;
1187		}
1188	}
1189
1190	if (udata->outlen) {
1191		err = ib_copy_to_udata(udata, &resp,
1192				       min(sizeof(resp), udata->outlen));
1193		if (err) {
1194			ibdev_dbg(ibdev,
1195				  "Failed to copy udata for create_cq\n");
1196			goto err_xa_erase;
1197		}
1198	}
1199
1200	ibdev_dbg(ibdev, "Created cq[%d], cq depth[%u]. dma[%pad] virt[0x%p]\n",
1201		  cq->cq_idx, result.actual_depth, &cq->dma_addr, cq->cpu_addr);
1202
1203	return 0;
1204
1205err_xa_erase:
1206	if (cq->eq)
1207		xa_erase(&dev->cqs_xa, cq->cq_idx);
1208err_remove_mmap:
1209	efa_cq_user_mmap_entries_remove(cq);
1210err_destroy_cq:
1211	efa_destroy_cq_idx(dev, cq->cq_idx);
1212err_free_mapped:
1213	efa_free_mapped(dev, cq->cpu_addr, cq->dma_addr, cq->size,
1214			DMA_FROM_DEVICE);
1215
1216err_out:
1217	atomic64_inc(&dev->stats.create_cq_err);
1218	return err;
1219}
1220
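/*
 * Flatten a umem into an array of DMA addresses, one entry for every
 * BIT(hp_shift) sized block, which may be larger than the system
 * PAGE_SIZE when the MR uses bigger device pages.
 */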
1221static int umem_to_page_list(struct efa_dev *dev,
1222			     struct ib_umem *umem,
1223			     u64 *page_list,
1224			     u32 hp_cnt,
1225			     u8 hp_shift)
1226{
1227	u32 pages_in_hp = BIT(hp_shift - PAGE_SHIFT);
1228	struct ib_block_iter biter;
1229	unsigned int hp_idx = 0;
1230
1231	ibdev_dbg(&dev->ibdev, "hp_cnt[%u], pages_in_hp[%u]\n",
1232		  hp_cnt, pages_in_hp);
1233
1234	rdma_umem_for_each_dma_block(umem, &biter, BIT(hp_shift))
1235		page_list[hp_idx++] = rdma_block_iter_dma_address(&biter);
1236
1237	return 0;
1238}
1239
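/*
 * Build a scatterlist over the pages backing a vmalloc'ed buffer so that
 * the indirect pbl buffer can be DMA-mapped with dma_map_sg(). Returns
 * NULL if any page lookup fails.
 */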
1240static struct scatterlist *efa_vmalloc_buf_to_sg(u64 *buf, int page_cnt)
1241{
1242	struct scatterlist *sglist;
1243	struct page *pg;
1244	int i;
1245
1246	sglist = kmalloc_array(page_cnt, sizeof(*sglist), GFP_KERNEL);
1247	if (!sglist)
1248		return NULL;
1249	sg_init_table(sglist, page_cnt);
1250	for (i = 0; i < page_cnt; i++) {
1251		pg = vmalloc_to_page(buf);
1252		if (!pg)
1253			goto err;
1254		sg_set_page(&sglist[i], pg, PAGE_SIZE, 0);
1255		buf += PAGE_SIZE / sizeof(*buf);
1256	}
1257	return sglist;
1258
1259err:
1260	kfree(sglist);
1261	return NULL;
1262}
1263
1264/*
1265 * create a chunk list holding the dma addresses of the physical pages in
1266 * the supplied scatter gather list
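 *
 * chunks are dma mapped from last to first so that each chunk's trailing
 * efa_com_ctrl_buff_info can be filled with the dma address and length of
 * the chunk that follows it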
1267 */
1268static int pbl_chunk_list_create(struct efa_dev *dev, struct pbl_context *pbl)
1269{
1270	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1271	int page_cnt = pbl->phys.indirect.pbl_buf_size_in_pages;
1272	struct scatterlist *pages_sgl = pbl->phys.indirect.sgl;
1273	unsigned int chunk_list_size, chunk_idx, payload_idx;
1274	int sg_dma_cnt = pbl->phys.indirect.sg_dma_cnt;
1275	struct efa_com_ctrl_buff_info *ctrl_buf;
1276	u64 *cur_chunk_buf, *prev_chunk_buf;
1277	struct ib_block_iter biter;
1278	dma_addr_t dma_addr;
1279	int i;
1280
1281	/* allocate a chunk list that consists of 4KB chunks */
1282	chunk_list_size = DIV_ROUND_UP(page_cnt, EFA_PTRS_PER_CHUNK);
1283
1284	chunk_list->size = chunk_list_size;
1285	chunk_list->chunks = kcalloc(chunk_list_size,
1286				     sizeof(*chunk_list->chunks),
1287				     GFP_KERNEL);
1288	if (!chunk_list->chunks)
1289		return -ENOMEM;
1290
1291	ibdev_dbg(&dev->ibdev,
1292		  "chunk_list_size[%u] - pages[%u]\n", chunk_list_size,
1293		  page_cnt);
1294
1295	/* allocate chunk buffers: */
1296	for (i = 0; i < chunk_list_size; i++) {
1297		chunk_list->chunks[i].buf = kzalloc(EFA_CHUNK_SIZE, GFP_KERNEL);
1298		if (!chunk_list->chunks[i].buf)
1299			goto chunk_list_dealloc;
1300
1301		chunk_list->chunks[i].length = EFA_CHUNK_USED_SIZE;
1302	}
1303	chunk_list->chunks[chunk_list_size - 1].length =
1304		((page_cnt % EFA_PTRS_PER_CHUNK) * EFA_CHUNK_PAYLOAD_PTR_SIZE) +
1305			EFA_CHUNK_PTR_SIZE;
1306
1307	/* fill the chunks with the dma addresses of the sg list pages: */
1308	chunk_idx = 0;
1309	payload_idx = 0;
1310	cur_chunk_buf = chunk_list->chunks[0].buf;
1311	rdma_for_each_block(pages_sgl, &biter, sg_dma_cnt,
1312			    EFA_CHUNK_PAYLOAD_SIZE) {
1313		cur_chunk_buf[payload_idx++] =
1314			rdma_block_iter_dma_address(&biter);
1315
1316		if (payload_idx == EFA_PTRS_PER_CHUNK) {
1317			chunk_idx++;
1318			cur_chunk_buf = chunk_list->chunks[chunk_idx].buf;
1319			payload_idx = 0;
1320		}
1321	}
1322
1323	/* map the chunks for dma and fill each chunk's next pointer */
1324	for (i = chunk_list_size - 1; i >= 0; i--) {
1325		dma_addr = dma_map_single(&dev->pdev->dev,
1326					  chunk_list->chunks[i].buf,
1327					  chunk_list->chunks[i].length,
1328					  DMA_TO_DEVICE);
1329		if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1330			ibdev_err(&dev->ibdev,
1331				  "chunk[%u] dma_map_failed\n", i);
1332			goto chunk_list_unmap;
1333		}
1334
1335		chunk_list->chunks[i].dma_addr = dma_addr;
1336		ibdev_dbg(&dev->ibdev,
1337			  "chunk[%u] mapped at [%pad]\n", i, &dma_addr);
1338
1339		if (!i)
1340			break;
1341
1342		prev_chunk_buf = chunk_list->chunks[i - 1].buf;
1343
1344		ctrl_buf = (struct efa_com_ctrl_buff_info *)
1345				&prev_chunk_buf[EFA_PTRS_PER_CHUNK];
1346		ctrl_buf->length = chunk_list->chunks[i].length;
1347
1348		efa_com_set_dma_addr(dma_addr,
1349				     &ctrl_buf->address.mem_addr_high,
1350				     &ctrl_buf->address.mem_addr_low);
1351	}
1352
1353	return 0;
1354
1355chunk_list_unmap:
1356	for (; i < chunk_list_size; i++) {
1357		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1358				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
1359	}
1360chunk_list_dealloc:
1361	for (i = 0; i < chunk_list_size; i++)
1362		kfree(chunk_list->chunks[i].buf);
1363
1364	kfree(chunk_list->chunks);
1365	return -ENOMEM;
1366}
1367
1368static void pbl_chunk_list_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1369{
1370	struct pbl_chunk_list *chunk_list = &pbl->phys.indirect.chunk_list;
1371	int i;
1372
1373	for (i = 0; i < chunk_list->size; i++) {
1374		dma_unmap_single(&dev->pdev->dev, chunk_list->chunks[i].dma_addr,
1375				 chunk_list->chunks[i].length, DMA_TO_DEVICE);
1376		kfree(chunk_list->chunks[i].buf);
1377	}
1378
1379	kfree(chunk_list->chunks);
1380}
1381
1382/* initialize pbl continuous mode: map pbl buffer to a dma address. */
1383static int pbl_continuous_initialize(struct efa_dev *dev,
1384				     struct pbl_context *pbl)
1385{
1386	dma_addr_t dma_addr;
1387
1388	dma_addr = dma_map_single(&dev->pdev->dev, pbl->pbl_buf,
1389				  pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1390	if (dma_mapping_error(&dev->pdev->dev, dma_addr)) {
1391		ibdev_err(&dev->ibdev, "Unable to map pbl to DMA address\n");
1392		return -ENOMEM;
1393	}
1394
1395	pbl->phys.continuous.dma_addr = dma_addr;
1396	ibdev_dbg(&dev->ibdev,
1397		  "pbl continuous - dma_addr = %pad, size[%u]\n",
1398		  &dma_addr, pbl->pbl_buf_size_in_bytes);
1399
1400	return 0;
1401}
1402
1403/*
1404 * initialize pbl indirect mode:
1405 * create a chunk list out of the dma addresses of the physical pages of
1406 * pbl buffer.
1407 */
1408static int pbl_indirect_initialize(struct efa_dev *dev, struct pbl_context *pbl)
1409{
1410	u32 size_in_pages = DIV_ROUND_UP(pbl->pbl_buf_size_in_bytes, EFA_CHUNK_PAYLOAD_SIZE);
1411	struct scatterlist *sgl;
1412	int sg_dma_cnt, err;
1413
1414	BUILD_BUG_ON(EFA_CHUNK_PAYLOAD_SIZE > PAGE_SIZE);
1415	sgl = efa_vmalloc_buf_to_sg(pbl->pbl_buf, size_in_pages);
1416	if (!sgl)
1417		return -ENOMEM;
1418
1419	sg_dma_cnt = dma_map_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1420	if (!sg_dma_cnt) {
1421		err = -EINVAL;
1422		goto err_map;
1423	}
1424
1425	pbl->phys.indirect.pbl_buf_size_in_pages = size_in_pages;
1426	pbl->phys.indirect.sgl = sgl;
1427	pbl->phys.indirect.sg_dma_cnt = sg_dma_cnt;
1428	err = pbl_chunk_list_create(dev, pbl);
1429	if (err) {
1430		ibdev_dbg(&dev->ibdev,
1431			  "chunk_list creation failed[%d]\n", err);
1432		goto err_chunk;
1433	}
1434
1435	ibdev_dbg(&dev->ibdev,
1436		  "pbl indirect - size[%u], chunks[%u]\n",
1437		  pbl->pbl_buf_size_in_bytes,
1438		  pbl->phys.indirect.chunk_list.size);
1439
1440	return 0;
1441
1442err_chunk:
1443	dma_unmap_sg(&dev->pdev->dev, sgl, size_in_pages, DMA_TO_DEVICE);
1444err_map:
1445	kfree(sgl);
1446	return err;
1447}
1448
1449static void pbl_indirect_terminate(struct efa_dev *dev, struct pbl_context *pbl)
1450{
1451	pbl_chunk_list_destroy(dev, pbl);
1452	dma_unmap_sg(&dev->pdev->dev, pbl->phys.indirect.sgl,
1453		     pbl->phys.indirect.pbl_buf_size_in_pages, DMA_TO_DEVICE);
1454	kfree(pbl->phys.indirect.sgl);
1455}
1456
1457/* create a page buffer list from a mapped user memory region */
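/*
 * The pbl buffer is allocated with kvzalloc(): when the allocation turns
 * out physically contiguous it is DMA-mapped directly, otherwise the
 * vmalloc'ed buffer is handed to the device as an indirect chunk list.
 */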
1458static int pbl_create(struct efa_dev *dev,
1459		      struct pbl_context *pbl,
1460		      struct ib_umem *umem,
1461		      int hp_cnt,
1462		      u8 hp_shift)
1463{
1464	int err;
1465
1466	pbl->pbl_buf_size_in_bytes = hp_cnt * EFA_CHUNK_PAYLOAD_PTR_SIZE;
1467	pbl->pbl_buf = kvzalloc(pbl->pbl_buf_size_in_bytes, GFP_KERNEL);
1468	if (!pbl->pbl_buf)
1469		return -ENOMEM;
1470
1471	if (is_vmalloc_addr(pbl->pbl_buf)) {
1472		pbl->physically_continuous = 0;
1473		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1474					hp_shift);
1475		if (err)
1476			goto err_free;
1477
1478		err = pbl_indirect_initialize(dev, pbl);
1479		if (err)
1480			goto err_free;
1481	} else {
1482		pbl->physically_continuous = 1;
1483		err = umem_to_page_list(dev, umem, pbl->pbl_buf, hp_cnt,
1484					hp_shift);
1485		if (err)
1486			goto err_free;
1487
1488		err = pbl_continuous_initialize(dev, pbl);
1489		if (err)
1490			goto err_free;
1491	}
1492
1493	ibdev_dbg(&dev->ibdev,
1494		  "user_pbl_created: user_pages[%u], continuous[%u]\n",
1495		  hp_cnt, pbl->physically_continuous);
1496
1497	return 0;
1498
1499err_free:
1500	kvfree(pbl->pbl_buf);
1501	return err;
1502}
1503
1504static void pbl_destroy(struct efa_dev *dev, struct pbl_context *pbl)
1505{
1506	if (pbl->physically_continuous)
1507		dma_unmap_single(&dev->pdev->dev, pbl->phys.continuous.dma_addr,
1508				 pbl->pbl_buf_size_in_bytes, DMA_TO_DEVICE);
1509	else
1510		pbl_indirect_terminate(dev, pbl);
1511
1512	kvfree(pbl->pbl_buf);
1513}
1514
1515static int efa_create_inline_pbl(struct efa_dev *dev, struct efa_mr *mr,
1516				 struct efa_com_reg_mr_params *params)
1517{
1518	int err;
1519
1520	params->inline_pbl = 1;
1521	err = umem_to_page_list(dev, mr->umem, params->pbl.inline_pbl_array,
1522				params->page_num, params->page_shift);
1523	if (err)
1524		return err;
1525
1526	ibdev_dbg(&dev->ibdev,
1527		  "inline_pbl_array - pages[%u]\n", params->page_num);
1528
1529	return 0;
1530}
1531
1532static int efa_create_pbl(struct efa_dev *dev,
1533			  struct pbl_context *pbl,
1534			  struct efa_mr *mr,
1535			  struct efa_com_reg_mr_params *params)
1536{
1537	int err;
1538
1539	err = pbl_create(dev, pbl, mr->umem, params->page_num,
1540			 params->page_shift);
1541	if (err) {
1542		ibdev_dbg(&dev->ibdev, "Failed to create pbl[%d]\n", err);
1543		return err;
1544	}
1545
1546	params->inline_pbl = 0;
1547	params->indirect = !pbl->physically_continuous;
1548	if (pbl->physically_continuous) {
1549		params->pbl.pbl.length = pbl->pbl_buf_size_in_bytes;
1550
1551		efa_com_set_dma_addr(pbl->phys.continuous.dma_addr,
1552				     &params->pbl.pbl.address.mem_addr_high,
1553				     &params->pbl.pbl.address.mem_addr_low);
1554	} else {
1555		params->pbl.pbl.length =
1556			pbl->phys.indirect.chunk_list.chunks[0].length;
1557
1558		efa_com_set_dma_addr(pbl->phys.indirect.chunk_list.chunks[0].dma_addr,
1559				     &params->pbl.pbl.address.mem_addr_high,
1560				     &params->pbl.pbl.address.mem_addr_low);
1561	}
1562
1563	return 0;
1564}
1565
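/*
 * Common allocation step for regular and dmabuf MRs: validate the udata
 * ABI and check the requested access flags against the device RDMA
 * read/write capabilities before allocating the efa_mr.
 */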
1566static struct efa_mr *efa_alloc_mr(struct ib_pd *ibpd, int access_flags,
1567				   struct ib_udata *udata)
1568{
1569	struct efa_dev *dev = to_edev(ibpd->device);
1570	int supp_access_flags;
1571	struct efa_mr *mr;
1572
1573	if (udata && udata->inlen &&
1574	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
1575		ibdev_dbg(&dev->ibdev,
1576			  "Incompatible ABI params, udata not cleared\n");
1577		return ERR_PTR(-EINVAL);
1578	}
1579
1580	supp_access_flags =
1581		IB_ACCESS_LOCAL_WRITE |
1582		(EFA_DEV_CAP(dev, RDMA_READ) ? IB_ACCESS_REMOTE_READ : 0) |
1583		(EFA_DEV_CAP(dev, RDMA_WRITE) ? IB_ACCESS_REMOTE_WRITE : 0);
1584
1585	access_flags &= ~IB_ACCESS_OPTIONAL;
1586	if (access_flags & ~supp_access_flags) {
1587		ibdev_dbg(&dev->ibdev,
1588			  "Unsupported access flags[%#x], supported[%#x]\n",
1589			  access_flags, supp_access_flags);
1590		return ERR_PTR(-EOPNOTSUPP);
1591	}
1592
1593	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1594	if (!mr)
1595		return ERR_PTR(-ENOMEM);
1596
1597	return mr;
1598}
1599
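/*
 * Register an already pinned umem with the device: pick the largest page
 * size allowed by both the umem layout and page_size_cap, then pass the
 * page DMA addresses either inline in the admin command (when they fit in
 * inline_pbl_array) or through a separately built pbl.
 */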
1600static int efa_register_mr(struct ib_pd *ibpd, struct efa_mr *mr, u64 start,
1601			   u64 length, u64 virt_addr, int access_flags)
1602{
1603	struct efa_dev *dev = to_edev(ibpd->device);
1604	struct efa_com_reg_mr_params params = {};
1605	struct efa_com_reg_mr_result result = {};
1606	struct pbl_context pbl;
1607	unsigned int pg_sz;
1608	int inline_size;
1609	int err;
1610
1611	params.pd = to_epd(ibpd)->pdn;
1612	params.iova = virt_addr;
1613	params.mr_length_in_bytes = length;
1614	params.permissions = access_flags;
1615
1616	pg_sz = ib_umem_find_best_pgsz(mr->umem,
1617				       dev->dev_attr.page_size_cap,
1618				       virt_addr);
1619	if (!pg_sz) {
1620		ibdev_dbg(&dev->ibdev, "Failed to find a suitable page size in page_size_cap %#llx\n",
1621			  dev->dev_attr.page_size_cap);
1622		return -EOPNOTSUPP;
1623	}
1624
1625	params.page_shift = order_base_2(pg_sz);
1626	params.page_num = ib_umem_num_dma_blocks(mr->umem, pg_sz);
1627
1628	ibdev_dbg(&dev->ibdev,
1629		  "start %#llx length %#llx params.page_shift %u params.page_num %u\n",
1630		  start, length, params.page_shift, params.page_num);
1631
1632	inline_size = ARRAY_SIZE(params.pbl.inline_pbl_array);
1633	if (params.page_num <= inline_size) {
1634		err = efa_create_inline_pbl(dev, mr, &params);
1635		if (err)
1636			return err;
1637
1638		err = efa_com_register_mr(&dev->edev, &params, &result);
1639		if (err)
1640			return err;
1641	} else {
1642		err = efa_create_pbl(dev, &pbl, mr, &params);
1643		if (err)
1644			return err;
1645
1646		err = efa_com_register_mr(&dev->edev, &params, &result);
1647		pbl_destroy(dev, &pbl);
1648
1649		if (err)
1650			return err;
1651	}
1652
1653	mr->ibmr.lkey = result.l_key;
1654	mr->ibmr.rkey = result.r_key;
1655	mr->ibmr.length = length;
1656	ibdev_dbg(&dev->ibdev, "Registered mr[%d]\n", mr->ibmr.lkey);
1657
1658	return 0;
1659}
1660
1661struct ib_mr *efa_reg_user_mr_dmabuf(struct ib_pd *ibpd, u64 start,
1662				     u64 length, u64 virt_addr,
1663				     int fd, int access_flags,
1664				     struct ib_udata *udata)
1665{
1666	struct efa_dev *dev = to_edev(ibpd->device);
1667	struct ib_umem_dmabuf *umem_dmabuf;
1668	struct efa_mr *mr;
1669	int err;
1670
1671	mr = efa_alloc_mr(ibpd, access_flags, udata);
1672	if (IS_ERR(mr)) {
1673		err = PTR_ERR(mr);
1674		goto err_out;
1675	}
1676
1677	umem_dmabuf = ib_umem_dmabuf_get_pinned(ibpd->device, start, length, fd,
1678						access_flags);
1679	if (IS_ERR(umem_dmabuf)) {
1680		err = PTR_ERR(umem_dmabuf);
1681		ibdev_dbg(&dev->ibdev, "Failed to get dmabuf umem[%d]\n", err);
1682		goto err_free;
1683	}
1684
1685	mr->umem = &umem_dmabuf->umem;
1686	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
1687	if (err)
1688		goto err_release;
1689
1690	return &mr->ibmr;
1691
1692err_release:
1693	ib_umem_release(mr->umem);
1694err_free:
1695	kfree(mr);
1696err_out:
1697	atomic64_inc(&dev->stats.reg_mr_err);
1698	return ERR_PTR(err);
1699}
1700
1701struct ib_mr *efa_reg_mr(struct ib_pd *ibpd, u64 start, u64 length,
1702			 u64 virt_addr, int access_flags,
1703			 struct ib_udata *udata)
1704{
1705	struct efa_dev *dev = to_edev(ibpd->device);
1706	struct efa_mr *mr;
1707	int err;
1708
1709	mr = efa_alloc_mr(ibpd, access_flags, udata);
1710	if (IS_ERR(mr)) {
1711		err = PTR_ERR(mr);
1712		goto err_out;
1713	}
1714
1715	mr->umem = ib_umem_get(ibpd->device, start, length, access_flags);
1716	if (IS_ERR(mr->umem)) {
1717		err = PTR_ERR(mr->umem);
1718		ibdev_dbg(&dev->ibdev,
1719			  "Failed to pin and map user space memory[%d]\n", err);
1720		goto err_free;
1721	}
1722
1723	err = efa_register_mr(ibpd, mr, start, length, virt_addr, access_flags);
1724	if (err)
1725		goto err_release;
1726
1727	return &mr->ibmr;
1728
1729err_release:
1730	ib_umem_release(mr->umem);
1731err_free:
1732	kfree(mr);
1733err_out:
1734	atomic64_inc(&dev->stats.reg_mr_err);
1735	return ERR_PTR(err);
1736}
1737
1738int efa_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1739{
1740	struct efa_dev *dev = to_edev(ibmr->device);
1741	struct efa_com_dereg_mr_params params;
1742	struct efa_mr *mr = to_emr(ibmr);
1743	int err;
1744
1745	ibdev_dbg(&dev->ibdev, "Deregister mr[%d]\n", ibmr->lkey);
1746
1747	params.l_key = mr->ibmr.lkey;
1748	err = efa_com_dereg_mr(&dev->edev, &params);
1749	if (err)
1750		return err;
1751
1752	ib_umem_release(mr->umem);
1753	kfree(mr);
1754
1755	return 0;
1756}
1757
1758int efa_get_port_immutable(struct ib_device *ibdev, u32 port_num,
1759			   struct ib_port_immutable *immutable)
1760{
1761	struct ib_port_attr attr;
1762	int err;
1763
1764	err = ib_query_port(ibdev, port_num, &attr);
1765	if (err) {
1766		ibdev_dbg(ibdev, "Couldn't query port err[%d]\n", err);
1767		return err;
1768	}
1769
1770	immutable->pkey_tbl_len = attr.pkey_tbl_len;
1771	immutable->gid_tbl_len = attr.gid_tbl_len;
1772
1773	return 0;
1774}
1775
1776static int efa_dealloc_uar(struct efa_dev *dev, u16 uarn)
1777{
1778	struct efa_com_dealloc_uar_params params = {
1779		.uarn = uarn,
1780	};
1781
1782	return efa_com_dealloc_uar(&dev->edev, &params);
1783}
1784
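/*
 * Evaluates to the attribute name (and sets _attr_str to it) when the
 * device exposes _attr but userspace did not acknowledge it via the
 * matching comp_mask bit, i.e. when the handshake has to fail; evaluates
 * to NULL (and clears _attr_str) otherwise.
 */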
1785#define EFA_CHECK_USER_COMP(_dev, _comp_mask, _attr, _mask, _attr_str) \
1786	(_attr_str = (!(_dev)->dev_attr._attr || ((_comp_mask) & (_mask))) ? \
1787		     NULL : #_attr)
1788
1789static int efa_user_comp_handshake(const struct ib_ucontext *ibucontext,
1790				   const struct efa_ibv_alloc_ucontext_cmd *cmd)
1791{
1792	struct efa_dev *dev = to_edev(ibucontext->device);
1793	char *attr_str;
1794
1795	if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, max_tx_batch,
1796				EFA_ALLOC_UCONTEXT_CMD_COMP_TX_BATCH, attr_str))
1797		goto err;
1798
1799	if (EFA_CHECK_USER_COMP(dev, cmd->comp_mask, min_sq_depth,
1800				EFA_ALLOC_UCONTEXT_CMD_COMP_MIN_SQ_WR,
1801				attr_str))
1802		goto err;
1803
1804	return 0;
1805
1806err:
1807	ibdev_dbg(&dev->ibdev, "Userspace handshake failed for %s attribute\n",
1808		  attr_str);
1809	return -EOPNOTSUPP;
1810}
1811
1812int efa_alloc_ucontext(struct ib_ucontext *ibucontext, struct ib_udata *udata)
1813{
1814	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1815	struct efa_dev *dev = to_edev(ibucontext->device);
1816	struct efa_ibv_alloc_ucontext_resp resp = {};
1817	struct efa_ibv_alloc_ucontext_cmd cmd = {};
1818	struct efa_com_alloc_uar_result result;
1819	int err;
1820
1821	/*
1822	 * it's fine if the driver does not know all request fields;
1823	 * we will ack the input fields in our response.
1824	 */
1825
1826	err = ib_copy_from_udata(&cmd, udata,
1827				 min(sizeof(cmd), udata->inlen));
1828	if (err) {
1829		ibdev_dbg(&dev->ibdev,
1830			  "Cannot copy udata for alloc_ucontext\n");
1831		goto err_out;
1832	}
1833
1834	err = efa_user_comp_handshake(ibucontext, &cmd);
1835	if (err)
1836		goto err_out;
1837
1838	err = efa_com_alloc_uar(&dev->edev, &result);
1839	if (err)
1840		goto err_out;
1841
1842	ucontext->uarn = result.uarn;
1843
1844	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_QUERY_DEVICE;
1845	resp.cmds_supp_udata_mask |= EFA_USER_CMDS_SUPP_UDATA_CREATE_AH;
1846	resp.sub_cqs_per_cq = dev->dev_attr.sub_cqs_per_cq;
1847	resp.inline_buf_size = dev->dev_attr.inline_buf_size;
1848	resp.max_llq_size = dev->dev_attr.max_llq_size;
1849	resp.max_tx_batch = dev->dev_attr.max_tx_batch;
1850	resp.min_sq_wr = dev->dev_attr.min_sq_depth;
1851
1852	err = ib_copy_to_udata(udata, &resp,
1853			       min(sizeof(resp), udata->outlen));
1854	if (err)
1855		goto err_dealloc_uar;
1856
1857	return 0;
1858
1859err_dealloc_uar:
1860	efa_dealloc_uar(dev, result.uarn);
1861err_out:
1862	atomic64_inc(&dev->stats.alloc_ucontext_err);
1863	return err;
1864}
1865
1866void efa_dealloc_ucontext(struct ib_ucontext *ibucontext)
1867{
1868	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
1869	struct efa_dev *dev = to_edev(ibucontext->device);
1870
1871	efa_dealloc_uar(dev, ucontext->uarn);
1872}
1873
1874void efa_mmap_free(struct rdma_user_mmap_entry *rdma_entry)
1875{
1876	struct efa_user_mmap_entry *entry = to_emmap(rdma_entry);
1877
1878	kfree(entry);
1879}
1880
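/*
 * Look up the rdma_user_mmap entry behind vma->vm_pgoff and map it
 * according to its type: doorbell pages non-cached, LLQ descriptor
 * windows write-combined, and DMA buffers page by page with
 * vm_insert_page().
 */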
static int __efa_mmap(struct efa_dev *dev, struct efa_ucontext *ucontext,
		      struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *rdma_entry;
	struct efa_user_mmap_entry *entry;
	unsigned long va;
	int err = 0;
	u64 pfn;

	rdma_entry = rdma_user_mmap_entry_get(&ucontext->ibucontext, vma);
	if (!rdma_entry) {
		ibdev_dbg(&dev->ibdev,
			  "pgoff[%#lx] does not have valid entry\n",
			  vma->vm_pgoff);
		atomic64_inc(&dev->stats.mmap_err);
		return -EINVAL;
	}
	entry = to_emmap(rdma_entry);

	ibdev_dbg(&dev->ibdev,
		  "Mapping address[%#llx], length[%#zx], mmap_flag[%d]\n",
		  entry->address, rdma_entry->npages * PAGE_SIZE,
		  entry->mmap_flag);

	pfn = entry->address >> PAGE_SHIFT;
	switch (entry->mmap_flag) {
	case EFA_MMAP_IO_NC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_noncached(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_IO_WC:
		err = rdma_user_mmap_io(&ucontext->ibucontext, vma, pfn,
					entry->rdma_entry.npages * PAGE_SIZE,
					pgprot_writecombine(vma->vm_page_prot),
					rdma_entry);
		break;
	case EFA_MMAP_DMA_PAGE:
		for (va = vma->vm_start; va < vma->vm_end;
		     va += PAGE_SIZE, pfn++) {
			err = vm_insert_page(vma, va, pfn_to_page(pfn));
			if (err)
				break;
		}
		break;
	default:
		err = -EINVAL;
	}

	if (err) {
		ibdev_dbg(
			&dev->ibdev,
			"Couldn't mmap address[%#llx] length[%#zx] mmap_flag[%d] err[%d]\n",
			entry->address, rdma_entry->npages * PAGE_SIZE,
			entry->mmap_flag, err);
		atomic64_inc(&dev->stats.mmap_err);
	}

	rdma_user_mmap_entry_put(rdma_entry);
	return err;
}

int efa_mmap(struct ib_ucontext *ibucontext,
	     struct vm_area_struct *vma)
{
	struct efa_ucontext *ucontext = to_eucontext(ibucontext);
	struct efa_dev *dev = to_edev(ibucontext->device);
	size_t length = vma->vm_end - vma->vm_start;

	ibdev_dbg(&dev->ibdev,
		  "start %#lx, end %#lx, length = %#zx, pgoff = %#lx\n",
		  vma->vm_start, vma->vm_end, length, vma->vm_pgoff);

	return __efa_mmap(dev, ucontext, vma);
}

static int efa_ah_destroy(struct efa_dev *dev, struct efa_ah *ah)
{
	struct efa_com_destroy_ah_params params = {
		.ah = ah->ah,
		.pdn = to_epd(ah->ibah.pd)->pdn,
	};

	return efa_com_destroy_ah(&dev->edev, &params);
}

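/*
 * Create an address handle for the destination given by the GRH DGID. The
 * handle is created via efa_com_create_ah(), which may sleep, so creation is
 * refused when the caller did not mark the call as sleepable.
 */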
int efa_create_ah(struct ib_ah *ibah,
		  struct rdma_ah_init_attr *init_attr,
		  struct ib_udata *udata)
{
	struct rdma_ah_attr *ah_attr = init_attr->ah_attr;
	struct efa_dev *dev = to_edev(ibah->device);
	struct efa_com_create_ah_params params = {};
	struct efa_ibv_create_ah_resp resp = {};
	struct efa_com_create_ah_result result;
	struct efa_ah *ah = to_eah(ibah);
	int err;

	if (!(init_attr->flags & RDMA_CREATE_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Create address handle is not supported in atomic context\n");
		err = -EOPNOTSUPP;
		goto err_out;
	}

	if (udata->inlen &&
	    !ib_is_udata_cleared(udata, 0, udata->inlen)) {
		ibdev_dbg(&dev->ibdev, "Incompatible ABI params\n");
		err = -EINVAL;
		goto err_out;
	}

	memcpy(params.dest_addr, ah_attr->grh.dgid.raw,
	       sizeof(params.dest_addr));
	params.pdn = to_epd(ibah->pd)->pdn;
	err = efa_com_create_ah(&dev->edev, &params, &result);
	if (err)
		goto err_out;

	memcpy(ah->id, ah_attr->grh.dgid.raw, sizeof(ah->id));
	ah->ah = result.ah;

	resp.efa_address_handle = result.ah;

	if (udata->outlen) {
		err = ib_copy_to_udata(udata, &resp,
				       min(sizeof(resp), udata->outlen));
		if (err) {
			ibdev_dbg(&dev->ibdev,
				  "Failed to copy udata for create_ah response\n");
			goto err_destroy_ah;
		}
	}
	ibdev_dbg(&dev->ibdev, "Created ah[%d]\n", ah->ah);

	return 0;

err_destroy_ah:
	efa_ah_destroy(dev, ah);
err_out:
	atomic64_inc(&dev->stats.create_ah_err);
	return err;
}

int efa_destroy_ah(struct ib_ah *ibah, u32 flags)
{
	struct efa_dev *dev = to_edev(ibah->pd->device);
	struct efa_ah *ah = to_eah(ibah);

	ibdev_dbg(&dev->ibdev, "Destroy ah[%d]\n", ah->ah);

	if (!(flags & RDMA_DESTROY_AH_SLEEPABLE)) {
		ibdev_dbg(&dev->ibdev,
			  "Destroy address handle is not supported in atomic context\n");
		return -EOPNOTSUPP;
	}

	efa_ah_destroy(dev, ah);
	return 0;
}

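/*
 * Allocate rdma_hw_stats containers sized by the descriptor tables above;
 * RDMA_HW_STATS_DEFAULT_LIFESPAN bounds how often the core re-reads the
 * counters from the driver.
 */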
struct rdma_hw_stats *efa_alloc_hw_port_stats(struct ib_device *ibdev,
					      u32 port_num)
{
	return rdma_alloc_hw_stats_struct(efa_port_stats_descs,
					  ARRAY_SIZE(efa_port_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

struct rdma_hw_stats *efa_alloc_hw_device_stats(struct ib_device *ibdev)
{
	return rdma_alloc_hw_stats_struct(efa_device_stats_descs,
					  ARRAY_SIZE(efa_device_stats_descs),
					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
}

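/*
 * Device-wide stats are software counters: admin queue bookkeeping plus the
 * driver's per-verb error counters; no device command is issued here.
 */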
static int efa_fill_device_stats(struct efa_dev *dev,
				 struct rdma_hw_stats *stats)
{
	struct efa_com_stats_admin *as = &dev->edev.aq.stats;
	struct efa_stats *s = &dev->stats;

	stats->value[EFA_SUBMITTED_CMDS] = atomic64_read(&as->submitted_cmd);
	stats->value[EFA_COMPLETED_CMDS] = atomic64_read(&as->completed_cmd);
	stats->value[EFA_CMDS_ERR] = atomic64_read(&as->cmd_err);
	stats->value[EFA_NO_COMPLETION_CMDS] = atomic64_read(&as->no_completion);

	stats->value[EFA_KEEP_ALIVE_RCVD] = atomic64_read(&s->keep_alive_rcvd);
	stats->value[EFA_ALLOC_PD_ERR] = atomic64_read(&s->alloc_pd_err);
	stats->value[EFA_CREATE_QP_ERR] = atomic64_read(&s->create_qp_err);
	stats->value[EFA_CREATE_CQ_ERR] = atomic64_read(&s->create_cq_err);
	stats->value[EFA_REG_MR_ERR] = atomic64_read(&s->reg_mr_err);
	stats->value[EFA_ALLOC_UCONTEXT_ERR] =
		atomic64_read(&s->alloc_ucontext_err);
	stats->value[EFA_CREATE_AH_ERR] = atomic64_read(&s->create_ah_err);
	stats->value[EFA_MMAP_ERR] = atomic64_read(&s->mmap_err);

	return ARRAY_SIZE(efa_device_stats_descs);
}

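/*
 * Port stats are queried from the device: basic, messages and RDMA read
 * counters are always fetched, RDMA write counters only when the device
 * advertises the RDMA_WRITE capability.
 */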
static int efa_fill_port_stats(struct efa_dev *dev, struct rdma_hw_stats *stats,
			       u32 port_num)
{
	struct efa_com_get_stats_params params = {};
	union efa_com_get_stats_result result;
	struct efa_com_rdma_write_stats *rws;
	struct efa_com_rdma_read_stats *rrs;
	struct efa_com_messages_stats *ms;
	struct efa_com_basic_stats *bs;
	int err;

	params.scope = EFA_ADMIN_GET_STATS_SCOPE_ALL;
	params.type = EFA_ADMIN_GET_STATS_TYPE_BASIC;

	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	bs = &result.basic_stats;
	stats->value[EFA_TX_BYTES] = bs->tx_bytes;
	stats->value[EFA_TX_PKTS] = bs->tx_pkts;
	stats->value[EFA_RX_BYTES] = bs->rx_bytes;
	stats->value[EFA_RX_PKTS] = bs->rx_pkts;
	stats->value[EFA_RX_DROPS] = bs->rx_drops;

	params.type = EFA_ADMIN_GET_STATS_TYPE_MESSAGES;
	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	ms = &result.messages_stats;
	stats->value[EFA_SEND_BYTES] = ms->send_bytes;
	stats->value[EFA_SEND_WRS] = ms->send_wrs;
	stats->value[EFA_RECV_BYTES] = ms->recv_bytes;
	stats->value[EFA_RECV_WRS] = ms->recv_wrs;

	params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_READ;
	err = efa_com_get_stats(&dev->edev, &params, &result);
	if (err)
		return err;

	rrs = &result.rdma_read_stats;
	stats->value[EFA_RDMA_READ_WRS] = rrs->read_wrs;
	stats->value[EFA_RDMA_READ_BYTES] = rrs->read_bytes;
	stats->value[EFA_RDMA_READ_WR_ERR] = rrs->read_wr_err;
	stats->value[EFA_RDMA_READ_RESP_BYTES] = rrs->read_resp_bytes;

	if (EFA_DEV_CAP(dev, RDMA_WRITE)) {
		params.type = EFA_ADMIN_GET_STATS_TYPE_RDMA_WRITE;
		err = efa_com_get_stats(&dev->edev, &params, &result);
		if (err)
			return err;

		rws = &result.rdma_write_stats;
		stats->value[EFA_RDMA_WRITE_WRS] = rws->write_wrs;
		stats->value[EFA_RDMA_WRITE_BYTES] = rws->write_bytes;
		stats->value[EFA_RDMA_WRITE_WR_ERR] = rws->write_wr_err;
		stats->value[EFA_RDMA_WRITE_RECV_BYTES] = rws->write_recv_bytes;
	}

	return ARRAY_SIZE(efa_port_stats_descs);
}

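/*
 * The core requests device-wide counters with port_num == 0 and per-port
 * counters with a nonzero port number.
 */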
int efa_get_hw_stats(struct ib_device *ibdev, struct rdma_hw_stats *stats,
		     u32 port_num, int index)
{
	if (port_num)
		return efa_fill_port_stats(to_edev(ibdev), stats, port_num);
	else
		return efa_fill_device_stats(to_edev(ibdev), stats);
}

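/*
 * EFA does not expose a standard RDMA link layer, hence it is reported as
 * unspecified.
 */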
enum rdma_link_layer efa_port_link_layer(struct ib_device *ibdev,
					 u32 port_num)
{
	return IB_LINK_LAYER_UNSPECIFIED;
}
