1/*******************************************************************************
2*
3* Copyright (c) 2015-2016 Intel Corporation.  All rights reserved.
4*
5* This software is available to you under a choice of one of two
6* licenses.  You may choose to be licensed under the terms of the GNU
7* General Public License (GPL) Version 2, available from the file
8* COPYING in the main directory of this source tree, or the
9* OpenFabrics.org BSD license below:
10*
11*   Redistribution and use in source and binary forms, with or
12*   without modification, are permitted provided that the following
13*   conditions are met:
14*
15*    - Redistributions of source code must retain the above
16*	copyright notice, this list of conditions and the following
17*	disclaimer.
18*
19*    - Redistributions in binary form must reproduce the above
20*	copyright notice, this list of conditions and the following
21*	disclaimer in the documentation and/or other materials
22*	provided with the distribution.
23*
24* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31* SOFTWARE.
32*
33*******************************************************************************/
34
35#include <linux/module.h>
36#include <linux/moduleparam.h>
37#include <linux/random.h>
38#include <linux/highmem.h>
39#include <linux/time.h>
40#include <linux/hugetlb.h>
41#include <linux/irq.h>
42#include <asm/byteorder.h>
43#include <net/ip.h>
44#include <rdma/ib_verbs.h>
45#include <rdma/iw_cm.h>
46#include <rdma/ib_user_verbs.h>
47#include <rdma/ib_umem.h>
48#include <rdma/uverbs_ioctl.h>
49#include "i40iw.h"
50
51/**
52 * i40iw_query_device - get device attributes
53 * @ibdev: device pointer from stack
54 * @props: returning device attributes
55 * @udata: user data
56 */
57static int i40iw_query_device(struct ib_device *ibdev,
58			      struct ib_device_attr *props,
59			      struct ib_udata *udata)
60{
61	struct i40iw_device *iwdev = to_iwdev(ibdev);
62
63	if (udata->inlen || udata->outlen)
64		return -EINVAL;
65	memset(props, 0, sizeof(*props));
66	ether_addr_copy((u8 *)&props->sys_image_guid, iwdev->netdev->dev_addr);
67	props->fw_ver = i40iw_fw_major_ver(&iwdev->sc_dev) << 32 |
68			i40iw_fw_minor_ver(&iwdev->sc_dev);
69	props->device_cap_flags = iwdev->device_cap_flags;
70	props->vendor_id = iwdev->ldev->pcidev->vendor;
71	props->vendor_part_id = iwdev->ldev->pcidev->device;
72	props->hw_ver = (u32)iwdev->sc_dev.hw_rev;
73	props->max_mr_size = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
74	props->max_qp = iwdev->max_qp - iwdev->used_qps;
75	props->max_qp_wr = I40IW_MAX_QP_WRS;
76	props->max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
77	props->max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
78	props->max_cq = iwdev->max_cq - iwdev->used_cqs;
79	props->max_cqe = iwdev->max_cqe;
80	props->max_mr = iwdev->max_mr - iwdev->used_mrs;
81	props->max_pd = iwdev->max_pd - iwdev->used_pds;
82	props->max_sge_rd = I40IW_MAX_SGE_RD;
83	props->max_qp_rd_atom = I40IW_MAX_IRD_SIZE;
84	props->max_qp_init_rd_atom = props->max_qp_rd_atom;
85	props->atomic_cap = IB_ATOMIC_NONE;
86	props->max_fast_reg_page_list_len = I40IW_MAX_PAGES_PER_FMR;
87	return 0;
88}
89
90/**
91 * i40iw_query_port - get port attrubutes
92 * @ibdev: device pointer from stack
93 * @port: port number for query
94 * @props: returning device attributes
95 */
96static int i40iw_query_port(struct ib_device *ibdev,
97			    u8 port,
98			    struct ib_port_attr *props)
99{
100	props->lid = 1;
101	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
102		IB_PORT_VENDOR_CLASS_SUP | IB_PORT_BOOT_MGMT_SUP;
103	props->gid_tbl_len = 1;
104	props->active_width = IB_WIDTH_4X;
105	props->active_speed = 1;
106	props->max_msg_sz = I40IW_MAX_OUTBOUND_MESSAGE_SIZE;
107	return 0;
108}
109
110/**
111 * i40iw_alloc_ucontext - Allocate the user context data structure
112 * @uctx: Uverbs context pointer from stack
113 * @udata: user data
114 *
115 * This keeps track of all objects associated with a particular
116 * user-mode client.
117 */
118static int i40iw_alloc_ucontext(struct ib_ucontext *uctx,
119				struct ib_udata *udata)
120{
121	struct ib_device *ibdev = uctx->device;
122	struct i40iw_device *iwdev = to_iwdev(ibdev);
123	struct i40iw_alloc_ucontext_req req;
124	struct i40iw_alloc_ucontext_resp uresp = {};
125	struct i40iw_ucontext *ucontext = to_ucontext(uctx);
126
127	if (ib_copy_from_udata(&req, udata, sizeof(req)))
128		return -EINVAL;
129
130	if (req.userspace_ver < 4 || req.userspace_ver > I40IW_ABI_VER) {
131		i40iw_pr_err("Unsupported provider library version %u.\n", req.userspace_ver);
132		return -EINVAL;
133	}
134
135	uresp.max_qps = iwdev->max_qp;
136	uresp.max_pds = iwdev->max_pd;
137	uresp.wq_size = iwdev->max_qp_wr * 2;
138	uresp.kernel_ver = req.userspace_ver;
139
140	ucontext->iwdev = iwdev;
141	ucontext->abi_ver = req.userspace_ver;
142
143	if (ib_copy_to_udata(udata, &uresp, sizeof(uresp)))
144		return -EFAULT;
145
146	INIT_LIST_HEAD(&ucontext->cq_reg_mem_list);
147	spin_lock_init(&ucontext->cq_reg_mem_list_lock);
148	INIT_LIST_HEAD(&ucontext->qp_reg_mem_list);
149	spin_lock_init(&ucontext->qp_reg_mem_list_lock);
150
151	return 0;
152}
153
154/**
155 * i40iw_dealloc_ucontext - deallocate the user context data structure
156 * @context: user context created during alloc
157 */
158static void i40iw_dealloc_ucontext(struct ib_ucontext *context)
159{
160	return;
161}
162
163/**
164 * i40iw_mmap - user memory map
165 * @context: context created during alloc
166 * @vma: kernel info for user memory map
167 */
168static int i40iw_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
169{
170	struct i40iw_ucontext *ucontext = to_ucontext(context);
171	u64 dbaddr;
172
173	if (vma->vm_pgoff || vma->vm_end - vma->vm_start != PAGE_SIZE)
174		return -EINVAL;
175
176	dbaddr = I40IW_DB_ADDR_OFFSET + pci_resource_start(ucontext->iwdev->ldev->pcidev, 0);
177
178	return rdma_user_mmap_io(context, vma, dbaddr >> PAGE_SHIFT, PAGE_SIZE,
179				 pgprot_noncached(vma->vm_page_prot), NULL);
180}
181
182/**
183 * i40iw_alloc_push_page - allocate a push page for qp
184 * @iwdev: iwarp device
185 * @qp: hardware control qp
186 */
187static void i40iw_alloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
188{
189	struct i40iw_cqp_request *cqp_request;
190	struct cqp_commands_info *cqp_info;
191	enum i40iw_status_code status;
192
193	if (qp->push_idx != I40IW_INVALID_PUSH_PAGE_INDEX)
194		return;
195
196	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
197	if (!cqp_request)
198		return;
199
200	atomic_inc(&cqp_request->refcount);
201
202	cqp_info = &cqp_request->info;
203	cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
204	cqp_info->post_sq = 1;
205
206	cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
207	cqp_info->in.u.manage_push_page.info.free_page = 0;
208	cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
209	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
210
211	status = i40iw_handle_cqp_op(iwdev, cqp_request);
212	if (!status)
213		qp->push_idx = cqp_request->compl_info.op_ret_val;
214	else
215		i40iw_pr_err("CQP-OP Push page fail");
216	i40iw_put_cqp_request(&iwdev->cqp, cqp_request);
217}
218
219/**
220 * i40iw_dealloc_push_page - free a push page for qp
221 * @iwdev: iwarp device
222 * @qp: hardware control qp
223 */
224static void i40iw_dealloc_push_page(struct i40iw_device *iwdev, struct i40iw_sc_qp *qp)
225{
226	struct i40iw_cqp_request *cqp_request;
227	struct cqp_commands_info *cqp_info;
228	enum i40iw_status_code status;
229
230	if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX)
231		return;
232
233	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, false);
234	if (!cqp_request)
235		return;
236
237	cqp_info = &cqp_request->info;
238	cqp_info->cqp_cmd = OP_MANAGE_PUSH_PAGE;
239	cqp_info->post_sq = 1;
240
241	cqp_info->in.u.manage_push_page.info.push_idx = qp->push_idx;
242	cqp_info->in.u.manage_push_page.info.qs_handle = qp->qs_handle;
243	cqp_info->in.u.manage_push_page.info.free_page = 1;
244	cqp_info->in.u.manage_push_page.cqp = &iwdev->cqp.sc_cqp;
245	cqp_info->in.u.manage_push_page.scratch = (uintptr_t)cqp_request;
246
247	status = i40iw_handle_cqp_op(iwdev, cqp_request);
248	if (!status)
249		qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
250	else
251		i40iw_pr_err("CQP-OP Push page fail");
252}
253
254/**
255 * i40iw_alloc_pd - allocate protection domain
256 * @pd: PD pointer
257 * @udata: user data
258 */
259static int i40iw_alloc_pd(struct ib_pd *pd, struct ib_udata *udata)
260{
261	struct i40iw_pd *iwpd = to_iwpd(pd);
262	struct i40iw_device *iwdev = to_iwdev(pd->device);
263	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
264	struct i40iw_alloc_pd_resp uresp;
265	struct i40iw_sc_pd *sc_pd;
266	u32 pd_id = 0;
267	int err;
268
269	if (iwdev->closing)
270		return -ENODEV;
271
272	err = i40iw_alloc_resource(iwdev, iwdev->allocated_pds,
273				   iwdev->max_pd, &pd_id, &iwdev->next_pd);
274	if (err) {
275		i40iw_pr_err("alloc resource failed\n");
276		return err;
277	}
278
279	sc_pd = &iwpd->sc_pd;
280
281	if (udata) {
282		struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
283			udata, struct i40iw_ucontext, ibucontext);
284		dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, ucontext->abi_ver);
285		memset(&uresp, 0, sizeof(uresp));
286		uresp.pd_id = pd_id;
287		if (ib_copy_to_udata(udata, &uresp, sizeof(uresp))) {
288			err = -EFAULT;
289			goto error;
290		}
291	} else {
292		dev->iw_pd_ops->pd_init(dev, sc_pd, pd_id, -1);
293	}
294
295	i40iw_add_pdusecount(iwpd);
296	return 0;
297
298error:
299	i40iw_free_resource(iwdev, iwdev->allocated_pds, pd_id);
300	return err;
301}
302
303/**
304 * i40iw_dealloc_pd - deallocate pd
305 * @ibpd: ptr of pd to be deallocated
306 * @udata: user data or null for kernel object
307 */
308static int i40iw_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
309{
310	struct i40iw_pd *iwpd = to_iwpd(ibpd);
311	struct i40iw_device *iwdev = to_iwdev(ibpd->device);
312
313	i40iw_rem_pdusecount(iwpd, iwdev);
314	return 0;
315}
316
317/**
318 * i40iw_get_pbl - Retrieve pbl from a list given a virtual
319 * address
320 * @va: user virtual address
321 * @pbl_list: pbl list to search in (QP's or CQ's)
322 */
323static struct i40iw_pbl *i40iw_get_pbl(unsigned long va,
324				       struct list_head *pbl_list)
325{
326	struct i40iw_pbl *iwpbl;
327
328	list_for_each_entry(iwpbl, pbl_list, list) {
329		if (iwpbl->user_base == va) {
330			iwpbl->on_list = false;
331			list_del(&iwpbl->list);
332			return iwpbl;
333		}
334	}
335	return NULL;
336}
337
338/**
339 * i40iw_free_qp_resources - free up memory resources for qp
340 * @iwdev: iwarp device
341 * @iwqp: qp ptr (user or kernel)
342 * @qp_num: qp number assigned
343 */
344void i40iw_free_qp_resources(struct i40iw_qp *iwqp)
345{
346	struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
347	struct i40iw_device *iwdev = iwqp->iwdev;
348	u32 qp_num = iwqp->ibqp.qp_num;
349
350	i40iw_ieq_cleanup_qp(iwdev->vsi.ieq, &iwqp->sc_qp);
351	i40iw_dealloc_push_page(iwdev, &iwqp->sc_qp);
352	if (qp_num)
353		i40iw_free_resource(iwdev, iwdev->allocated_qps, qp_num);
354	if (iwpbl->pbl_allocated)
355		i40iw_free_pble(iwdev->pble_rsrc, &iwpbl->pble_alloc);
356	i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->q2_ctx_mem);
357	i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwqp->kqp.dma_mem);
358	kfree(iwqp->kqp.wrid_mem);
359	iwqp->kqp.wrid_mem = NULL;
360	kfree(iwqp);
361}
362
363/**
364 * i40iw_clean_cqes - clean cq entries for qp
365 * @iwqp: qp ptr (user or kernel)
366 * @iwcq: cq ptr
367 */
368static void i40iw_clean_cqes(struct i40iw_qp *iwqp, struct i40iw_cq *iwcq)
369{
370	struct i40iw_cq_uk *ukcq = &iwcq->sc_cq.cq_uk;
371
372	ukcq->ops.iw_cq_clean(&iwqp->sc_qp.qp_uk, ukcq);
373}
374
375/**
376 * i40iw_destroy_qp - destroy qp
377 * @ibqp: qp's ib pointer also to get to device's qp address
378 */
379static int i40iw_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
380{
381	struct i40iw_qp *iwqp = to_iwqp(ibqp);
382	struct ib_qp_attr attr;
383	struct i40iw_device *iwdev = iwqp->iwdev;
384
385	memset(&attr, 0, sizeof(attr));
386
387	iwqp->destroyed = 1;
388
389	if (iwqp->ibqp_state >= IB_QPS_INIT && iwqp->ibqp_state < IB_QPS_RTS)
390		i40iw_next_iw_state(iwqp, I40IW_QP_STATE_ERROR, 0, 0, 0);
391
392	if (!iwqp->user_mode) {
393		if (iwqp->iwscq) {
394			i40iw_clean_cqes(iwqp, iwqp->iwscq);
395			if (iwqp->iwrcq != iwqp->iwscq)
396				i40iw_clean_cqes(iwqp, iwqp->iwrcq);
397		}
398	}
399
400	attr.qp_state = IB_QPS_ERR;
401	i40iw_modify_qp(&iwqp->ibqp, &attr, IB_QP_STATE, NULL);
402	i40iw_qp_rem_ref(&iwqp->ibqp);
403	wait_for_completion(&iwqp->free_qp);
404	i40iw_cqp_qp_destroy_cmd(&iwdev->sc_dev, &iwqp->sc_qp);
405	i40iw_rem_pdusecount(iwqp->iwpd, iwdev);
406	i40iw_free_qp_resources(iwqp);
407	i40iw_rem_devusecount(iwdev);
408
409	return 0;
410}
411
412/**
413 * i40iw_setup_virt_qp - setup for allocation of virtual qp
414 * @dev: iwarp device
415 * @qp: qp ptr
416 * @init_info: initialize info to return
417 */
418static int i40iw_setup_virt_qp(struct i40iw_device *iwdev,
419			       struct i40iw_qp *iwqp,
420			       struct i40iw_qp_init_info *init_info)
421{
422	struct i40iw_pbl *iwpbl = &iwqp->iwpbl;
423	struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
424
425	iwqp->page = qpmr->sq_page;
426	init_info->shadow_area_pa = cpu_to_le64(qpmr->shadow);
427	if (iwpbl->pbl_allocated) {
428		init_info->virtual_map = true;
429		init_info->sq_pa = qpmr->sq_pbl.idx;
430		init_info->rq_pa = qpmr->rq_pbl.idx;
431	} else {
432		init_info->sq_pa = qpmr->sq_pbl.addr;
433		init_info->rq_pa = qpmr->rq_pbl.addr;
434	}
435	return 0;
436}
437
438/**
439 * i40iw_setup_kmode_qp - setup initialization for kernel mode qp
440 * @iwdev: iwarp device
441 * @iwqp: qp ptr (user or kernel)
442 * @info: initialize info to return
443 */
444static int i40iw_setup_kmode_qp(struct i40iw_device *iwdev,
445				struct i40iw_qp *iwqp,
446				struct i40iw_qp_init_info *info)
447{
448	struct i40iw_dma_mem *mem = &iwqp->kqp.dma_mem;
449	u32 sqdepth, rqdepth;
450	u8 sqshift;
451	u32 size;
452	enum i40iw_status_code status;
453	struct i40iw_qp_uk_init_info *ukinfo = &info->qp_uk_init_info;
454
455	i40iw_get_wqe_shift(ukinfo->max_sq_frag_cnt, ukinfo->max_inline_data, &sqshift);
456	status = i40iw_get_sqdepth(ukinfo->sq_size, sqshift, &sqdepth);
457	if (status)
458		return -ENOMEM;
459
460	status = i40iw_get_rqdepth(ukinfo->rq_size, I40IW_MAX_RQ_WQE_SHIFT, &rqdepth);
461	if (status)
462		return -ENOMEM;
463
464	size = sqdepth * sizeof(struct i40iw_sq_uk_wr_trk_info) + (rqdepth << 3);
465	iwqp->kqp.wrid_mem = kzalloc(size, GFP_KERNEL);
466
467	ukinfo->sq_wrtrk_array = (struct i40iw_sq_uk_wr_trk_info *)iwqp->kqp.wrid_mem;
468	if (!ukinfo->sq_wrtrk_array)
469		return -ENOMEM;
470
471	ukinfo->rq_wrid_array = (u64 *)&ukinfo->sq_wrtrk_array[sqdepth];
472
473	size = (sqdepth + rqdepth) * I40IW_QP_WQE_MIN_SIZE;
474	size += (I40IW_SHADOW_AREA_SIZE << 3);
475
476	status = i40iw_allocate_dma_mem(iwdev->sc_dev.hw, mem, size, 256);
477	if (status) {
478		kfree(ukinfo->sq_wrtrk_array);
479		ukinfo->sq_wrtrk_array = NULL;
480		return -ENOMEM;
481	}
482
483	ukinfo->sq = mem->va;
484	info->sq_pa = mem->pa;
485
486	ukinfo->rq = &ukinfo->sq[sqdepth];
487	info->rq_pa = info->sq_pa + (sqdepth * I40IW_QP_WQE_MIN_SIZE);
488
489	ukinfo->shadow_area = ukinfo->rq[rqdepth].elem;
490	info->shadow_area_pa = info->rq_pa + (rqdepth * I40IW_QP_WQE_MIN_SIZE);
491
492	ukinfo->sq_size = sqdepth >> sqshift;
493	ukinfo->rq_size = rqdepth >> I40IW_MAX_RQ_WQE_SHIFT;
494	ukinfo->qp_id = iwqp->ibqp.qp_num;
495	return 0;
496}
497
498/**
499 * i40iw_create_qp - create qp
500 * @ibpd: ptr of pd
501 * @init_attr: attributes for qp
502 * @udata: user data for create qp
503 */
504static struct ib_qp *i40iw_create_qp(struct ib_pd *ibpd,
505				     struct ib_qp_init_attr *init_attr,
506				     struct ib_udata *udata)
507{
508	struct i40iw_pd *iwpd = to_iwpd(ibpd);
509	struct i40iw_device *iwdev = to_iwdev(ibpd->device);
510	struct i40iw_cqp *iwcqp = &iwdev->cqp;
511	struct i40iw_qp *iwqp;
512	struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
513		udata, struct i40iw_ucontext, ibucontext);
514	struct i40iw_create_qp_req req;
515	struct i40iw_create_qp_resp uresp;
516	u32 qp_num = 0;
517	enum i40iw_status_code ret;
518	int err_code;
519	int sq_size;
520	int rq_size;
521	struct i40iw_sc_qp *qp;
522	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
523	struct i40iw_qp_init_info init_info;
524	struct i40iw_create_qp_info *qp_info;
525	struct i40iw_cqp_request *cqp_request;
526	struct cqp_commands_info *cqp_info;
527
528	struct i40iw_qp_host_ctx_info *ctx_info;
529	struct i40iwarp_offload_info *iwarp_info;
530	unsigned long flags;
531
532	if (iwdev->closing)
533		return ERR_PTR(-ENODEV);
534
535	if (init_attr->create_flags)
536		return ERR_PTR(-EINVAL);
537	if (init_attr->cap.max_inline_data > I40IW_MAX_INLINE_DATA_SIZE)
538		init_attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
539
540	if (init_attr->cap.max_send_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
541		init_attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
542
543	if (init_attr->cap.max_recv_sge > I40IW_MAX_WQ_FRAGMENT_COUNT)
544		init_attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
545
546	memset(&init_info, 0, sizeof(init_info));
547
548	sq_size = init_attr->cap.max_send_wr;
549	rq_size = init_attr->cap.max_recv_wr;
550
551	init_info.vsi = &iwdev->vsi;
552	init_info.qp_uk_init_info.sq_size = sq_size;
553	init_info.qp_uk_init_info.rq_size = rq_size;
554	init_info.qp_uk_init_info.max_sq_frag_cnt = init_attr->cap.max_send_sge;
555	init_info.qp_uk_init_info.max_rq_frag_cnt = init_attr->cap.max_recv_sge;
556	init_info.qp_uk_init_info.max_inline_data = init_attr->cap.max_inline_data;
557
558	iwqp = kzalloc(sizeof(*iwqp), GFP_KERNEL);
559	if (!iwqp)
560		return ERR_PTR(-ENOMEM);
561
562	qp = &iwqp->sc_qp;
563	qp->back_qp = (void *)iwqp;
564	qp->push_idx = I40IW_INVALID_PUSH_PAGE_INDEX;
565
566	iwqp->iwdev = iwdev;
567	iwqp->ctx_info.iwarp_info = &iwqp->iwarp_info;
568
569	if (i40iw_allocate_dma_mem(dev->hw,
570				   &iwqp->q2_ctx_mem,
571				   I40IW_Q2_BUFFER_SIZE + I40IW_QP_CTX_SIZE,
572				   256)) {
573		i40iw_pr_err("dma_mem failed\n");
574		err_code = -ENOMEM;
575		goto error;
576	}
577
578	init_info.q2 = iwqp->q2_ctx_mem.va;
579	init_info.q2_pa = iwqp->q2_ctx_mem.pa;
580
581	init_info.host_ctx = (void *)init_info.q2 + I40IW_Q2_BUFFER_SIZE;
582	init_info.host_ctx_pa = init_info.q2_pa + I40IW_Q2_BUFFER_SIZE;
583
584	err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_qps, iwdev->max_qp,
585					&qp_num, &iwdev->next_qp);
586	if (err_code) {
587		i40iw_pr_err("qp resource\n");
588		goto error;
589	}
590
591	iwqp->iwpd = iwpd;
592	iwqp->ibqp.qp_num = qp_num;
593	qp = &iwqp->sc_qp;
594	iwqp->iwscq = to_iwcq(init_attr->send_cq);
595	iwqp->iwrcq = to_iwcq(init_attr->recv_cq);
596
597	iwqp->host_ctx.va = init_info.host_ctx;
598	iwqp->host_ctx.pa = init_info.host_ctx_pa;
599	iwqp->host_ctx.size = I40IW_QP_CTX_SIZE;
600
601	init_info.pd = &iwpd->sc_pd;
602	init_info.qp_uk_init_info.qp_id = iwqp->ibqp.qp_num;
603	iwqp->ctx_info.qp_compl_ctx = (uintptr_t)qp;
604
605	if (init_attr->qp_type != IB_QPT_RC) {
606		err_code = -EOPNOTSUPP;
607		goto error;
608	}
609	if (iwdev->push_mode)
610		i40iw_alloc_push_page(iwdev, qp);
611	if (udata) {
612		err_code = ib_copy_from_udata(&req, udata, sizeof(req));
613		if (err_code) {
614			i40iw_pr_err("ib_copy_from_data\n");
615			goto error;
616		}
617		iwqp->ctx_info.qp_compl_ctx = req.user_compl_ctx;
618		iwqp->user_mode = 1;
619
620		if (req.user_wqe_buffers) {
621			struct i40iw_pbl *iwpbl;
622
623			spin_lock_irqsave(
624			    &ucontext->qp_reg_mem_list_lock, flags);
625			iwpbl = i40iw_get_pbl(
626			    (unsigned long)req.user_wqe_buffers,
627			    &ucontext->qp_reg_mem_list);
628			spin_unlock_irqrestore(
629			    &ucontext->qp_reg_mem_list_lock, flags);
630
631			if (!iwpbl) {
632				err_code = -ENODATA;
633				i40iw_pr_err("no pbl info\n");
634				goto error;
635			}
636			memcpy(&iwqp->iwpbl, iwpbl, sizeof(iwqp->iwpbl));
637		}
638		err_code = i40iw_setup_virt_qp(iwdev, iwqp, &init_info);
639	} else {
640		err_code = i40iw_setup_kmode_qp(iwdev, iwqp, &init_info);
641	}
642
643	if (err_code) {
644		i40iw_pr_err("setup qp failed\n");
645		goto error;
646	}
647
648	init_info.type = I40IW_QP_TYPE_IWARP;
649	ret = dev->iw_priv_qp_ops->qp_init(qp, &init_info);
650	if (ret) {
651		err_code = -EPROTO;
652		i40iw_pr_err("qp_init fail\n");
653		goto error;
654	}
655	ctx_info = &iwqp->ctx_info;
656	iwarp_info = &iwqp->iwarp_info;
657	iwarp_info->rd_enable = true;
658	iwarp_info->wr_rdresp_en = true;
659	if (!iwqp->user_mode) {
660		iwarp_info->fast_reg_en = true;
661		iwarp_info->priv_mode_en = true;
662	}
663	iwarp_info->ddp_ver = 1;
664	iwarp_info->rdmap_ver = 1;
665
666	ctx_info->iwarp_info_valid = true;
667	ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
668	ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
669	if (qp->push_idx == I40IW_INVALID_PUSH_PAGE_INDEX) {
670		ctx_info->push_mode_en = false;
671	} else {
672		ctx_info->push_mode_en = true;
673		ctx_info->push_idx = qp->push_idx;
674	}
675
676	ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
677					     (u64 *)iwqp->host_ctx.va,
678					     ctx_info);
679	ctx_info->iwarp_info_valid = false;
680	cqp_request = i40iw_get_cqp_request(iwcqp, true);
681	if (!cqp_request) {
682		err_code = -ENOMEM;
683		goto error;
684	}
685	cqp_info = &cqp_request->info;
686	qp_info = &cqp_request->info.in.u.qp_create.info;
687
688	memset(qp_info, 0, sizeof(*qp_info));
689
690	qp_info->cq_num_valid = true;
691	qp_info->next_iwarp_state = I40IW_QP_STATE_IDLE;
692
693	cqp_info->cqp_cmd = OP_QP_CREATE;
694	cqp_info->post_sq = 1;
695	cqp_info->in.u.qp_create.qp = qp;
696	cqp_info->in.u.qp_create.scratch = (uintptr_t)cqp_request;
697	ret = i40iw_handle_cqp_op(iwdev, cqp_request);
698	if (ret) {
699		i40iw_pr_err("CQP-OP QP create fail");
700		err_code = -EACCES;
701		goto error;
702	}
703
704	refcount_set(&iwqp->refcount, 1);
705	spin_lock_init(&iwqp->lock);
706	iwqp->sig_all = (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) ? 1 : 0;
707	iwdev->qp_table[qp_num] = iwqp;
708	i40iw_add_pdusecount(iwqp->iwpd);
709	i40iw_add_devusecount(iwdev);
710	if (udata) {
711		memset(&uresp, 0, sizeof(uresp));
712		uresp.actual_sq_size = sq_size;
713		uresp.actual_rq_size = rq_size;
714		uresp.qp_id = qp_num;
715		uresp.push_idx = qp->push_idx;
716		err_code = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
717		if (err_code) {
718			i40iw_pr_err("copy_to_udata failed\n");
719			i40iw_destroy_qp(&iwqp->ibqp, udata);
720			/* let the completion of the qp destroy free the qp */
721			return ERR_PTR(err_code);
722		}
723	}
724	init_completion(&iwqp->sq_drained);
725	init_completion(&iwqp->rq_drained);
726	init_completion(&iwqp->free_qp);
727
728	return &iwqp->ibqp;
729error:
730	i40iw_free_qp_resources(iwqp);
731	return ERR_PTR(err_code);
732}
733
734/**
735 * i40iw_query - query qp attributes
736 * @ibqp: qp pointer
737 * @attr: attributes pointer
738 * @attr_mask: Not used
739 * @init_attr: qp attributes to return
740 */
741static int i40iw_query_qp(struct ib_qp *ibqp,
742			  struct ib_qp_attr *attr,
743			  int attr_mask,
744			  struct ib_qp_init_attr *init_attr)
745{
746	struct i40iw_qp *iwqp = to_iwqp(ibqp);
747	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
748
749	attr->qp_state = iwqp->ibqp_state;
750	attr->cur_qp_state = attr->qp_state;
751	attr->qp_access_flags = 0;
752	attr->cap.max_send_wr = qp->qp_uk.sq_size;
753	attr->cap.max_recv_wr = qp->qp_uk.rq_size;
754	attr->cap.max_inline_data = I40IW_MAX_INLINE_DATA_SIZE;
755	attr->cap.max_send_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
756	attr->cap.max_recv_sge = I40IW_MAX_WQ_FRAGMENT_COUNT;
757	attr->port_num = 1;
758	init_attr->event_handler = iwqp->ibqp.event_handler;
759	init_attr->qp_context = iwqp->ibqp.qp_context;
760	init_attr->send_cq = iwqp->ibqp.send_cq;
761	init_attr->recv_cq = iwqp->ibqp.recv_cq;
762	init_attr->srq = iwqp->ibqp.srq;
763	init_attr->cap = attr->cap;
764	init_attr->port_num = 1;
765	return 0;
766}
767
768/**
769 * i40iw_hw_modify_qp - setup cqp for modify qp
770 * @iwdev: iwarp device
771 * @iwqp: qp ptr (user or kernel)
772 * @info: info for modify qp
773 * @wait: flag to wait or not for modify qp completion
774 */
775void i40iw_hw_modify_qp(struct i40iw_device *iwdev, struct i40iw_qp *iwqp,
776			struct i40iw_modify_qp_info *info, bool wait)
777{
778	struct i40iw_cqp_request *cqp_request;
779	struct cqp_commands_info *cqp_info;
780	struct i40iw_modify_qp_info *m_info;
781	struct i40iw_gen_ae_info ae_info;
782
783	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, wait);
784	if (!cqp_request)
785		return;
786
787	cqp_info = &cqp_request->info;
788	m_info = &cqp_info->in.u.qp_modify.info;
789	memcpy(m_info, info, sizeof(*m_info));
790	cqp_info->cqp_cmd = OP_QP_MODIFY;
791	cqp_info->post_sq = 1;
792	cqp_info->in.u.qp_modify.qp = &iwqp->sc_qp;
793	cqp_info->in.u.qp_modify.scratch = (uintptr_t)cqp_request;
794	if (!i40iw_handle_cqp_op(iwdev, cqp_request))
795		return;
796
797	switch (m_info->next_iwarp_state) {
798	case I40IW_QP_STATE_RTS:
799		if (iwqp->iwarp_state == I40IW_QP_STATE_IDLE)
800			i40iw_send_reset(iwqp->cm_node);
801		fallthrough;
802	case I40IW_QP_STATE_IDLE:
803	case I40IW_QP_STATE_TERMINATE:
804	case I40IW_QP_STATE_CLOSING:
805		ae_info.ae_code = I40IW_AE_BAD_CLOSE;
806		ae_info.ae_source = 0;
807		i40iw_gen_ae(iwdev, &iwqp->sc_qp, &ae_info, false);
808		break;
809	case I40IW_QP_STATE_ERROR:
810	default:
811		break;
812	}
813}
814
815/**
816 * i40iw_modify_qp - modify qp request
817 * @ibqp: qp's pointer for modify
818 * @attr: access attributes
819 * @attr_mask: state mask
820 * @udata: user data
821 */
822int i40iw_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
823		    int attr_mask, struct ib_udata *udata)
824{
825	struct i40iw_qp *iwqp = to_iwqp(ibqp);
826	struct i40iw_device *iwdev = iwqp->iwdev;
827	struct i40iw_qp_host_ctx_info *ctx_info;
828	struct i40iwarp_offload_info *iwarp_info;
829	struct i40iw_modify_qp_info info;
830	u8 issue_modify_qp = 0;
831	u8 dont_wait = 0;
832	u32 err;
833	unsigned long flags;
834
835	memset(&info, 0, sizeof(info));
836	ctx_info = &iwqp->ctx_info;
837	iwarp_info = &iwqp->iwarp_info;
838
839	spin_lock_irqsave(&iwqp->lock, flags);
840
841	if (attr_mask & IB_QP_STATE) {
842		if (iwdev->closing && attr->qp_state != IB_QPS_ERR) {
843			err = -EINVAL;
844			goto exit;
845		}
846
847		switch (attr->qp_state) {
848		case IB_QPS_INIT:
849		case IB_QPS_RTR:
850			if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_IDLE) {
851				err = -EINVAL;
852				goto exit;
853			}
854			if (iwqp->iwarp_state == I40IW_QP_STATE_INVALID) {
855				info.next_iwarp_state = I40IW_QP_STATE_IDLE;
856				issue_modify_qp = 1;
857			}
858			break;
859		case IB_QPS_RTS:
860			if ((iwqp->iwarp_state > (u32)I40IW_QP_STATE_RTS) ||
861			    (!iwqp->cm_id)) {
862				err = -EINVAL;
863				goto exit;
864			}
865
866			issue_modify_qp = 1;
867			iwqp->hw_tcp_state = I40IW_TCP_STATE_ESTABLISHED;
868			iwqp->hte_added = 1;
869			info.next_iwarp_state = I40IW_QP_STATE_RTS;
870			info.tcp_ctx_valid = true;
871			info.ord_valid = true;
872			info.arp_cache_idx_valid = true;
873			info.cq_num_valid = true;
874			break;
875		case IB_QPS_SQD:
876			if (iwqp->hw_iwarp_state > (u32)I40IW_QP_STATE_RTS) {
877				err = 0;
878				goto exit;
879			}
880			if ((iwqp->iwarp_state == (u32)I40IW_QP_STATE_CLOSING) ||
881			    (iwqp->iwarp_state < (u32)I40IW_QP_STATE_RTS)) {
882				err = 0;
883				goto exit;
884			}
885			if (iwqp->iwarp_state > (u32)I40IW_QP_STATE_CLOSING) {
886				err = -EINVAL;
887				goto exit;
888			}
889			info.next_iwarp_state = I40IW_QP_STATE_CLOSING;
890			issue_modify_qp = 1;
891			break;
892		case IB_QPS_SQE:
893			if (iwqp->iwarp_state >= (u32)I40IW_QP_STATE_TERMINATE) {
894				err = -EINVAL;
895				goto exit;
896			}
897			info.next_iwarp_state = I40IW_QP_STATE_TERMINATE;
898			issue_modify_qp = 1;
899			break;
900		case IB_QPS_ERR:
901		case IB_QPS_RESET:
902			if (iwqp->iwarp_state == (u32)I40IW_QP_STATE_ERROR) {
903				err = -EINVAL;
904				goto exit;
905			}
906			if (iwqp->sc_qp.term_flags)
907				i40iw_terminate_del_timer(&iwqp->sc_qp);
908			info.next_iwarp_state = I40IW_QP_STATE_ERROR;
909			if ((iwqp->hw_tcp_state > I40IW_TCP_STATE_CLOSED) &&
910			    iwdev->iw_status &&
911			    (iwqp->hw_tcp_state != I40IW_TCP_STATE_TIME_WAIT))
912				info.reset_tcp_conn = true;
913			else
914				dont_wait = 1;
915			issue_modify_qp = 1;
916			info.next_iwarp_state = I40IW_QP_STATE_ERROR;
917			break;
918		default:
919			err = -EINVAL;
920			goto exit;
921		}
922
923		iwqp->ibqp_state = attr->qp_state;
924
925	}
926	if (attr_mask & IB_QP_ACCESS_FLAGS) {
927		ctx_info->iwarp_info_valid = true;
928		if (attr->qp_access_flags & IB_ACCESS_LOCAL_WRITE)
929			iwarp_info->wr_rdresp_en = true;
930		if (attr->qp_access_flags & IB_ACCESS_REMOTE_WRITE)
931			iwarp_info->wr_rdresp_en = true;
932		if (attr->qp_access_flags & IB_ACCESS_REMOTE_READ)
933			iwarp_info->rd_enable = true;
934		if (attr->qp_access_flags & IB_ACCESS_MW_BIND)
935			iwarp_info->bind_en = true;
936
937		if (iwqp->user_mode) {
938			iwarp_info->rd_enable = true;
939			iwarp_info->wr_rdresp_en = true;
940			iwarp_info->priv_mode_en = false;
941		}
942	}
943
944	if (ctx_info->iwarp_info_valid) {
945		struct i40iw_sc_dev *dev = &iwdev->sc_dev;
946		int ret;
947
948		ctx_info->send_cq_num = iwqp->iwscq->sc_cq.cq_uk.cq_id;
949		ctx_info->rcv_cq_num = iwqp->iwrcq->sc_cq.cq_uk.cq_id;
950		ret = dev->iw_priv_qp_ops->qp_setctx(&iwqp->sc_qp,
951						     (u64 *)iwqp->host_ctx.va,
952						     ctx_info);
953		if (ret) {
954			i40iw_pr_err("setting QP context\n");
955			err = -EINVAL;
956			goto exit;
957		}
958	}
959
960	spin_unlock_irqrestore(&iwqp->lock, flags);
961
962	if (issue_modify_qp) {
963		i40iw_hw_modify_qp(iwdev, iwqp, &info, true);
964
965		spin_lock_irqsave(&iwqp->lock, flags);
966		iwqp->iwarp_state = info.next_iwarp_state;
967		spin_unlock_irqrestore(&iwqp->lock, flags);
968	}
969
970	if (issue_modify_qp && (iwqp->ibqp_state > IB_QPS_RTS)) {
971		if (dont_wait) {
972			if (iwqp->cm_id && iwqp->hw_tcp_state) {
973				spin_lock_irqsave(&iwqp->lock, flags);
974				iwqp->hw_tcp_state = I40IW_TCP_STATE_CLOSED;
975				iwqp->last_aeq = I40IW_AE_RESET_SENT;
976				spin_unlock_irqrestore(&iwqp->lock, flags);
977				i40iw_cm_disconn(iwqp);
978			}
979		} else {
980			spin_lock_irqsave(&iwqp->lock, flags);
981			if (iwqp->cm_id) {
982				if (atomic_inc_return(&iwqp->close_timer_started) == 1) {
983					iwqp->cm_id->add_ref(iwqp->cm_id);
984					i40iw_schedule_cm_timer(iwqp->cm_node,
985								(struct i40iw_puda_buf *)iwqp,
986								 I40IW_TIMER_TYPE_CLOSE, 1, 0);
987				}
988			}
989			spin_unlock_irqrestore(&iwqp->lock, flags);
990		}
991	}
992	return 0;
993exit:
994	spin_unlock_irqrestore(&iwqp->lock, flags);
995	return err;
996}
997
998/**
999 * cq_free_resources - free up recources for cq
1000 * @iwdev: iwarp device
1001 * @iwcq: cq ptr
1002 */
1003static void cq_free_resources(struct i40iw_device *iwdev, struct i40iw_cq *iwcq)
1004{
1005	struct i40iw_sc_cq *cq = &iwcq->sc_cq;
1006
1007	if (!iwcq->user_mode)
1008		i40iw_free_dma_mem(iwdev->sc_dev.hw, &iwcq->kmem);
1009	i40iw_free_resource(iwdev, iwdev->allocated_cqs, cq->cq_uk.cq_id);
1010}
1011
1012/**
1013 * i40iw_cq_wq_destroy - send cq destroy cqp
1014 * @iwdev: iwarp device
1015 * @cq: hardware control cq
1016 */
1017void i40iw_cq_wq_destroy(struct i40iw_device *iwdev, struct i40iw_sc_cq *cq)
1018{
1019	enum i40iw_status_code status;
1020	struct i40iw_cqp_request *cqp_request;
1021	struct cqp_commands_info *cqp_info;
1022
1023	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
1024	if (!cqp_request)
1025		return;
1026
1027	cqp_info = &cqp_request->info;
1028
1029	cqp_info->cqp_cmd = OP_CQ_DESTROY;
1030	cqp_info->post_sq = 1;
1031	cqp_info->in.u.cq_destroy.cq = cq;
1032	cqp_info->in.u.cq_destroy.scratch = (uintptr_t)cqp_request;
1033	status = i40iw_handle_cqp_op(iwdev, cqp_request);
1034	if (status)
1035		i40iw_pr_err("CQP-OP Destroy QP fail");
1036}
1037
1038/**
1039 * i40iw_destroy_cq - destroy cq
1040 * @ib_cq: cq pointer
1041 * @udata: user data or NULL for kernel object
1042 */
1043static int i40iw_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata)
1044{
1045	struct i40iw_cq *iwcq;
1046	struct i40iw_device *iwdev;
1047	struct i40iw_sc_cq *cq;
1048
1049	iwcq = to_iwcq(ib_cq);
1050	iwdev = to_iwdev(ib_cq->device);
1051	cq = &iwcq->sc_cq;
1052	i40iw_cq_wq_destroy(iwdev, cq);
1053	cq_free_resources(iwdev, iwcq);
1054	i40iw_rem_devusecount(iwdev);
1055	return 0;
1056}
1057
1058/**
1059 * i40iw_create_cq - create cq
1060 * @ibcq: CQ allocated
1061 * @attr: attributes for cq
1062 * @udata: user data
1063 */
1064static int i40iw_create_cq(struct ib_cq *ibcq,
1065			   const struct ib_cq_init_attr *attr,
1066			   struct ib_udata *udata)
1067{
1068	struct ib_device *ibdev = ibcq->device;
1069	struct i40iw_device *iwdev = to_iwdev(ibdev);
1070	struct i40iw_cq *iwcq = to_iwcq(ibcq);
1071	struct i40iw_pbl *iwpbl;
1072	u32 cq_num = 0;
1073	struct i40iw_sc_cq *cq;
1074	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
1075	struct i40iw_cq_init_info info = {};
1076	enum i40iw_status_code status;
1077	struct i40iw_cqp_request *cqp_request;
1078	struct cqp_commands_info *cqp_info;
1079	struct i40iw_cq_uk_init_info *ukinfo = &info.cq_uk_init_info;
1080	unsigned long flags;
1081	int err_code;
1082	int entries = attr->cqe;
1083
1084	if (iwdev->closing)
1085		return -ENODEV;
1086
1087	if (entries > iwdev->max_cqe)
1088		return -EINVAL;
1089
1090	err_code = i40iw_alloc_resource(iwdev, iwdev->allocated_cqs,
1091					iwdev->max_cq, &cq_num,
1092					&iwdev->next_cq);
1093	if (err_code)
1094		return err_code;
1095
1096	cq = &iwcq->sc_cq;
1097	cq->back_cq = (void *)iwcq;
1098	spin_lock_init(&iwcq->lock);
1099
1100	info.dev = dev;
1101	ukinfo->cq_size = max(entries, 4);
1102	ukinfo->cq_id = cq_num;
1103	iwcq->ibcq.cqe = info.cq_uk_init_info.cq_size;
1104	info.ceqe_mask = 0;
1105	if (attr->comp_vector < iwdev->ceqs_count)
1106		info.ceq_id = attr->comp_vector;
1107	info.ceq_id_valid = true;
1108	info.ceqe_mask = 1;
1109	info.type = I40IW_CQ_TYPE_IWARP;
1110	if (udata) {
1111		struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
1112			udata, struct i40iw_ucontext, ibucontext);
1113		struct i40iw_create_cq_req req;
1114		struct i40iw_cq_mr *cqmr;
1115
1116		memset(&req, 0, sizeof(req));
1117		iwcq->user_mode = true;
1118		if (ib_copy_from_udata(&req, udata, sizeof(struct i40iw_create_cq_req))) {
1119			err_code = -EFAULT;
1120			goto cq_free_resources;
1121		}
1122
1123		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
1124		iwpbl = i40iw_get_pbl((unsigned long)req.user_cq_buffer,
1125				      &ucontext->cq_reg_mem_list);
1126		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
1127		if (!iwpbl) {
1128			err_code = -EPROTO;
1129			goto cq_free_resources;
1130		}
1131
1132		iwcq->iwpbl = iwpbl;
1133		iwcq->cq_mem_size = 0;
1134		cqmr = &iwpbl->cq_mr;
1135		info.shadow_area_pa = cpu_to_le64(cqmr->shadow);
1136		if (iwpbl->pbl_allocated) {
1137			info.virtual_map = true;
1138			info.pbl_chunk_size = 1;
1139			info.first_pm_pbl_idx = cqmr->cq_pbl.idx;
1140		} else {
1141			info.cq_base_pa = cqmr->cq_pbl.addr;
1142		}
1143	} else {
1144		/* Kmode allocations */
1145		int rsize;
1146		int shadow;
1147
1148		rsize = info.cq_uk_init_info.cq_size * sizeof(struct i40iw_cqe);
1149		rsize = round_up(rsize, 256);
1150		shadow = I40IW_SHADOW_AREA_SIZE << 3;
1151		status = i40iw_allocate_dma_mem(dev->hw, &iwcq->kmem,
1152						rsize + shadow, 256);
1153		if (status) {
1154			err_code = -ENOMEM;
1155			goto cq_free_resources;
1156		}
1157		ukinfo->cq_base = iwcq->kmem.va;
1158		info.cq_base_pa = iwcq->kmem.pa;
1159		info.shadow_area_pa = info.cq_base_pa + rsize;
1160		ukinfo->shadow_area = iwcq->kmem.va + rsize;
1161	}
1162
1163	if (dev->iw_priv_cq_ops->cq_init(cq, &info)) {
1164		i40iw_pr_err("init cq fail\n");
1165		err_code = -EPROTO;
1166		goto cq_free_resources;
1167	}
1168
1169	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
1170	if (!cqp_request) {
1171		err_code = -ENOMEM;
1172		goto cq_free_resources;
1173	}
1174
1175	cqp_info = &cqp_request->info;
1176	cqp_info->cqp_cmd = OP_CQ_CREATE;
1177	cqp_info->post_sq = 1;
1178	cqp_info->in.u.cq_create.cq = cq;
1179	cqp_info->in.u.cq_create.scratch = (uintptr_t)cqp_request;
1180	status = i40iw_handle_cqp_op(iwdev, cqp_request);
1181	if (status) {
1182		i40iw_pr_err("CQP-OP Create QP fail");
1183		err_code = -EPROTO;
1184		goto cq_free_resources;
1185	}
1186
1187	if (udata) {
1188		struct i40iw_create_cq_resp resp;
1189
1190		memset(&resp, 0, sizeof(resp));
1191		resp.cq_id = info.cq_uk_init_info.cq_id;
1192		resp.cq_size = info.cq_uk_init_info.cq_size;
1193		if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
1194			i40iw_pr_err("copy to user data\n");
1195			err_code = -EPROTO;
1196			goto cq_destroy;
1197		}
1198	}
1199
1200	i40iw_add_devusecount(iwdev);
1201	return 0;
1202
1203cq_destroy:
1204	i40iw_cq_wq_destroy(iwdev, cq);
1205cq_free_resources:
1206	cq_free_resources(iwdev, iwcq);
1207	return err_code;
1208}
1209
1210/**
1211 * i40iw_get_user_access - get hw access from IB access
1212 * @acc: IB access to return hw access
1213 */
1214static inline u16 i40iw_get_user_access(int acc)
1215{
1216	u16 access = 0;
1217
1218	access |= (acc & IB_ACCESS_LOCAL_WRITE) ? I40IW_ACCESS_FLAGS_LOCALWRITE : 0;
1219	access |= (acc & IB_ACCESS_REMOTE_WRITE) ? I40IW_ACCESS_FLAGS_REMOTEWRITE : 0;
1220	access |= (acc & IB_ACCESS_REMOTE_READ) ? I40IW_ACCESS_FLAGS_REMOTEREAD : 0;
1221	access |= (acc & IB_ACCESS_MW_BIND) ? I40IW_ACCESS_FLAGS_BIND_WINDOW : 0;
1222	return access;
1223}
1224
1225/**
1226 * i40iw_free_stag - free stag resource
1227 * @iwdev: iwarp device
1228 * @stag: stag to free
1229 */
1230static void i40iw_free_stag(struct i40iw_device *iwdev, u32 stag)
1231{
1232	u32 stag_idx;
1233
1234	stag_idx = (stag & iwdev->mr_stagmask) >> I40IW_CQPSQ_STAG_IDX_SHIFT;
1235	i40iw_free_resource(iwdev, iwdev->allocated_mrs, stag_idx);
1236	i40iw_rem_devusecount(iwdev);
1237}
1238
1239/**
1240 * i40iw_create_stag - create random stag
1241 * @iwdev: iwarp device
1242 */
1243static u32 i40iw_create_stag(struct i40iw_device *iwdev)
1244{
1245	u32 stag = 0;
1246	u32 stag_index = 0;
1247	u32 next_stag_index;
1248	u32 driver_key;
1249	u32 random;
1250	u8 consumer_key;
1251	int ret;
1252
1253	get_random_bytes(&random, sizeof(random));
1254	consumer_key = (u8)random;
1255
1256	driver_key = random & ~iwdev->mr_stagmask;
1257	next_stag_index = (random & iwdev->mr_stagmask) >> 8;
1258	next_stag_index %= iwdev->max_mr;
1259
1260	ret = i40iw_alloc_resource(iwdev,
1261				   iwdev->allocated_mrs, iwdev->max_mr,
1262				   &stag_index, &next_stag_index);
1263	if (!ret) {
1264		stag = stag_index << I40IW_CQPSQ_STAG_IDX_SHIFT;
1265		stag |= driver_key;
1266		stag += (u32)consumer_key;
1267		i40iw_add_devusecount(iwdev);
1268	}
1269	return stag;
1270}
1271
1272/**
1273 * i40iw_next_pbl_addr - Get next pbl address
1274 * @pbl: pointer to a pble
1275 * @pinfo: info pointer
1276 * @idx: index
1277 */
1278static inline u64 *i40iw_next_pbl_addr(u64 *pbl,
1279				       struct i40iw_pble_info **pinfo,
1280				       u32 *idx)
1281{
1282	*idx += 1;
1283	if ((!(*pinfo)) || (*idx != (*pinfo)->cnt))
1284		return ++pbl;
1285	*idx = 0;
1286	(*pinfo)++;
1287	return (u64 *)(*pinfo)->addr;
1288}
1289
1290/**
1291 * i40iw_copy_user_pgaddrs - copy user page address to pble's os locally
1292 * @iwmr: iwmr for IB's user page addresses
1293 * @pbl: ple pointer to save 1 level or 0 level pble
1294 * @level: indicated level 0, 1 or 2
1295 */
1296static void i40iw_copy_user_pgaddrs(struct i40iw_mr *iwmr,
1297				    u64 *pbl,
1298				    enum i40iw_pble_level level)
1299{
1300	struct ib_umem *region = iwmr->region;
1301	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
1302	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
1303	struct i40iw_pble_info *pinfo;
1304	struct ib_block_iter biter;
1305	u32 idx = 0;
1306
1307	pinfo = (level == I40IW_LEVEL_1) ? NULL : palloc->level2.leaf;
1308
1309	if (iwmr->type == IW_MEMREG_TYPE_QP)
1310		iwpbl->qp_mr.sq_page = sg_page(region->sg_head.sgl);
1311
1312	rdma_umem_for_each_dma_block(region, &biter, iwmr->page_size) {
1313		*pbl = rdma_block_iter_dma_address(&biter);
1314		pbl = i40iw_next_pbl_addr(pbl, &pinfo, &idx);
1315	}
1316}
1317
1318/**
1319 * i40iw_check_mem_contiguous - check if pbls stored in arr are contiguous
1320 * @arr: lvl1 pbl array
1321 * @npages: page count
1322 * pg_size: page size
1323 *
1324 */
1325static bool i40iw_check_mem_contiguous(u64 *arr, u32 npages, u32 pg_size)
1326{
1327	u32 pg_idx;
1328
1329	for (pg_idx = 0; pg_idx < npages; pg_idx++) {
1330		if ((*arr + (pg_size * pg_idx)) != arr[pg_idx])
1331			return false;
1332	}
1333	return true;
1334}
1335
1336/**
1337 * i40iw_check_mr_contiguous - check if MR is physically contiguous
1338 * @palloc: pbl allocation struct
1339 * pg_size: page size
1340 */
1341static bool i40iw_check_mr_contiguous(struct i40iw_pble_alloc *palloc, u32 pg_size)
1342{
1343	struct i40iw_pble_level2 *lvl2 = &palloc->level2;
1344	struct i40iw_pble_info *leaf = lvl2->leaf;
1345	u64 *arr = NULL;
1346	u64 *start_addr = NULL;
1347	int i;
1348	bool ret;
1349
1350	if (palloc->level == I40IW_LEVEL_1) {
1351		arr = (u64 *)palloc->level1.addr;
1352		ret = i40iw_check_mem_contiguous(arr, palloc->total_cnt, pg_size);
1353		return ret;
1354	}
1355
1356	start_addr = (u64 *)leaf->addr;
1357
1358	for (i = 0; i < lvl2->leaf_cnt; i++, leaf++) {
1359		arr = (u64 *)leaf->addr;
1360		if ((*start_addr + (i * pg_size * PBLE_PER_PAGE)) != *arr)
1361			return false;
1362		ret = i40iw_check_mem_contiguous(arr, leaf->cnt, pg_size);
1363		if (!ret)
1364			return false;
1365	}
1366
1367	return true;
1368}
1369
1370/**
1371 * i40iw_setup_pbles - copy user pg address to pble's
1372 * @iwdev: iwarp device
1373 * @iwmr: mr pointer for this memory registration
1374 * @use_pbles: flag if to use pble's
1375 */
1376static int i40iw_setup_pbles(struct i40iw_device *iwdev,
1377			     struct i40iw_mr *iwmr,
1378			     bool use_pbles)
1379{
1380	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
1381	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
1382	struct i40iw_pble_info *pinfo;
1383	u64 *pbl;
1384	enum i40iw_status_code status;
1385	enum i40iw_pble_level level = I40IW_LEVEL_1;
1386
1387	if (use_pbles) {
1388		mutex_lock(&iwdev->pbl_mutex);
1389		status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
1390		mutex_unlock(&iwdev->pbl_mutex);
1391		if (status)
1392			return -ENOMEM;
1393
1394		iwpbl->pbl_allocated = true;
1395		level = palloc->level;
1396		pinfo = (level == I40IW_LEVEL_1) ? &palloc->level1 : palloc->level2.leaf;
1397		pbl = (u64 *)pinfo->addr;
1398	} else {
1399		pbl = iwmr->pgaddrmem;
1400	}
1401
1402	i40iw_copy_user_pgaddrs(iwmr, pbl, level);
1403
1404	if (use_pbles)
1405		iwmr->pgaddrmem[0] = *pbl;
1406
1407	return 0;
1408}
1409
1410/**
1411 * i40iw_handle_q_mem - handle memory for qp and cq
1412 * @iwdev: iwarp device
1413 * @req: information for q memory management
1414 * @iwpbl: pble struct
1415 * @use_pbles: flag to use pble
1416 */
1417static int i40iw_handle_q_mem(struct i40iw_device *iwdev,
1418			      struct i40iw_mem_reg_req *req,
1419			      struct i40iw_pbl *iwpbl,
1420			      bool use_pbles)
1421{
1422	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
1423	struct i40iw_mr *iwmr = iwpbl->iwmr;
1424	struct i40iw_qp_mr *qpmr = &iwpbl->qp_mr;
1425	struct i40iw_cq_mr *cqmr = &iwpbl->cq_mr;
1426	struct i40iw_hmc_pble *hmc_p;
1427	u64 *arr = iwmr->pgaddrmem;
1428	u32 pg_size;
1429	int err;
1430	int total;
1431	bool ret = true;
1432
1433	total = req->sq_pages + req->rq_pages + req->cq_pages;
1434	pg_size = iwmr->page_size;
1435
1436	err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
1437	if (err)
1438		return err;
1439
1440	if (use_pbles && (palloc->level != I40IW_LEVEL_1)) {
1441		i40iw_free_pble(iwdev->pble_rsrc, palloc);
1442		iwpbl->pbl_allocated = false;
1443		return -ENOMEM;
1444	}
1445
1446	if (use_pbles)
1447		arr = (u64 *)palloc->level1.addr;
1448
1449	if (iwmr->type == IW_MEMREG_TYPE_QP) {
1450		hmc_p = &qpmr->sq_pbl;
1451		qpmr->shadow = (dma_addr_t)arr[total];
1452
1453		if (use_pbles) {
1454			ret = i40iw_check_mem_contiguous(arr, req->sq_pages, pg_size);
1455			if (ret)
1456				ret = i40iw_check_mem_contiguous(&arr[req->sq_pages], req->rq_pages, pg_size);
1457		}
1458
1459		if (!ret) {
1460			hmc_p->idx = palloc->level1.idx;
1461			hmc_p = &qpmr->rq_pbl;
1462			hmc_p->idx = palloc->level1.idx + req->sq_pages;
1463		} else {
1464			hmc_p->addr = arr[0];
1465			hmc_p = &qpmr->rq_pbl;
1466			hmc_p->addr = arr[req->sq_pages];
1467		}
1468	} else {		/* CQ */
1469		hmc_p = &cqmr->cq_pbl;
1470		cqmr->shadow = (dma_addr_t)arr[total];
1471
1472		if (use_pbles)
1473			ret = i40iw_check_mem_contiguous(arr, req->cq_pages, pg_size);
1474
1475		if (!ret)
1476			hmc_p->idx = palloc->level1.idx;
1477		else
1478			hmc_p->addr = arr[0];
1479	}
1480
1481	if (use_pbles && ret) {
1482		i40iw_free_pble(iwdev->pble_rsrc, palloc);
1483		iwpbl->pbl_allocated = false;
1484	}
1485
1486	return err;
1487}
1488
1489/**
1490 * i40iw_hw_alloc_stag - cqp command to allocate stag
1491 * @iwdev: iwarp device
1492 * @iwmr: iwarp mr pointer
1493 */
1494static int i40iw_hw_alloc_stag(struct i40iw_device *iwdev, struct i40iw_mr *iwmr)
1495{
1496	struct i40iw_allocate_stag_info *info;
1497	struct ib_pd *pd = iwmr->ibmr.pd;
1498	struct i40iw_pd *iwpd = to_iwpd(pd);
1499	enum i40iw_status_code status;
1500	int err = 0;
1501	struct i40iw_cqp_request *cqp_request;
1502	struct cqp_commands_info *cqp_info;
1503
1504	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
1505	if (!cqp_request)
1506		return -ENOMEM;
1507
1508	cqp_info = &cqp_request->info;
1509	info = &cqp_info->in.u.alloc_stag.info;
1510	memset(info, 0, sizeof(*info));
1511	info->page_size = PAGE_SIZE;
1512	info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
1513	info->pd_id = iwpd->sc_pd.pd_id;
1514	info->total_len = iwmr->length;
1515	info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
1516	info->remote_access = true;
1517	cqp_info->cqp_cmd = OP_ALLOC_STAG;
1518	cqp_info->post_sq = 1;
1519	cqp_info->in.u.alloc_stag.dev = &iwdev->sc_dev;
1520	cqp_info->in.u.alloc_stag.scratch = (uintptr_t)cqp_request;
1521
1522	status = i40iw_handle_cqp_op(iwdev, cqp_request);
1523	if (status) {
1524		err = -ENOMEM;
1525		i40iw_pr_err("CQP-OP MR Reg fail");
1526	}
1527	return err;
1528}
1529
1530/**
1531 * i40iw_alloc_mr - register stag for fast memory registration
1532 * @pd: ibpd pointer
1533 * @mr_type: memory for stag registrion
1534 * @max_num_sg: man number of pages
1535 */
1536static struct ib_mr *i40iw_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1537				    u32 max_num_sg)
1538{
1539	struct i40iw_pd *iwpd = to_iwpd(pd);
1540	struct i40iw_device *iwdev = to_iwdev(pd->device);
1541	struct i40iw_pble_alloc *palloc;
1542	struct i40iw_pbl *iwpbl;
1543	struct i40iw_mr *iwmr;
1544	enum i40iw_status_code status;
1545	u32 stag;
1546	int err_code = -ENOMEM;
1547
1548	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
1549	if (!iwmr)
1550		return ERR_PTR(-ENOMEM);
1551
1552	stag = i40iw_create_stag(iwdev);
1553	if (!stag) {
1554		err_code = -EOVERFLOW;
1555		goto err;
1556	}
1557	stag &= ~I40IW_CQPSQ_STAG_KEY_MASK;
1558	iwmr->stag = stag;
1559	iwmr->ibmr.rkey = stag;
1560	iwmr->ibmr.lkey = stag;
1561	iwmr->ibmr.pd = pd;
1562	iwmr->ibmr.device = pd->device;
1563	iwpbl = &iwmr->iwpbl;
1564	iwpbl->iwmr = iwmr;
1565	iwmr->type = IW_MEMREG_TYPE_MEM;
1566	palloc = &iwpbl->pble_alloc;
1567	iwmr->page_cnt = max_num_sg;
1568	/* Use system PAGE_SIZE as the sg page sizes are unknown at this point */
1569	iwmr->length = max_num_sg * PAGE_SIZE;
1570	mutex_lock(&iwdev->pbl_mutex);
1571	status = i40iw_get_pble(&iwdev->sc_dev, iwdev->pble_rsrc, palloc, iwmr->page_cnt);
1572	mutex_unlock(&iwdev->pbl_mutex);
1573	if (status)
1574		goto err1;
1575
1576	if (palloc->level != I40IW_LEVEL_1)
1577		goto err2;
1578	err_code = i40iw_hw_alloc_stag(iwdev, iwmr);
1579	if (err_code)
1580		goto err2;
1581	iwpbl->pbl_allocated = true;
1582	i40iw_add_pdusecount(iwpd);
1583	return &iwmr->ibmr;
1584err2:
1585	i40iw_free_pble(iwdev->pble_rsrc, palloc);
1586err1:
1587	i40iw_free_stag(iwdev, stag);
1588err:
1589	kfree(iwmr);
1590	return ERR_PTR(err_code);
1591}
1592
1593/**
1594 * i40iw_set_page - populate pbl list for fmr
1595 * @ibmr: ib mem to access iwarp mr pointer
1596 * @addr: page dma address fro pbl list
1597 */
1598static int i40iw_set_page(struct ib_mr *ibmr, u64 addr)
1599{
1600	struct i40iw_mr *iwmr = to_iwmr(ibmr);
1601	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
1602	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
1603	u64 *pbl;
1604
1605	if (unlikely(iwmr->npages == iwmr->page_cnt))
1606		return -ENOMEM;
1607
1608	pbl = (u64 *)palloc->level1.addr;
1609	pbl[iwmr->npages++] = cpu_to_le64(addr);
1610	return 0;
1611}
1612
1613/**
1614 * i40iw_map_mr_sg - map of sg list for fmr
1615 * @ibmr: ib mem to access iwarp mr pointer
1616 * @sg: scatter gather list for fmr
1617 * @sg_nents: number of sg pages
1618 */
1619static int i40iw_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
1620			   int sg_nents, unsigned int *sg_offset)
1621{
1622	struct i40iw_mr *iwmr = to_iwmr(ibmr);
1623
1624	iwmr->npages = 0;
1625	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, i40iw_set_page);
1626}
1627
1628/**
1629 * i40iw_drain_sq - drain the send queue
1630 * @ibqp: ib qp pointer
1631 */
1632static void i40iw_drain_sq(struct ib_qp *ibqp)
1633{
1634	struct i40iw_qp *iwqp = to_iwqp(ibqp);
1635	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
1636
1637	if (I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
1638		wait_for_completion(&iwqp->sq_drained);
1639}
1640
1641/**
1642 * i40iw_drain_rq - drain the receive queue
1643 * @ibqp: ib qp pointer
1644 */
1645static void i40iw_drain_rq(struct ib_qp *ibqp)
1646{
1647	struct i40iw_qp *iwqp = to_iwqp(ibqp);
1648	struct i40iw_sc_qp *qp = &iwqp->sc_qp;
1649
1650	if (I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
1651		wait_for_completion(&iwqp->rq_drained);
1652}
1653
1654/**
1655 * i40iw_hwreg_mr - send cqp command for memory registration
1656 * @iwdev: iwarp device
1657 * @iwmr: iwarp mr pointer
1658 * @access: access for MR
1659 */
1660static int i40iw_hwreg_mr(struct i40iw_device *iwdev,
1661			  struct i40iw_mr *iwmr,
1662			  u16 access)
1663{
1664	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
1665	struct i40iw_reg_ns_stag_info *stag_info;
1666	struct ib_pd *pd = iwmr->ibmr.pd;
1667	struct i40iw_pd *iwpd = to_iwpd(pd);
1668	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
1669	enum i40iw_status_code status;
1670	int err = 0;
1671	struct i40iw_cqp_request *cqp_request;
1672	struct cqp_commands_info *cqp_info;
1673
1674	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
1675	if (!cqp_request)
1676		return -ENOMEM;
1677
1678	cqp_info = &cqp_request->info;
1679	stag_info = &cqp_info->in.u.mr_reg_non_shared.info;
1680	memset(stag_info, 0, sizeof(*stag_info));
1681	stag_info->va = (void *)(unsigned long)iwpbl->user_base;
1682	stag_info->stag_idx = iwmr->stag >> I40IW_CQPSQ_STAG_IDX_SHIFT;
1683	stag_info->stag_key = (u8)iwmr->stag;
1684	stag_info->total_len = iwmr->length;
1685	stag_info->access_rights = access;
1686	stag_info->pd_id = iwpd->sc_pd.pd_id;
1687	stag_info->all_memory = pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY;
1688	stag_info->addr_type = I40IW_ADDR_TYPE_VA_BASED;
1689	stag_info->page_size = iwmr->page_size;
1690
1691	if (iwpbl->pbl_allocated) {
1692		if (palloc->level == I40IW_LEVEL_1) {
1693			stag_info->first_pm_pbl_index = palloc->level1.idx;
1694			stag_info->chunk_size = 1;
1695		} else {
1696			stag_info->first_pm_pbl_index = palloc->level2.root.idx;
1697			stag_info->chunk_size = 3;
1698		}
1699	} else {
1700		stag_info->reg_addr_pa = iwmr->pgaddrmem[0];
1701	}
1702
1703	cqp_info->cqp_cmd = OP_MR_REG_NON_SHARED;
1704	cqp_info->post_sq = 1;
1705	cqp_info->in.u.mr_reg_non_shared.dev = &iwdev->sc_dev;
1706	cqp_info->in.u.mr_reg_non_shared.scratch = (uintptr_t)cqp_request;
1707
1708	status = i40iw_handle_cqp_op(iwdev, cqp_request);
1709	if (status) {
1710		err = -ENOMEM;
1711		i40iw_pr_err("CQP-OP MR Reg fail");
1712	}
1713	return err;
1714}
1715
1716/**
1717 * i40iw_reg_user_mr - Register a user memory region
1718 * @pd: ptr of pd
1719 * @start: virtual start address
1720 * @length: length of mr
1721 * @virt: virtual address
1722 * @acc: access of mr
1723 * @udata: user data
1724 */
1725static struct ib_mr *i40iw_reg_user_mr(struct ib_pd *pd,
1726				       u64 start,
1727				       u64 length,
1728				       u64 virt,
1729				       int acc,
1730				       struct ib_udata *udata)
1731{
1732	struct i40iw_pd *iwpd = to_iwpd(pd);
1733	struct i40iw_device *iwdev = to_iwdev(pd->device);
1734	struct i40iw_ucontext *ucontext = rdma_udata_to_drv_context(
1735		udata, struct i40iw_ucontext, ibucontext);
1736	struct i40iw_pble_alloc *palloc;
1737	struct i40iw_pbl *iwpbl;
1738	struct i40iw_mr *iwmr;
1739	struct ib_umem *region;
1740	struct i40iw_mem_reg_req req;
1741	u32 stag = 0;
1742	u16 access;
1743	bool use_pbles = false;
1744	unsigned long flags;
1745	int err = -ENOSYS;
1746	int ret;
1747
1748	if (!udata)
1749		return ERR_PTR(-EOPNOTSUPP);
1750
1751	if (iwdev->closing)
1752		return ERR_PTR(-ENODEV);
1753
1754	if (length > I40IW_MAX_MR_SIZE)
1755		return ERR_PTR(-EINVAL);
1756	region = ib_umem_get(pd->device, start, length, acc);
1757	if (IS_ERR(region))
1758		return (struct ib_mr *)region;
1759
1760	if (ib_copy_from_udata(&req, udata, sizeof(req))) {
1761		ib_umem_release(region);
1762		return ERR_PTR(-EFAULT);
1763	}
1764
1765	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
1766	if (!iwmr) {
1767		ib_umem_release(region);
1768		return ERR_PTR(-ENOMEM);
1769	}
1770
1771	iwpbl = &iwmr->iwpbl;
1772	iwpbl->iwmr = iwmr;
1773	iwmr->region = region;
1774	iwmr->ibmr.pd = pd;
1775	iwmr->ibmr.device = pd->device;
1776
1777	iwmr->page_size = PAGE_SIZE;
1778	if (req.reg_type == IW_MEMREG_TYPE_MEM)
1779		iwmr->page_size = ib_umem_find_best_pgsz(region, SZ_4K | SZ_2M,
1780							 virt);
1781	iwmr->length = region->length;
1782
1783	iwpbl->user_base = virt;
1784	palloc = &iwpbl->pble_alloc;
1785
1786	iwmr->type = req.reg_type;
1787	iwmr->page_cnt = ib_umem_num_dma_blocks(region, iwmr->page_size);
1788
1789	switch (req.reg_type) {
1790	case IW_MEMREG_TYPE_QP:
1791		use_pbles = ((req.sq_pages + req.rq_pages) > 2);
1792		err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
1793		if (err)
1794			goto error;
1795		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
1796		list_add_tail(&iwpbl->list, &ucontext->qp_reg_mem_list);
1797		iwpbl->on_list = true;
1798		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
1799		break;
1800	case IW_MEMREG_TYPE_CQ:
1801		use_pbles = (req.cq_pages > 1);
1802		err = i40iw_handle_q_mem(iwdev, &req, iwpbl, use_pbles);
1803		if (err)
1804			goto error;
1805
1806		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
1807		list_add_tail(&iwpbl->list, &ucontext->cq_reg_mem_list);
1808		iwpbl->on_list = true;
1809		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
1810		break;
1811	case IW_MEMREG_TYPE_MEM:
1812		use_pbles = (iwmr->page_cnt != 1);
1813		access = I40IW_ACCESS_FLAGS_LOCALREAD;
1814
1815		err = i40iw_setup_pbles(iwdev, iwmr, use_pbles);
1816		if (err)
1817			goto error;
1818
1819		if (use_pbles) {
1820			ret = i40iw_check_mr_contiguous(palloc, iwmr->page_size);
1821			if (ret) {
1822				i40iw_free_pble(iwdev->pble_rsrc, palloc);
1823				iwpbl->pbl_allocated = false;
1824			}
1825		}
1826
1827		access |= i40iw_get_user_access(acc);
1828		stag = i40iw_create_stag(iwdev);
1829		if (!stag) {
1830			err = -ENOMEM;
1831			goto error;
1832		}
1833
1834		iwmr->stag = stag;
1835		iwmr->ibmr.rkey = stag;
1836		iwmr->ibmr.lkey = stag;
1837
1838		err = i40iw_hwreg_mr(iwdev, iwmr, access);
1839		if (err) {
1840			i40iw_free_stag(iwdev, stag);
1841			goto error;
1842		}
1843
1844		break;
1845	default:
1846		goto error;
1847	}
1848
1849	iwmr->type = req.reg_type;
1850	if (req.reg_type == IW_MEMREG_TYPE_MEM)
1851		i40iw_add_pdusecount(iwpd);
1852	return &iwmr->ibmr;
1853
1854error:
1855	if (palloc->level != I40IW_LEVEL_0 && iwpbl->pbl_allocated)
1856		i40iw_free_pble(iwdev->pble_rsrc, palloc);
1857	ib_umem_release(region);
1858	kfree(iwmr);
1859	return ERR_PTR(err);
1860}
1861
1862/**
1863 * i40iw_reg_phys_mr - register kernel physical memory
1864 * @pd: ibpd pointer
1865 * @addr: physical address of memory to register
1866 * @size: size of memory to register
1867 * @acc: Access rights
1868 * @iova_start: start of virtual address for physical buffers
1869 */
1870struct ib_mr *i40iw_reg_phys_mr(struct ib_pd *pd,
1871				u64 addr,
1872				u64 size,
1873				int acc,
1874				u64 *iova_start)
1875{
1876	struct i40iw_pd *iwpd = to_iwpd(pd);
1877	struct i40iw_device *iwdev = to_iwdev(pd->device);
1878	struct i40iw_pbl *iwpbl;
1879	struct i40iw_mr *iwmr;
1880	enum i40iw_status_code status;
1881	u32 stag;
1882	u16 access = I40IW_ACCESS_FLAGS_LOCALREAD;
1883	int ret;
1884
1885	iwmr = kzalloc(sizeof(*iwmr), GFP_KERNEL);
1886	if (!iwmr)
1887		return ERR_PTR(-ENOMEM);
1888	iwmr->ibmr.pd = pd;
1889	iwmr->ibmr.device = pd->device;
1890	iwpbl = &iwmr->iwpbl;
1891	iwpbl->iwmr = iwmr;
1892	iwmr->type = IW_MEMREG_TYPE_MEM;
1893	iwpbl->user_base = *iova_start;
1894	stag = i40iw_create_stag(iwdev);
1895	if (!stag) {
1896		ret = -EOVERFLOW;
1897		goto err;
1898	}
1899	access |= i40iw_get_user_access(acc);
1900	iwmr->stag = stag;
1901	iwmr->ibmr.rkey = stag;
1902	iwmr->ibmr.lkey = stag;
1903	iwmr->page_cnt = 1;
1904	iwmr->pgaddrmem[0]  = addr;
1905	iwmr->length = size;
1906	status = i40iw_hwreg_mr(iwdev, iwmr, access);
1907	if (status) {
1908		i40iw_free_stag(iwdev, stag);
1909		ret = -ENOMEM;
1910		goto err;
1911	}
1912
1913	i40iw_add_pdusecount(iwpd);
1914	return &iwmr->ibmr;
1915 err:
1916	kfree(iwmr);
1917	return ERR_PTR(ret);
1918}
1919
1920/**
1921 * i40iw_get_dma_mr - register physical mem
1922 * @pd: ptr of pd
1923 * @acc: access for memory
1924 */
1925static struct ib_mr *i40iw_get_dma_mr(struct ib_pd *pd, int acc)
1926{
1927	u64 kva = 0;
1928
1929	return i40iw_reg_phys_mr(pd, 0, 0, acc, &kva);
1930}
1931
1932/**
1933 * i40iw_del_mem_list - Deleting pbl list entries for CQ/QP
1934 * @iwmr: iwmr for IB's user page addresses
1935 * @ucontext: ptr to user context
1936 */
1937static void i40iw_del_memlist(struct i40iw_mr *iwmr,
1938			      struct i40iw_ucontext *ucontext)
1939{
1940	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
1941	unsigned long flags;
1942
1943	switch (iwmr->type) {
1944	case IW_MEMREG_TYPE_CQ:
1945		spin_lock_irqsave(&ucontext->cq_reg_mem_list_lock, flags);
1946		if (iwpbl->on_list) {
1947			iwpbl->on_list = false;
1948			list_del(&iwpbl->list);
1949		}
1950		spin_unlock_irqrestore(&ucontext->cq_reg_mem_list_lock, flags);
1951		break;
1952	case IW_MEMREG_TYPE_QP:
1953		spin_lock_irqsave(&ucontext->qp_reg_mem_list_lock, flags);
1954		if (iwpbl->on_list) {
1955			iwpbl->on_list = false;
1956			list_del(&iwpbl->list);
1957		}
1958		spin_unlock_irqrestore(&ucontext->qp_reg_mem_list_lock, flags);
1959		break;
1960	default:
1961		break;
1962	}
1963}
1964
1965/**
1966 * i40iw_dereg_mr - deregister mr
1967 * @ib_mr: mr ptr for dereg
1968 */
1969static int i40iw_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
1970{
1971	struct ib_pd *ibpd = ib_mr->pd;
1972	struct i40iw_pd *iwpd = to_iwpd(ibpd);
1973	struct i40iw_mr *iwmr = to_iwmr(ib_mr);
1974	struct i40iw_device *iwdev = to_iwdev(ib_mr->device);
1975	enum i40iw_status_code status;
1976	struct i40iw_dealloc_stag_info *info;
1977	struct i40iw_pbl *iwpbl = &iwmr->iwpbl;
1978	struct i40iw_pble_alloc *palloc = &iwpbl->pble_alloc;
1979	struct i40iw_cqp_request *cqp_request;
1980	struct cqp_commands_info *cqp_info;
1981	u32 stag_idx;
1982
1983	ib_umem_release(iwmr->region);
1984
1985	if (iwmr->type != IW_MEMREG_TYPE_MEM) {
1986		/* region is released. only test for userness. */
1987		if (iwmr->region) {
1988			struct i40iw_ucontext *ucontext =
1989				rdma_udata_to_drv_context(
1990					udata,
1991					struct i40iw_ucontext,
1992					ibucontext);
1993
1994			i40iw_del_memlist(iwmr, ucontext);
1995		}
1996		if (iwpbl->pbl_allocated && iwmr->type != IW_MEMREG_TYPE_QP)
1997			i40iw_free_pble(iwdev->pble_rsrc, palloc);
1998		kfree(iwmr);
1999		return 0;
2000	}
2001
2002	cqp_request = i40iw_get_cqp_request(&iwdev->cqp, true);
2003	if (!cqp_request)
2004		return -ENOMEM;
2005
2006	cqp_info = &cqp_request->info;
2007	info = &cqp_info->in.u.dealloc_stag.info;
2008	memset(info, 0, sizeof(*info));
2009
2010	info->pd_id = cpu_to_le32(iwpd->sc_pd.pd_id & 0x00007fff);
2011	info->stag_idx = RS_64_1(ib_mr->rkey, I40IW_CQPSQ_STAG_IDX_SHIFT);
2012	stag_idx = info->stag_idx;
2013	info->mr = true;
2014	if (iwpbl->pbl_allocated)
2015		info->dealloc_pbl = true;
2016
2017	cqp_info->cqp_cmd = OP_DEALLOC_STAG;
2018	cqp_info->post_sq = 1;
2019	cqp_info->in.u.dealloc_stag.dev = &iwdev->sc_dev;
2020	cqp_info->in.u.dealloc_stag.scratch = (uintptr_t)cqp_request;
2021	status = i40iw_handle_cqp_op(iwdev, cqp_request);
2022	if (status)
2023		i40iw_pr_err("CQP-OP dealloc failed for stag_idx = 0x%x\n", stag_idx);
2024	i40iw_rem_pdusecount(iwpd, iwdev);
2025	i40iw_free_stag(iwdev, iwmr->stag);
2026	if (iwpbl->pbl_allocated)
2027		i40iw_free_pble(iwdev->pble_rsrc, palloc);
2028	kfree(iwmr);
2029	return 0;
2030}
2031
2032/**
2033 * hw_rev_show
2034 */
2035static ssize_t hw_rev_show(struct device *dev,
2036			   struct device_attribute *attr, char *buf)
2037{
2038	struct i40iw_ib_device *iwibdev =
2039		rdma_device_to_drv_device(dev, struct i40iw_ib_device, ibdev);
2040	u32 hw_rev = iwibdev->iwdev->sc_dev.hw_rev;
2041
2042	return sprintf(buf, "%x\n", hw_rev);
2043}
2044static DEVICE_ATTR_RO(hw_rev);
2045
2046/**
2047 * hca_type_show
2048 */
2049static ssize_t hca_type_show(struct device *dev,
2050			     struct device_attribute *attr, char *buf)
2051{
2052	return sprintf(buf, "I40IW\n");
2053}
2054static DEVICE_ATTR_RO(hca_type);
2055
2056/**
2057 * board_id_show
2058 */
2059static ssize_t board_id_show(struct device *dev,
2060			     struct device_attribute *attr, char *buf)
2061{
2062	return sprintf(buf, "%.*s\n", 32, "I40IW Board ID");
2063}
2064static DEVICE_ATTR_RO(board_id);
2065
2066static struct attribute *i40iw_dev_attributes[] = {
2067	&dev_attr_hw_rev.attr,
2068	&dev_attr_hca_type.attr,
2069	&dev_attr_board_id.attr,
2070	NULL
2071};
2072
2073static const struct attribute_group i40iw_attr_group = {
2074	.attrs = i40iw_dev_attributes,
2075};
2076
2077/**
2078 * i40iw_copy_sg_list - copy sg list for qp
2079 * @sg_list: copied into sg_list
2080 * @sgl: copy from sgl
2081 * @num_sges: count of sg entries
2082 */
2083static void i40iw_copy_sg_list(struct i40iw_sge *sg_list, struct ib_sge *sgl, int num_sges)
2084{
2085	unsigned int i;
2086
2087	for (i = 0; (i < num_sges) && (i < I40IW_MAX_WQ_FRAGMENT_COUNT); i++) {
2088		sg_list[i].tag_off = sgl[i].addr;
2089		sg_list[i].len = sgl[i].length;
2090		sg_list[i].stag = sgl[i].lkey;
2091	}
2092}
2093
2094/**
2095 * i40iw_post_send -  kernel application wr
2096 * @ibqp: qp ptr for wr
2097 * @ib_wr: work request ptr
2098 * @bad_wr: return of bad wr if err
2099 */
2100static int i40iw_post_send(struct ib_qp *ibqp,
2101			   const struct ib_send_wr *ib_wr,
2102			   const struct ib_send_wr **bad_wr)
2103{
2104	struct i40iw_qp *iwqp;
2105	struct i40iw_qp_uk *ukqp;
2106	struct i40iw_post_sq_info info;
2107	enum i40iw_status_code ret;
2108	int err = 0;
2109	unsigned long flags;
2110	bool inv_stag;
2111
2112	iwqp = (struct i40iw_qp *)ibqp;
2113	ukqp = &iwqp->sc_qp.qp_uk;
2114
2115	spin_lock_irqsave(&iwqp->lock, flags);
2116
2117	if (iwqp->flush_issued) {
2118		err = -EINVAL;
2119		goto out;
2120	}
2121
2122	while (ib_wr) {
2123		inv_stag = false;
2124		memset(&info, 0, sizeof(info));
2125		info.wr_id = (u64)(ib_wr->wr_id);
2126		if ((ib_wr->send_flags & IB_SEND_SIGNALED) || iwqp->sig_all)
2127			info.signaled = true;
2128		if (ib_wr->send_flags & IB_SEND_FENCE)
2129			info.read_fence = true;
2130
2131		switch (ib_wr->opcode) {
2132		case IB_WR_SEND:
2133		case IB_WR_SEND_WITH_INV:
2134			if (ib_wr->opcode == IB_WR_SEND) {
2135				if (ib_wr->send_flags & IB_SEND_SOLICITED)
2136					info.op_type = I40IW_OP_TYPE_SEND_SOL;
2137				else
2138					info.op_type = I40IW_OP_TYPE_SEND;
2139			} else {
2140				if (ib_wr->send_flags & IB_SEND_SOLICITED)
2141					info.op_type = I40IW_OP_TYPE_SEND_SOL_INV;
2142				else
2143					info.op_type = I40IW_OP_TYPE_SEND_INV;
2144			}
2145
2146			if (ib_wr->send_flags & IB_SEND_INLINE) {
2147				info.op.inline_send.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
2148				info.op.inline_send.len = ib_wr->sg_list[0].length;
2149				ret = ukqp->ops.iw_inline_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
2150			} else {
2151				info.op.send.num_sges = ib_wr->num_sge;
2152				info.op.send.sg_list = (struct i40iw_sge *)ib_wr->sg_list;
2153				ret = ukqp->ops.iw_send(ukqp, &info, ib_wr->ex.invalidate_rkey, false);
2154			}
2155
2156			if (ret) {
2157				if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
2158					err = -ENOMEM;
2159				else
2160					err = -EINVAL;
2161			}
2162			break;
2163		case IB_WR_RDMA_WRITE:
2164			info.op_type = I40IW_OP_TYPE_RDMA_WRITE;
2165
2166			if (ib_wr->send_flags & IB_SEND_INLINE) {
2167				info.op.inline_rdma_write.data = (void *)(unsigned long)ib_wr->sg_list[0].addr;
2168				info.op.inline_rdma_write.len = ib_wr->sg_list[0].length;
2169				info.op.inline_rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
2170				info.op.inline_rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
2171				ret = ukqp->ops.iw_inline_rdma_write(ukqp, &info, false);
2172			} else {
2173				info.op.rdma_write.lo_sg_list = (void *)ib_wr->sg_list;
2174				info.op.rdma_write.num_lo_sges = ib_wr->num_sge;
2175				info.op.rdma_write.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
2176				info.op.rdma_write.rem_addr.stag = rdma_wr(ib_wr)->rkey;
2177				ret = ukqp->ops.iw_rdma_write(ukqp, &info, false);
2178			}
2179
2180			if (ret) {
2181				if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
2182					err = -ENOMEM;
2183				else
2184					err = -EINVAL;
2185			}
2186			break;
2187		case IB_WR_RDMA_READ_WITH_INV:
2188			inv_stag = true;
2189			fallthrough;
2190		case IB_WR_RDMA_READ:
2191			if (ib_wr->num_sge > I40IW_MAX_SGE_RD) {
2192				err = -EINVAL;
2193				break;
2194			}
2195			info.op_type = I40IW_OP_TYPE_RDMA_READ;
2196			info.op.rdma_read.rem_addr.tag_off = rdma_wr(ib_wr)->remote_addr;
2197			info.op.rdma_read.rem_addr.stag = rdma_wr(ib_wr)->rkey;
2198			info.op.rdma_read.lo_addr.tag_off = ib_wr->sg_list->addr;
2199			info.op.rdma_read.lo_addr.stag = ib_wr->sg_list->lkey;
2200			info.op.rdma_read.lo_addr.len = ib_wr->sg_list->length;
2201			ret = ukqp->ops.iw_rdma_read(ukqp, &info, inv_stag, false);
2202			if (ret) {
2203				if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
2204					err = -ENOMEM;
2205				else
2206					err = -EINVAL;
2207			}
2208			break;
2209		case IB_WR_LOCAL_INV:
2210			info.op_type = I40IW_OP_TYPE_INV_STAG;
2211			info.op.inv_local_stag.target_stag = ib_wr->ex.invalidate_rkey;
2212			ret = ukqp->ops.iw_stag_local_invalidate(ukqp, &info, true);
2213			if (ret)
2214				err = -ENOMEM;
2215			break;
2216		case IB_WR_REG_MR:
2217		{
2218			struct i40iw_mr *iwmr = to_iwmr(reg_wr(ib_wr)->mr);
2219			int flags = reg_wr(ib_wr)->access;
2220			struct i40iw_pble_alloc *palloc = &iwmr->iwpbl.pble_alloc;
2221			struct i40iw_sc_dev *dev = &iwqp->iwdev->sc_dev;
2222			struct i40iw_fast_reg_stag_info info;
2223
2224			memset(&info, 0, sizeof(info));
2225			info.access_rights = I40IW_ACCESS_FLAGS_LOCALREAD;
2226			info.access_rights |= i40iw_get_user_access(flags);
2227			info.stag_key = reg_wr(ib_wr)->key & 0xff;
2228			info.stag_idx = reg_wr(ib_wr)->key >> 8;
2229			info.page_size = reg_wr(ib_wr)->mr->page_size;
2230			info.wr_id = ib_wr->wr_id;
2231
2232			info.addr_type = I40IW_ADDR_TYPE_VA_BASED;
2233			info.va = (void *)(uintptr_t)iwmr->ibmr.iova;
2234			info.total_len = iwmr->ibmr.length;
2235			info.reg_addr_pa = *(u64 *)palloc->level1.addr;
2236			info.first_pm_pbl_index = palloc->level1.idx;
2237			info.local_fence = ib_wr->send_flags & IB_SEND_FENCE;
2238			info.signaled = ib_wr->send_flags & IB_SEND_SIGNALED;
2239
2240			if (iwmr->npages > I40IW_MIN_PAGES_PER_FMR)
2241				info.chunk_size = 1;
2242
2243			ret = dev->iw_priv_qp_ops->iw_mr_fast_register(&iwqp->sc_qp, &info, true);
2244			if (ret)
2245				err = -ENOMEM;
2246			break;
2247		}
2248		default:
2249			err = -EINVAL;
2250			i40iw_pr_err(" upost_send bad opcode = 0x%x\n",
2251				     ib_wr->opcode);
2252			break;
2253		}
2254
2255		if (err)
2256			break;
2257		ib_wr = ib_wr->next;
2258	}
2259
2260out:
2261	if (err)
2262		*bad_wr = ib_wr;
2263	else
2264		ukqp->ops.iw_qp_post_wr(ukqp);
2265	spin_unlock_irqrestore(&iwqp->lock, flags);
2266
2267	return err;
2268}
2269
2270/**
2271 * i40iw_post_recv - post receive wr for kernel application
2272 * @ibqp: ib qp pointer
2273 * @ib_wr: work request for receive
2274 * @bad_wr: bad wr caused an error
2275 */
2276static int i40iw_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *ib_wr,
2277			   const struct ib_recv_wr **bad_wr)
2278{
2279	struct i40iw_qp *iwqp;
2280	struct i40iw_qp_uk *ukqp;
2281	struct i40iw_post_rq_info post_recv;
2282	struct i40iw_sge sg_list[I40IW_MAX_WQ_FRAGMENT_COUNT];
2283	enum i40iw_status_code ret = 0;
2284	unsigned long flags;
2285	int err = 0;
2286
2287	iwqp = (struct i40iw_qp *)ibqp;
2288	ukqp = &iwqp->sc_qp.qp_uk;
2289
2290	memset(&post_recv, 0, sizeof(post_recv));
2291	spin_lock_irqsave(&iwqp->lock, flags);
2292
2293	if (iwqp->flush_issued) {
2294		err = -EINVAL;
2295		goto out;
2296	}
2297
2298	while (ib_wr) {
2299		post_recv.num_sges = ib_wr->num_sge;
2300		post_recv.wr_id = ib_wr->wr_id;
2301		i40iw_copy_sg_list(sg_list, ib_wr->sg_list, ib_wr->num_sge);
2302		post_recv.sg_list = sg_list;
2303		ret = ukqp->ops.iw_post_receive(ukqp, &post_recv);
2304		if (ret) {
2305			i40iw_pr_err(" post_recv err %d\n", ret);
2306			if (ret == I40IW_ERR_QP_TOOMANY_WRS_POSTED)
2307				err = -ENOMEM;
2308			else
2309				err = -EINVAL;
2310			*bad_wr = ib_wr;
2311			goto out;
2312		}
2313		ib_wr = ib_wr->next;
2314	}
2315 out:
2316	spin_unlock_irqrestore(&iwqp->lock, flags);
2317	return err;
2318}
2319
2320/**
2321 * i40iw_poll_cq - poll cq for completion (kernel apps)
2322 * @ibcq: cq to poll
2323 * @num_entries: number of entries to poll
2324 * @entry: wr of entry completed
2325 */
2326static int i40iw_poll_cq(struct ib_cq *ibcq,
2327			 int num_entries,
2328			 struct ib_wc *entry)
2329{
2330	struct i40iw_cq *iwcq;
2331	int cqe_count = 0;
2332	struct i40iw_cq_poll_info cq_poll_info;
2333	enum i40iw_status_code ret;
2334	struct i40iw_cq_uk *ukcq;
2335	struct i40iw_sc_qp *qp;
2336	struct i40iw_qp *iwqp;
2337	unsigned long flags;
2338
2339	iwcq = (struct i40iw_cq *)ibcq;
2340	ukcq = &iwcq->sc_cq.cq_uk;
2341
2342	spin_lock_irqsave(&iwcq->lock, flags);
2343	while (cqe_count < num_entries) {
2344		ret = ukcq->ops.iw_cq_poll_completion(ukcq, &cq_poll_info);
2345		if (ret == I40IW_ERR_QUEUE_EMPTY) {
2346			break;
2347		} else if (ret == I40IW_ERR_QUEUE_DESTROYED) {
2348			continue;
2349		} else if (ret) {
2350			if (!cqe_count)
2351				cqe_count = -1;
2352			break;
2353		}
2354		entry->wc_flags = 0;
2355		entry->wr_id = cq_poll_info.wr_id;
2356		if (cq_poll_info.error) {
2357			entry->status = IB_WC_WR_FLUSH_ERR;
2358			entry->vendor_err = cq_poll_info.major_err << 16 | cq_poll_info.minor_err;
2359		} else {
2360			entry->status = IB_WC_SUCCESS;
2361		}
2362
2363		switch (cq_poll_info.op_type) {
2364		case I40IW_OP_TYPE_RDMA_WRITE:
2365			entry->opcode = IB_WC_RDMA_WRITE;
2366			break;
2367		case I40IW_OP_TYPE_RDMA_READ_INV_STAG:
2368		case I40IW_OP_TYPE_RDMA_READ:
2369			entry->opcode = IB_WC_RDMA_READ;
2370			break;
2371		case I40IW_OP_TYPE_SEND_SOL:
2372		case I40IW_OP_TYPE_SEND_SOL_INV:
2373		case I40IW_OP_TYPE_SEND_INV:
2374		case I40IW_OP_TYPE_SEND:
2375			entry->opcode = IB_WC_SEND;
2376			break;
2377		case I40IW_OP_TYPE_REC:
2378			entry->opcode = IB_WC_RECV;
2379			break;
2380		default:
2381			entry->opcode = IB_WC_RECV;
2382			break;
2383		}
2384
2385		entry->ex.imm_data = 0;
2386		qp = (struct i40iw_sc_qp *)cq_poll_info.qp_handle;
2387		entry->qp = (struct ib_qp *)qp->back_qp;
2388		entry->src_qp = cq_poll_info.qp_id;
2389		iwqp = (struct i40iw_qp *)qp->back_qp;
2390		if (iwqp->iwarp_state > I40IW_QP_STATE_RTS) {
2391			if (!I40IW_RING_MORE_WORK(qp->qp_uk.sq_ring))
2392				complete(&iwqp->sq_drained);
2393			if (!I40IW_RING_MORE_WORK(qp->qp_uk.rq_ring))
2394				complete(&iwqp->rq_drained);
2395		}
2396		entry->byte_len = cq_poll_info.bytes_xfered;
2397		entry++;
2398		cqe_count++;
2399	}
2400	spin_unlock_irqrestore(&iwcq->lock, flags);
2401	return cqe_count;
2402}
2403
2404/**
2405 * i40iw_req_notify_cq - arm cq kernel application
2406 * @ibcq: cq to arm
2407 * @notify_flags: notofication flags
2408 */
2409static int i40iw_req_notify_cq(struct ib_cq *ibcq,
2410			       enum ib_cq_notify_flags notify_flags)
2411{
2412	struct i40iw_cq *iwcq;
2413	struct i40iw_cq_uk *ukcq;
2414	unsigned long flags;
2415	enum i40iw_completion_notify cq_notify = IW_CQ_COMPL_EVENT;
2416
2417	iwcq = (struct i40iw_cq *)ibcq;
2418	ukcq = &iwcq->sc_cq.cq_uk;
2419	if (notify_flags == IB_CQ_SOLICITED)
2420		cq_notify = IW_CQ_COMPL_SOLICITED;
2421	spin_lock_irqsave(&iwcq->lock, flags);
2422	ukcq->ops.iw_cq_request_notification(ukcq, cq_notify);
2423	spin_unlock_irqrestore(&iwcq->lock, flags);
2424	return 0;
2425}
2426
2427/**
2428 * i40iw_port_immutable - return port's immutable data
2429 * @ibdev: ib dev struct
2430 * @port_num: port number
2431 * @immutable: immutable data for the port return
2432 */
2433static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
2434				struct ib_port_immutable *immutable)
2435{
2436	struct ib_port_attr attr;
2437	int err;
2438
2439	immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
2440
2441	err = ib_query_port(ibdev, port_num, &attr);
2442
2443	if (err)
2444		return err;
2445
2446	immutable->gid_tbl_len = attr.gid_tbl_len;
2447
2448	return 0;
2449}
2450
2451static const char * const i40iw_hw_stat_names[] = {
2452	// 32bit names
2453	[I40IW_HW_STAT_INDEX_IP4RXDISCARD] = "ip4InDiscards",
2454	[I40IW_HW_STAT_INDEX_IP4RXTRUNC] = "ip4InTruncatedPkts",
2455	[I40IW_HW_STAT_INDEX_IP4TXNOROUTE] = "ip4OutNoRoutes",
2456	[I40IW_HW_STAT_INDEX_IP6RXDISCARD] = "ip6InDiscards",
2457	[I40IW_HW_STAT_INDEX_IP6RXTRUNC] = "ip6InTruncatedPkts",
2458	[I40IW_HW_STAT_INDEX_IP6TXNOROUTE] = "ip6OutNoRoutes",
2459	[I40IW_HW_STAT_INDEX_TCPRTXSEG] = "tcpRetransSegs",
2460	[I40IW_HW_STAT_INDEX_TCPRXOPTERR] = "tcpInOptErrors",
2461	[I40IW_HW_STAT_INDEX_TCPRXPROTOERR] = "tcpInProtoErrors",
2462	// 64bit names
2463	[I40IW_HW_STAT_INDEX_IP4RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
2464		"ip4InOctets",
2465	[I40IW_HW_STAT_INDEX_IP4RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2466		"ip4InPkts",
2467	[I40IW_HW_STAT_INDEX_IP4RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
2468		"ip4InReasmRqd",
2469	[I40IW_HW_STAT_INDEX_IP4RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2470		"ip4InMcastPkts",
2471	[I40IW_HW_STAT_INDEX_IP4TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
2472		"ip4OutOctets",
2473	[I40IW_HW_STAT_INDEX_IP4TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2474		"ip4OutPkts",
2475	[I40IW_HW_STAT_INDEX_IP4TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
2476		"ip4OutSegRqd",
2477	[I40IW_HW_STAT_INDEX_IP4TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2478		"ip4OutMcastPkts",
2479	[I40IW_HW_STAT_INDEX_IP6RXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
2480		"ip6InOctets",
2481	[I40IW_HW_STAT_INDEX_IP6RXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2482		"ip6InPkts",
2483	[I40IW_HW_STAT_INDEX_IP6RXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
2484		"ip6InReasmRqd",
2485	[I40IW_HW_STAT_INDEX_IP6RXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2486		"ip6InMcastPkts",
2487	[I40IW_HW_STAT_INDEX_IP6TXOCTS + I40IW_HW_STAT_INDEX_MAX_32] =
2488		"ip6OutOctets",
2489	[I40IW_HW_STAT_INDEX_IP6TXPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2490		"ip6OutPkts",
2491	[I40IW_HW_STAT_INDEX_IP6TXFRAGS + I40IW_HW_STAT_INDEX_MAX_32] =
2492		"ip6OutSegRqd",
2493	[I40IW_HW_STAT_INDEX_IP6TXMCPKTS + I40IW_HW_STAT_INDEX_MAX_32] =
2494		"ip6OutMcastPkts",
2495	[I40IW_HW_STAT_INDEX_TCPRXSEGS + I40IW_HW_STAT_INDEX_MAX_32] =
2496		"tcpInSegs",
2497	[I40IW_HW_STAT_INDEX_TCPTXSEG + I40IW_HW_STAT_INDEX_MAX_32] =
2498		"tcpOutSegs",
2499	[I40IW_HW_STAT_INDEX_RDMARXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
2500		"iwInRdmaReads",
2501	[I40IW_HW_STAT_INDEX_RDMARXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
2502		"iwInRdmaSends",
2503	[I40IW_HW_STAT_INDEX_RDMARXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
2504		"iwInRdmaWrites",
2505	[I40IW_HW_STAT_INDEX_RDMATXRDS + I40IW_HW_STAT_INDEX_MAX_32] =
2506		"iwOutRdmaReads",
2507	[I40IW_HW_STAT_INDEX_RDMATXSNDS + I40IW_HW_STAT_INDEX_MAX_32] =
2508		"iwOutRdmaSends",
2509	[I40IW_HW_STAT_INDEX_RDMATXWRS + I40IW_HW_STAT_INDEX_MAX_32] =
2510		"iwOutRdmaWrites",
2511	[I40IW_HW_STAT_INDEX_RDMAVBND + I40IW_HW_STAT_INDEX_MAX_32] =
2512		"iwRdmaBnd",
2513	[I40IW_HW_STAT_INDEX_RDMAVINV + I40IW_HW_STAT_INDEX_MAX_32] =
2514		"iwRdmaInv"
2515};
2516
2517static void i40iw_get_dev_fw_str(struct ib_device *dev, char *str)
2518{
2519	struct i40iw_device *iwdev = to_iwdev(dev);
2520
2521	snprintf(str, IB_FW_VERSION_NAME_MAX, "%llu.%llu",
2522		 i40iw_fw_major_ver(&iwdev->sc_dev),
2523		 i40iw_fw_minor_ver(&iwdev->sc_dev));
2524}
2525
2526/**
2527 * i40iw_alloc_hw_stats - Allocate a hw stats structure
2528 * @ibdev: device pointer from stack
2529 * @port_num: port number
2530 */
2531static struct rdma_hw_stats *i40iw_alloc_hw_stats(struct ib_device *ibdev,
2532						  u8 port_num)
2533{
2534	struct i40iw_device *iwdev = to_iwdev(ibdev);
2535	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
2536	int num_counters = I40IW_HW_STAT_INDEX_MAX_32 +
2537		I40IW_HW_STAT_INDEX_MAX_64;
2538	unsigned long lifespan = RDMA_HW_STATS_DEFAULT_LIFESPAN;
2539
2540	BUILD_BUG_ON(ARRAY_SIZE(i40iw_hw_stat_names) !=
2541		     (I40IW_HW_STAT_INDEX_MAX_32 +
2542		      I40IW_HW_STAT_INDEX_MAX_64));
2543
2544	/*
2545	 * PFs get the default update lifespan, but VFs only update once
2546	 * per second
2547	 */
2548	if (!dev->is_pf)
2549		lifespan = 1000;
2550	return rdma_alloc_hw_stats_struct(i40iw_hw_stat_names, num_counters,
2551					  lifespan);
2552}
2553
2554/**
2555 * i40iw_get_hw_stats - Populates the rdma_hw_stats structure
2556 * @ibdev: device pointer from stack
2557 * @stats: stats pointer from stack
2558 * @port_num: port number
2559 * @index: which hw counter the stack is requesting we update
2560 */
2561static int i40iw_get_hw_stats(struct ib_device *ibdev,
2562			      struct rdma_hw_stats *stats,
2563			      u8 port_num, int index)
2564{
2565	struct i40iw_device *iwdev = to_iwdev(ibdev);
2566	struct i40iw_sc_dev *dev = &iwdev->sc_dev;
2567	struct i40iw_vsi_pestat *devstat = iwdev->vsi.pestat;
2568	struct i40iw_dev_hw_stats *hw_stats = &devstat->hw_stats;
2569
2570	if (dev->is_pf) {
2571		i40iw_hw_stats_read_all(devstat, &devstat->hw_stats);
2572	} else {
2573		if (i40iw_vchnl_vf_get_pe_stats(dev, &devstat->hw_stats))
2574			return -ENOSYS;
2575	}
2576
2577	memcpy(&stats->value[0], hw_stats, sizeof(*hw_stats));
2578
2579	return stats->num_counters;
2580}
2581
2582/**
2583 * i40iw_query_gid - Query port GID
2584 * @ibdev: device pointer from stack
2585 * @port: port number
2586 * @index: Entry index
2587 * @gid: Global ID
2588 */
2589static int i40iw_query_gid(struct ib_device *ibdev,
2590			   u8 port,
2591			   int index,
2592			   union ib_gid *gid)
2593{
2594	struct i40iw_device *iwdev = to_iwdev(ibdev);
2595
2596	memset(gid->raw, 0, sizeof(gid->raw));
2597	ether_addr_copy(gid->raw, iwdev->netdev->dev_addr);
2598	return 0;
2599}
2600
2601static const struct ib_device_ops i40iw_dev_ops = {
2602	.owner = THIS_MODULE,
2603	.driver_id = RDMA_DRIVER_I40IW,
2604	/* NOTE: Older kernels wrongly use 0 for the uverbs_abi_ver */
2605	.uverbs_abi_ver = I40IW_ABI_VER,
2606
2607	.alloc_hw_stats = i40iw_alloc_hw_stats,
2608	.alloc_mr = i40iw_alloc_mr,
2609	.alloc_pd = i40iw_alloc_pd,
2610	.alloc_ucontext = i40iw_alloc_ucontext,
2611	.create_cq = i40iw_create_cq,
2612	.create_qp = i40iw_create_qp,
2613	.dealloc_pd = i40iw_dealloc_pd,
2614	.dealloc_ucontext = i40iw_dealloc_ucontext,
2615	.dereg_mr = i40iw_dereg_mr,
2616	.destroy_cq = i40iw_destroy_cq,
2617	.destroy_qp = i40iw_destroy_qp,
2618	.drain_rq = i40iw_drain_rq,
2619	.drain_sq = i40iw_drain_sq,
2620	.get_dev_fw_str = i40iw_get_dev_fw_str,
2621	.get_dma_mr = i40iw_get_dma_mr,
2622	.get_hw_stats = i40iw_get_hw_stats,
2623	.get_port_immutable = i40iw_port_immutable,
2624	.iw_accept = i40iw_accept,
2625	.iw_add_ref = i40iw_qp_add_ref,
2626	.iw_connect = i40iw_connect,
2627	.iw_create_listen = i40iw_create_listen,
2628	.iw_destroy_listen = i40iw_destroy_listen,
2629	.iw_get_qp = i40iw_get_qp,
2630	.iw_reject = i40iw_reject,
2631	.iw_rem_ref = i40iw_qp_rem_ref,
2632	.map_mr_sg = i40iw_map_mr_sg,
2633	.mmap = i40iw_mmap,
2634	.modify_qp = i40iw_modify_qp,
2635	.poll_cq = i40iw_poll_cq,
2636	.post_recv = i40iw_post_recv,
2637	.post_send = i40iw_post_send,
2638	.query_device = i40iw_query_device,
2639	.query_gid = i40iw_query_gid,
2640	.query_port = i40iw_query_port,
2641	.query_qp = i40iw_query_qp,
2642	.reg_user_mr = i40iw_reg_user_mr,
2643	.req_notify_cq = i40iw_req_notify_cq,
2644	INIT_RDMA_OBJ_SIZE(ib_pd, i40iw_pd, ibpd),
2645	INIT_RDMA_OBJ_SIZE(ib_cq, i40iw_cq, ibcq),
2646	INIT_RDMA_OBJ_SIZE(ib_ucontext, i40iw_ucontext, ibucontext),
2647};
2648
2649/**
2650 * i40iw_init_rdma_device - initialization of iwarp device
2651 * @iwdev: iwarp device
2652 */
2653static struct i40iw_ib_device *i40iw_init_rdma_device(struct i40iw_device *iwdev)
2654{
2655	struct i40iw_ib_device *iwibdev;
2656	struct net_device *netdev = iwdev->netdev;
2657	struct pci_dev *pcidev = iwdev->hw.pcidev;
2658
2659	iwibdev = ib_alloc_device(i40iw_ib_device, ibdev);
2660	if (!iwibdev) {
2661		i40iw_pr_err("iwdev == NULL\n");
2662		return NULL;
2663	}
2664	iwdev->iwibdev = iwibdev;
2665	iwibdev->iwdev = iwdev;
2666
2667	iwibdev->ibdev.node_type = RDMA_NODE_RNIC;
2668	ether_addr_copy((u8 *)&iwibdev->ibdev.node_guid, netdev->dev_addr);
2669
2670	iwibdev->ibdev.uverbs_cmd_mask =
2671	    (1ull << IB_USER_VERBS_CMD_GET_CONTEXT) |
2672	    (1ull << IB_USER_VERBS_CMD_QUERY_DEVICE) |
2673	    (1ull << IB_USER_VERBS_CMD_QUERY_PORT) |
2674	    (1ull << IB_USER_VERBS_CMD_ALLOC_PD) |
2675	    (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) |
2676	    (1ull << IB_USER_VERBS_CMD_REG_MR) |
2677	    (1ull << IB_USER_VERBS_CMD_DEREG_MR) |
2678	    (1ull << IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) |
2679	    (1ull << IB_USER_VERBS_CMD_CREATE_CQ) |
2680	    (1ull << IB_USER_VERBS_CMD_DESTROY_CQ) |
2681	    (1ull << IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) |
2682	    (1ull << IB_USER_VERBS_CMD_CREATE_QP) |
2683	    (1ull << IB_USER_VERBS_CMD_MODIFY_QP) |
2684	    (1ull << IB_USER_VERBS_CMD_QUERY_QP) |
2685	    (1ull << IB_USER_VERBS_CMD_POLL_CQ) |
2686	    (1ull << IB_USER_VERBS_CMD_CREATE_AH) |
2687	    (1ull << IB_USER_VERBS_CMD_DESTROY_AH) |
2688	    (1ull << IB_USER_VERBS_CMD_DESTROY_QP) |
2689	    (1ull << IB_USER_VERBS_CMD_POST_RECV) |
2690	    (1ull << IB_USER_VERBS_CMD_POST_SEND);
2691	iwibdev->ibdev.phys_port_cnt = 1;
2692	iwibdev->ibdev.num_comp_vectors = iwdev->ceqs_count;
2693	iwibdev->ibdev.dev.parent = &pcidev->dev;
2694	memcpy(iwibdev->ibdev.iw_ifname, netdev->name,
2695	       sizeof(iwibdev->ibdev.iw_ifname));
2696	ib_set_device_ops(&iwibdev->ibdev, &i40iw_dev_ops);
2697
2698	return iwibdev;
2699}
2700
2701/**
2702 * i40iw_port_ibevent - indicate port event
2703 * @iwdev: iwarp device
2704 */
2705void i40iw_port_ibevent(struct i40iw_device *iwdev)
2706{
2707	struct i40iw_ib_device *iwibdev = iwdev->iwibdev;
2708	struct ib_event event;
2709
2710	event.device = &iwibdev->ibdev;
2711	event.element.port_num = 1;
2712	event.event = iwdev->iw_status ? IB_EVENT_PORT_ACTIVE : IB_EVENT_PORT_ERR;
2713	ib_dispatch_event(&event);
2714}
2715
2716/**
2717 * i40iw_destroy_rdma_device - destroy rdma device and free resources
2718 * @iwibdev: IB device ptr
2719 */
2720void i40iw_destroy_rdma_device(struct i40iw_ib_device *iwibdev)
2721{
2722	ib_unregister_device(&iwibdev->ibdev);
2723	wait_event_timeout(iwibdev->iwdev->close_wq,
2724			   !atomic64_read(&iwibdev->iwdev->use_count),
2725			   I40IW_EVENT_TIMEOUT);
2726	ib_dealloc_device(&iwibdev->ibdev);
2727}
2728
2729/**
2730 * i40iw_register_rdma_device - register iwarp device to IB
2731 * @iwdev: iwarp device
2732 */
2733int i40iw_register_rdma_device(struct i40iw_device *iwdev)
2734{
2735	int ret;
2736	struct i40iw_ib_device *iwibdev;
2737
2738	iwdev->iwibdev = i40iw_init_rdma_device(iwdev);
2739	if (!iwdev->iwibdev)
2740		return -ENOMEM;
2741	iwibdev = iwdev->iwibdev;
2742	rdma_set_device_sysfs_group(&iwibdev->ibdev, &i40iw_attr_group);
2743	ret = ib_device_set_netdev(&iwibdev->ibdev, iwdev->netdev, 1);
2744	if (ret)
2745		goto error;
2746
2747	dma_set_max_seg_size(&iwdev->hw.pcidev->dev, UINT_MAX);
2748	ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", &iwdev->hw.pcidev->dev);
2749	if (ret)
2750		goto error;
2751
2752	return 0;
2753error:
2754	ib_dealloc_device(&iwdev->iwibdev->ibdev);
2755	return ret;
2756}
2757