1/* This file is part of the Emulex RoCE Device Driver for
2 * RoCE (RDMA over Converged Ethernet) adapters.
3 * Copyright (C) 2012-2015 Emulex. All rights reserved.
4 * EMULEX and SLI are trademarks of Emulex.
5 * www.emulex.com
6 *
7 * This software is available to you under a choice of one of two licenses.
8 * You may choose to be licensed under the terms of the GNU General Public
9 * License (GPL) Version 2, available from the file COPYING in the main
10 * directory of this source tree, or the BSD license below:
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 * - Redistributions of source code must retain the above copyright notice,
17 *   this list of conditions and the following disclaimer.
18 *
19 * - Redistributions in binary form must reproduce the above copyright
20 *   notice, this list of conditions and the following disclaimer in
21 *   the documentation and/or other materials provided with the distribution.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
33 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 *
35 * Contact Information:
36 * linux-drivers@emulex.com
37 *
38 * Emulex
39 * 3333 Susan Street
40 * Costa Mesa, CA 92626
41 */
42
43#include <linux/dma-mapping.h>
44#include <rdma/ib_verbs.h>
45#include <rdma/ib_user_verbs.h>
46#include <rdma/iw_cm.h>
47#include <rdma/ib_umem.h>
48#include <rdma/ib_addr.h>
49#include <rdma/ib_cache.h>
50#include <rdma/uverbs_ioctl.h>
51
52#include "ocrdma.h"
53#include "ocrdma_hw.h"
54#include "ocrdma_verbs.h"
55#include <rdma/ocrdma-abi.h>
56
57int ocrdma_query_pkey(struct ib_device *ibdev, u8 port, u16 index, u16 *pkey)
58{
59	if (index > 0)
60		return -EINVAL;
61
62	*pkey = 0xffff;
63	return 0;
64}
65
66int ocrdma_query_device(struct ib_device *ibdev, struct ib_device_attr *attr,
67			struct ib_udata *uhw)
68{
69	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
70
71	if (uhw->inlen || uhw->outlen)
72		return -EINVAL;
73
74	memset(attr, 0, sizeof *attr);
75	memcpy(&attr->fw_ver, &dev->attr.fw_ver[0],
76	       min(sizeof(dev->attr.fw_ver), sizeof(attr->fw_ver)));
77	ocrdma_get_guid(dev, (u8 *)&attr->sys_image_guid);
78	attr->max_mr_size = dev->attr.max_mr_size;
79	attr->page_size_cap = 0xffff000;
80	attr->vendor_id = dev->nic_info.pdev->vendor;
81	attr->vendor_part_id = dev->nic_info.pdev->device;
82	attr->hw_ver = dev->asic_id;
83	attr->max_qp = dev->attr.max_qp;
84	attr->max_ah = OCRDMA_MAX_AH;
85	attr->max_qp_wr = dev->attr.max_wqe;
86
87	attr->device_cap_flags = IB_DEVICE_CURR_QP_STATE_MOD |
88					IB_DEVICE_RC_RNR_NAK_GEN |
89					IB_DEVICE_SHUTDOWN_PORT |
90					IB_DEVICE_SYS_IMAGE_GUID |
91					IB_DEVICE_LOCAL_DMA_LKEY |
92					IB_DEVICE_MEM_MGT_EXTENSIONS;
93	attr->max_send_sge = dev->attr.max_send_sge;
94	attr->max_recv_sge = dev->attr.max_recv_sge;
95	attr->max_sge_rd = dev->attr.max_rdma_sge;
96	attr->max_cq = dev->attr.max_cq;
97	attr->max_cqe = dev->attr.max_cqe;
98	attr->max_mr = dev->attr.max_mr;
99	attr->max_mw = dev->attr.max_mw;
100	attr->max_pd = dev->attr.max_pd;
101	attr->atomic_cap = 0;
102	attr->max_qp_rd_atom =
103	    min(dev->attr.max_ord_per_qp, dev->attr.max_ird_per_qp);
104	attr->max_qp_init_rd_atom = dev->attr.max_ord_per_qp;
105	attr->max_srq = dev->attr.max_srq;
106	attr->max_srq_sge = dev->attr.max_srq_sge;
107	attr->max_srq_wr = dev->attr.max_rqe;
108	attr->local_ca_ack_delay = dev->attr.local_ca_ack_delay;
109	attr->max_fast_reg_page_list_len = dev->attr.max_pages_per_frmr;
110	attr->max_pkeys = 1;
111	return 0;
112}
113
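/* Map the physical link speed reported by firmware to an IB speed/width
 * pair; unknown or zero speeds default to SDR x1.
 */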
114static inline void get_link_speed_and_width(struct ocrdma_dev *dev,
115					    u16 *ib_speed, u8 *ib_width)
116{
117	int status;
118	u8 speed;
119
120	status = ocrdma_mbx_get_link_speed(dev, &speed, NULL);
121	if (status)
122		speed = OCRDMA_PHYS_LINK_SPEED_ZERO;
123
124	switch (speed) {
125	case OCRDMA_PHYS_LINK_SPEED_1GBPS:
126		*ib_speed = IB_SPEED_SDR;
127		*ib_width = IB_WIDTH_1X;
128		break;
129
130	case OCRDMA_PHYS_LINK_SPEED_10GBPS:
131		*ib_speed = IB_SPEED_QDR;
132		*ib_width = IB_WIDTH_1X;
133		break;
134
135	case OCRDMA_PHYS_LINK_SPEED_20GBPS:
136		*ib_speed = IB_SPEED_DDR;
137		*ib_width = IB_WIDTH_4X;
138		break;
139
140	case OCRDMA_PHYS_LINK_SPEED_40GBPS:
141		*ib_speed = IB_SPEED_QDR;
142		*ib_width = IB_WIDTH_4X;
143		break;
144
145	default:
146		/* Unsupported */
147		*ib_speed = IB_SPEED_SDR;
148		*ib_width = IB_WIDTH_1X;
149	}
150}
151
152int ocrdma_query_port(struct ib_device *ibdev,
153		      u8 port, struct ib_port_attr *props)
154{
155	enum ib_port_state port_state;
156	struct ocrdma_dev *dev;
157	struct net_device *netdev;
158
159	/* props is zeroed by the caller, so avoid zeroing it here */
160	dev = get_ocrdma_dev(ibdev);
161	netdev = dev->nic_info.netdev;
162	if (netif_running(netdev) && netif_oper_up(netdev)) {
163		port_state = IB_PORT_ACTIVE;
164		props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
165	} else {
166		port_state = IB_PORT_DOWN;
167		props->phys_state = IB_PORT_PHYS_STATE_DISABLED;
168	}
169	props->max_mtu = IB_MTU_4096;
170	props->active_mtu = iboe_get_mtu(netdev->mtu);
171	props->lid = 0;
172	props->lmc = 0;
173	props->sm_lid = 0;
174	props->sm_sl = 0;
175	props->state = port_state;
176	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
177				IB_PORT_DEVICE_MGMT_SUP |
178				IB_PORT_VENDOR_CLASS_SUP;
179	props->ip_gids = true;
180	props->gid_tbl_len = OCRDMA_MAX_SGID;
181	props->pkey_tbl_len = 1;
182	props->bad_pkey_cntr = 0;
183	props->qkey_viol_cntr = 0;
184	get_link_speed_and_width(dev, &props->active_speed,
185				 &props->active_width);
186	props->max_msg_sz = 0x80000000;
187	props->max_vl_num = 4;
188	return 0;
189}
190
191static int ocrdma_add_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
192			   unsigned long len)
193{
194	struct ocrdma_mm *mm;
195
196	mm = kzalloc(sizeof(*mm), GFP_KERNEL);
197	if (mm == NULL)
198		return -ENOMEM;
199	mm->key.phy_addr = phy_addr;
200	mm->key.len = len;
201	INIT_LIST_HEAD(&mm->entry);
202
203	mutex_lock(&uctx->mm_list_lock);
204	list_add_tail(&mm->entry, &uctx->mm_head);
205	mutex_unlock(&uctx->mm_list_lock);
206	return 0;
207}
208
209static void ocrdma_del_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
210			    unsigned long len)
211{
212	struct ocrdma_mm *mm, *tmp;
213
214	mutex_lock(&uctx->mm_list_lock);
215	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
216		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
217			continue;
218
219		list_del(&mm->entry);
220		kfree(mm);
221		break;
222	}
223	mutex_unlock(&uctx->mm_list_lock);
224}
225
226static bool ocrdma_search_mmap(struct ocrdma_ucontext *uctx, u64 phy_addr,
227			      unsigned long len)
228{
229	bool found = false;
230	struct ocrdma_mm *mm;
231
232	mutex_lock(&uctx->mm_list_lock);
233	list_for_each_entry(mm, &uctx->mm_head, entry) {
234		if (len != mm->key.len && phy_addr != mm->key.phy_addr)
235			continue;
236
237		found = true;
238		break;
239	}
240	mutex_unlock(&uctx->mm_list_lock);
241	return found;
242}
243
244
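/* Pick the first free PD index from the preallocated DPP or normal PD
 * bitmap, mark it in use and update the usage count and its high-water
 * mark (thrsh). Called with dev->dev_lock held.
 */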
245static u16 _ocrdma_pd_mgr_get_bitmap(struct ocrdma_dev *dev, bool dpp_pool)
246{
247	u16 pd_bitmap_idx = 0;
248	const unsigned long *pd_bitmap;
249
250	if (dpp_pool) {
251		pd_bitmap = dev->pd_mgr->pd_dpp_bitmap;
252		pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
253						    dev->pd_mgr->max_dpp_pd);
254		__set_bit(pd_bitmap_idx, dev->pd_mgr->pd_dpp_bitmap);
255		dev->pd_mgr->pd_dpp_count++;
256		if (dev->pd_mgr->pd_dpp_count > dev->pd_mgr->pd_dpp_thrsh)
257			dev->pd_mgr->pd_dpp_thrsh = dev->pd_mgr->pd_dpp_count;
258	} else {
259		pd_bitmap = dev->pd_mgr->pd_norm_bitmap;
260		pd_bitmap_idx = find_first_zero_bit(pd_bitmap,
261						    dev->pd_mgr->max_normal_pd);
262		__set_bit(pd_bitmap_idx, dev->pd_mgr->pd_norm_bitmap);
263		dev->pd_mgr->pd_norm_count++;
264		if (dev->pd_mgr->pd_norm_count > dev->pd_mgr->pd_norm_thrsh)
265			dev->pd_mgr->pd_norm_thrsh = dev->pd_mgr->pd_norm_count;
266	}
267	return pd_bitmap_idx;
268}
269
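/* Return a PD id to the DPP or normal PD bitmap it was allocated from.
 * Called with dev->dev_lock held.
 */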
270static int _ocrdma_pd_mgr_put_bitmap(struct ocrdma_dev *dev, u16 pd_id,
271					bool dpp_pool)
272{
273	u16 pd_count;
274	u16 pd_bit_index;
275
276	pd_count = dpp_pool ? dev->pd_mgr->pd_dpp_count :
277			      dev->pd_mgr->pd_norm_count;
278	if (pd_count == 0)
279		return -EINVAL;
280
281	if (dpp_pool) {
282		pd_bit_index = pd_id - dev->pd_mgr->pd_dpp_start;
283		if (pd_bit_index >= dev->pd_mgr->max_dpp_pd) {
284			return -EINVAL;
285		} else {
286			__clear_bit(pd_bit_index, dev->pd_mgr->pd_dpp_bitmap);
287			dev->pd_mgr->pd_dpp_count--;
288		}
289	} else {
290		pd_bit_index = pd_id - dev->pd_mgr->pd_norm_start;
291		if (pd_bit_index >= dev->pd_mgr->max_normal_pd) {
292			return -EINVAL;
293		} else {
294			__clear_bit(pd_bit_index, dev->pd_mgr->pd_norm_bitmap);
295			dev->pd_mgr->pd_norm_count--;
296		}
297	}
298
299	return 0;
300}
301
302static int ocrdma_put_pd_num(struct ocrdma_dev *dev, u16 pd_id,
303				   bool dpp_pool)
304{
305	int status;
306
307	mutex_lock(&dev->dev_lock);
308	status = _ocrdma_pd_mgr_put_bitmap(dev, pd_id, dpp_pool);
309	mutex_unlock(&dev->dev_lock);
310	return status;
311}
312
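/* Allocate a PD id from the preallocated pools: prefer a DPP PD when
 * dpp_enabled is set and DPP PDs are available, otherwise fall back to
 * a normal PD (clearing dpp_enabled).
 */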
313static int ocrdma_get_pd_num(struct ocrdma_dev *dev, struct ocrdma_pd *pd)
314{
315	u16 pd_idx = 0;
316	int status = 0;
317
318	mutex_lock(&dev->dev_lock);
319	if (pd->dpp_enabled) {
320		/* try a DPP PD; fall back to a normal PD if unavailable */
321		if (dev->pd_mgr->pd_dpp_count < dev->pd_mgr->max_dpp_pd) {
322			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, true);
323			pd->id = dev->pd_mgr->pd_dpp_start + pd_idx;
324			pd->dpp_page = dev->pd_mgr->dpp_page_index + pd_idx;
325		} else if (dev->pd_mgr->pd_norm_count <
326			   dev->pd_mgr->max_normal_pd) {
327			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
328			pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
329			pd->dpp_enabled = false;
330		} else {
331			status = -EINVAL;
332		}
333	} else {
334		if (dev->pd_mgr->pd_norm_count < dev->pd_mgr->max_normal_pd) {
335			pd_idx = _ocrdma_pd_mgr_get_bitmap(dev, false);
336			pd->id = dev->pd_mgr->pd_norm_start + pd_idx;
337		} else {
338			status = -EINVAL;
339		}
340	}
341	mutex_unlock(&dev->dev_lock);
342	return status;
343}
344
345/*
346 * NOTE:
347 *
348 * ocrdma_ucontext must be used here because this function is also
349 * called from ocrdma_alloc_ucontext, where ib_udata does not have a
350 * valid ib_ucontext pointer. ib_uverbs_get_context does not call the
351 * uobj_{alloc|get_xxx} helpers that store the ib_ucontext in the
352 * uverbs_attr_bundle wrapping the ib_udata, so ib_udata does NOT
353 * imply a valid ib_ucontext here!
354 */
355static int _ocrdma_alloc_pd(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
356			    struct ocrdma_ucontext *uctx,
357			    struct ib_udata *udata)
358{
359	int status;
360
361	if (udata && uctx && dev->attr.max_dpp_pds) {
362		pd->dpp_enabled =
363			ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R;
364		pd->num_dpp_qp =
365			pd->dpp_enabled ? (dev->nic_info.db_page_size /
366					   dev->attr.wqe_size) : 0;
367	}
368
369	if (dev->pd_mgr->pd_prealloc_valid)
370		return ocrdma_get_pd_num(dev, pd);
371
372retry:
373	status = ocrdma_mbx_alloc_pd(dev, pd);
374	if (status) {
375		if (pd->dpp_enabled) {
376			pd->dpp_enabled = false;
377			pd->num_dpp_qp = 0;
378			goto retry;
379		}
380		return status;
381	}
382
383	return 0;
384}
385
386static inline int is_ucontext_pd(struct ocrdma_ucontext *uctx,
387				 struct ocrdma_pd *pd)
388{
389	return (uctx->cntxt_pd == pd);
390}
391
392static void _ocrdma_dealloc_pd(struct ocrdma_dev *dev,
393			      struct ocrdma_pd *pd)
394{
395	if (dev->pd_mgr->pd_prealloc_valid)
396		ocrdma_put_pd_num(dev, pd->id, pd->dpp_enabled);
397	else
398		ocrdma_mbx_dealloc_pd(dev, pd);
399}
400
401static int ocrdma_alloc_ucontext_pd(struct ocrdma_dev *dev,
402				    struct ocrdma_ucontext *uctx,
403				    struct ib_udata *udata)
404{
405	struct ib_device *ibdev = &dev->ibdev;
406	struct ib_pd *pd;
407	int status;
408
409	pd = rdma_zalloc_drv_obj(ibdev, ib_pd);
410	if (!pd)
411		return -ENOMEM;
412
413	pd->device  = ibdev;
414	uctx->cntxt_pd = get_ocrdma_pd(pd);
415
416	status = _ocrdma_alloc_pd(dev, uctx->cntxt_pd, uctx, udata);
417	if (status) {
418		kfree(uctx->cntxt_pd);
419		goto err;
420	}
421
422	uctx->cntxt_pd->uctx = uctx;
423	uctx->cntxt_pd->ibpd.device = &dev->ibdev;
424err:
425	return status;
426}
427
428static void ocrdma_dealloc_ucontext_pd(struct ocrdma_ucontext *uctx)
429{
430	struct ocrdma_pd *pd = uctx->cntxt_pd;
431	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
432
433	if (uctx->pd_in_use) {
434		pr_err("%s(%d) Freeing in use pdid=0x%x.\n",
435		       __func__, dev->id, pd->id);
436	}
437	uctx->cntxt_pd = NULL;
438	_ocrdma_dealloc_pd(dev, pd);
439	kfree(pd);
440}
441
442static struct ocrdma_pd *ocrdma_get_ucontext_pd(struct ocrdma_ucontext *uctx)
443{
444	struct ocrdma_pd *pd = NULL;
445
446	mutex_lock(&uctx->mm_list_lock);
447	if (!uctx->pd_in_use) {
448		uctx->pd_in_use = true;
449		pd = uctx->cntxt_pd;
450	}
451	mutex_unlock(&uctx->mm_list_lock);
452
453	return pd;
454}
455
456static void ocrdma_release_ucontext_pd(struct ocrdma_ucontext *uctx)
457{
458	mutex_lock(&uctx->mm_list_lock);
459	uctx->pd_in_use = false;
460	mutex_unlock(&uctx->mm_list_lock);
461}
462
463int ocrdma_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata)
464{
465	struct ib_device *ibdev = uctx->device;
466	int status;
467	struct ocrdma_ucontext *ctx = get_ocrdma_ucontext(uctx);
468	struct ocrdma_alloc_ucontext_resp resp = {};
469	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
470	struct pci_dev *pdev = dev->nic_info.pdev;
471	u32 map_len = roundup(sizeof(u32) * 2048, PAGE_SIZE);
472
473	if (!udata)
474		return -EFAULT;
475	INIT_LIST_HEAD(&ctx->mm_head);
476	mutex_init(&ctx->mm_list_lock);
477
478	ctx->ah_tbl.va = dma_alloc_coherent(&pdev->dev, map_len,
479					    &ctx->ah_tbl.pa, GFP_KERNEL);
480	if (!ctx->ah_tbl.va)
481		return -ENOMEM;
482
483	ctx->ah_tbl.len = map_len;
484
485	resp.ah_tbl_len = ctx->ah_tbl.len;
486	resp.ah_tbl_page = virt_to_phys(ctx->ah_tbl.va);
487
488	status = ocrdma_add_mmap(ctx, resp.ah_tbl_page, resp.ah_tbl_len);
489	if (status)
490		goto map_err;
491
492	status = ocrdma_alloc_ucontext_pd(dev, ctx, udata);
493	if (status)
494		goto pd_err;
495
496	resp.dev_id = dev->id;
497	resp.max_inline_data = dev->attr.max_inline_data;
498	resp.wqe_size = dev->attr.wqe_size;
499	resp.rqe_size = dev->attr.rqe_size;
500	resp.dpp_wqe_size = dev->attr.wqe_size;
501
502	memcpy(resp.fw_ver, dev->attr.fw_ver, sizeof(resp.fw_ver));
503	status = ib_copy_to_udata(udata, &resp, sizeof(resp));
504	if (status)
505		goto cpy_err;
506	return 0;
507
508cpy_err:
509	ocrdma_dealloc_ucontext_pd(ctx);
510pd_err:
511	ocrdma_del_mmap(ctx, ctx->ah_tbl.pa, ctx->ah_tbl.len);
512map_err:
513	dma_free_coherent(&pdev->dev, ctx->ah_tbl.len, ctx->ah_tbl.va,
514			  ctx->ah_tbl.pa);
515	return status;
516}
517
518void ocrdma_dealloc_ucontext(struct ib_ucontext *ibctx)
519{
520	struct ocrdma_mm *mm, *tmp;
521	struct ocrdma_ucontext *uctx = get_ocrdma_ucontext(ibctx);
522	struct ocrdma_dev *dev = get_ocrdma_dev(ibctx->device);
523	struct pci_dev *pdev = dev->nic_info.pdev;
524
525	ocrdma_dealloc_ucontext_pd(uctx);
526
527	ocrdma_del_mmap(uctx, uctx->ah_tbl.pa, uctx->ah_tbl.len);
528	dma_free_coherent(&pdev->dev, uctx->ah_tbl.len, uctx->ah_tbl.va,
529			  uctx->ah_tbl.pa);
530
531	list_for_each_entry_safe(mm, tmp, &uctx->mm_head, entry) {
532		list_del(&mm->entry);
533		kfree(mm);
534	}
535}
536
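/* Map driver memory into user space. The offset must have been
 * registered earlier via ocrdma_add_mmap(): doorbell pages are mapped
 * uncached, the DPP region write-combined, and other registered memory
 * (e.g. CQ/QP queue pages) with the default protection.
 */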
537int ocrdma_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
538{
539	struct ocrdma_ucontext *ucontext = get_ocrdma_ucontext(context);
540	struct ocrdma_dev *dev = get_ocrdma_dev(context->device);
541	unsigned long vm_page = vma->vm_pgoff << PAGE_SHIFT;
542	u64 unmapped_db = (u64) dev->nic_info.unmapped_db;
543	unsigned long len = (vma->vm_end - vma->vm_start);
544	int status;
545	bool found;
546
547	if (vma->vm_start & (PAGE_SIZE - 1))
548		return -EINVAL;
549	found = ocrdma_search_mmap(ucontext, vma->vm_pgoff << PAGE_SHIFT, len);
550	if (!found)
551		return -EINVAL;
552
553	if ((vm_page >= unmapped_db) && (vm_page <= (unmapped_db +
554		dev->nic_info.db_total_size)) &&
555		(len <=	dev->nic_info.db_page_size)) {
556		if (vma->vm_flags & VM_READ)
557			return -EPERM;
558
559		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
560		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
561					    len, vma->vm_page_prot);
562	} else if (dev->nic_info.dpp_unmapped_len &&
563		(vm_page >= (u64) dev->nic_info.dpp_unmapped_addr) &&
564		(vm_page <= (u64) (dev->nic_info.dpp_unmapped_addr +
565			dev->nic_info.dpp_unmapped_len)) &&
566		(len <= dev->nic_info.dpp_unmapped_len)) {
567		if (vma->vm_flags & VM_READ)
568			return -EPERM;
569
570		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
571		status = io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
572					    len, vma->vm_page_prot);
573	} else {
574		status = remap_pfn_range(vma, vma->vm_start,
575					 vma->vm_pgoff, len, vma->vm_page_prot);
576	}
577	return status;
578}
579
580static int ocrdma_copy_pd_uresp(struct ocrdma_dev *dev, struct ocrdma_pd *pd,
581				struct ib_udata *udata)
582{
583	int status;
584	u64 db_page_addr;
585	u64 dpp_page_addr = 0;
586	u32 db_page_size;
587	struct ocrdma_alloc_pd_uresp rsp;
588	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
589		udata, struct ocrdma_ucontext, ibucontext);
590
591	memset(&rsp, 0, sizeof(rsp));
592	rsp.id = pd->id;
593	rsp.dpp_enabled = pd->dpp_enabled;
594	db_page_addr = ocrdma_get_db_addr(dev, pd->id);
595	db_page_size = dev->nic_info.db_page_size;
596
597	status = ocrdma_add_mmap(uctx, db_page_addr, db_page_size);
598	if (status)
599		return status;
600
601	if (pd->dpp_enabled) {
602		dpp_page_addr = dev->nic_info.dpp_unmapped_addr +
603				(pd->id * PAGE_SIZE);
604		status = ocrdma_add_mmap(uctx, dpp_page_addr,
605				 PAGE_SIZE);
606		if (status)
607			goto dpp_map_err;
608		rsp.dpp_page_addr_hi = upper_32_bits(dpp_page_addr);
609		rsp.dpp_page_addr_lo = dpp_page_addr;
610	}
611
612	status = ib_copy_to_udata(udata, &rsp, sizeof(rsp));
613	if (status)
614		goto ucopy_err;
615
616	pd->uctx = uctx;
617	return 0;
618
619ucopy_err:
620	if (pd->dpp_enabled)
621		ocrdma_del_mmap(pd->uctx, dpp_page_addr, PAGE_SIZE);
622dpp_map_err:
623	ocrdma_del_mmap(pd->uctx, db_page_addr, db_page_size);
624	return status;
625}
626
627int ocrdma_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
628{
629	struct ib_device *ibdev = ibpd->device;
630	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
631	struct ocrdma_pd *pd;
632	int status;
633	u8 is_uctx_pd = false;
634	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
635		udata, struct ocrdma_ucontext, ibucontext);
636
637	if (udata) {
638		pd = ocrdma_get_ucontext_pd(uctx);
639		if (pd) {
640			is_uctx_pd = true;
641			goto pd_mapping;
642		}
643	}
644
645	pd = get_ocrdma_pd(ibpd);
646	status = _ocrdma_alloc_pd(dev, pd, uctx, udata);
647	if (status)
648		goto exit;
649
650pd_mapping:
651	if (udata) {
652		status = ocrdma_copy_pd_uresp(dev, pd, udata);
653		if (status)
654			goto err;
655	}
656	return 0;
657
658err:
659	if (is_uctx_pd)
660		ocrdma_release_ucontext_pd(uctx);
661	else
662		_ocrdma_dealloc_pd(dev, pd);
663exit:
664	return status;
665}
666
667int ocrdma_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
668{
669	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
670	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
671	struct ocrdma_ucontext *uctx = NULL;
672	u64 usr_db;
673
674	uctx = pd->uctx;
675	if (uctx) {
676		u64 dpp_db = dev->nic_info.dpp_unmapped_addr +
677			(pd->id * PAGE_SIZE);
678		if (pd->dpp_enabled)
679			ocrdma_del_mmap(pd->uctx, dpp_db, PAGE_SIZE);
680		usr_db = ocrdma_get_db_addr(dev, pd->id);
681		ocrdma_del_mmap(pd->uctx, usr_db, dev->nic_info.db_page_size);
682
683		if (is_ucontext_pd(uctx, pd)) {
684			ocrdma_release_ucontext_pd(uctx);
685			return 0;
686		}
687	}
688	_ocrdma_dealloc_pd(dev, pd);
689	return 0;
690}
691
692static int ocrdma_alloc_lkey(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
693			    u32 pdid, int acc, u32 num_pbls, u32 addr_check)
694{
695	int status;
696
697	mr->hwmr.fr_mr = 0;
698	mr->hwmr.local_rd = 1;
699	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
700	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
701	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
702	mr->hwmr.mw_bind = (acc & IB_ACCESS_MW_BIND) ? 1 : 0;
703	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
704	mr->hwmr.num_pbls = num_pbls;
705
706	status = ocrdma_mbx_alloc_lkey(dev, &mr->hwmr, pdid, addr_check);
707	if (status)
708		return status;
709
710	mr->ibmr.lkey = mr->hwmr.lkey;
711	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
712		mr->ibmr.rkey = mr->hwmr.lkey;
713	return 0;
714}
715
716struct ib_mr *ocrdma_get_dma_mr(struct ib_pd *ibpd, int acc)
717{
718	int status;
719	struct ocrdma_mr *mr;
720	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
721	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
722
723	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE)) {
724		pr_err("%s err, invalid access rights\n", __func__);
725		return ERR_PTR(-EINVAL);
726	}
727
728	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
729	if (!mr)
730		return ERR_PTR(-ENOMEM);
731
732	status = ocrdma_alloc_lkey(dev, mr, pd->id, acc, 0,
733				   OCRDMA_ADDR_CHECK_DISABLE);
734	if (status) {
735		kfree(mr);
736		return ERR_PTR(status);
737	}
738
739	return &mr->ibmr;
740}
741
742static void ocrdma_free_mr_pbl_tbl(struct ocrdma_dev *dev,
743				   struct ocrdma_hw_mr *mr)
744{
745	struct pci_dev *pdev = dev->nic_info.pdev;
746	int i = 0;
747
748	if (mr->pbl_table) {
749		for (i = 0; i < mr->num_pbls; i++) {
750			if (!mr->pbl_table[i].va)
751				continue;
752			dma_free_coherent(&pdev->dev, mr->pbl_size,
753					  mr->pbl_table[i].va,
754					  mr->pbl_table[i].pa);
755		}
756		kfree(mr->pbl_table);
757		mr->pbl_table = NULL;
758	}
759}
760
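/* Choose the smallest PBL size (a power-of-two multiple of
 * OCRDMA_MIN_HPAGE_SIZE) for which the number of PBLs needed to hold
 * num_pbes entries stays below the device limit.
 */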
761static int ocrdma_get_pbl_info(struct ocrdma_dev *dev, struct ocrdma_mr *mr,
762			      u32 num_pbes)
763{
764	u32 num_pbls = 0;
765	u32 idx = 0;
766	int status = 0;
767	u32 pbl_size;
768
769	do {
770		pbl_size = OCRDMA_MIN_HPAGE_SIZE * (1 << idx);
771		if (pbl_size > MAX_OCRDMA_PBL_SIZE) {
772			status = -EFAULT;
773			break;
774		}
775		num_pbls = roundup(num_pbes, (pbl_size / sizeof(u64)));
776		num_pbls = num_pbls / (pbl_size / sizeof(u64));
777		idx++;
778	} while (num_pbls >= dev->attr.max_num_mr_pbl);
779
780	mr->hwmr.num_pbes = num_pbes;
781	mr->hwmr.num_pbls = num_pbls;
782	mr->hwmr.pbl_size = pbl_size;
783	return status;
784}
785
786static int ocrdma_build_pbl_tbl(struct ocrdma_dev *dev, struct ocrdma_hw_mr *mr)
787{
788	int status = 0;
789	int i;
790	u32 dma_len = mr->pbl_size;
791	struct pci_dev *pdev = dev->nic_info.pdev;
792	void *va;
793	dma_addr_t pa;
794
795	mr->pbl_table = kcalloc(mr->num_pbls, sizeof(struct ocrdma_pbl),
796				GFP_KERNEL);
797
798	if (!mr->pbl_table)
799		return -ENOMEM;
800
801	for (i = 0; i < mr->num_pbls; i++) {
802		va = dma_alloc_coherent(&pdev->dev, dma_len, &pa, GFP_KERNEL);
803		if (!va) {
804			ocrdma_free_mr_pbl_tbl(dev, mr);
805			status = -ENOMEM;
806			break;
807		}
808		mr->pbl_table[i].va = va;
809		mr->pbl_table[i].pa = pa;
810	}
811	return status;
812}
813
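/* Walk the umem in PAGE_SIZE blocks and write one PBE (DMA address
 * split into lo/hi words) per block, advancing to the next PBL once
 * the current one is full.
 */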
814static void build_user_pbes(struct ocrdma_dev *dev, struct ocrdma_mr *mr)
815{
816	struct ocrdma_pbe *pbe;
817	struct ib_block_iter biter;
818	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
819	int pbe_cnt;
820	u64 pg_addr;
821
822	if (!mr->hwmr.num_pbes)
823		return;
824
825	pbe = (struct ocrdma_pbe *)pbl_tbl->va;
826	pbe_cnt = 0;
827
828	rdma_umem_for_each_dma_block (mr->umem, &biter, PAGE_SIZE) {
829		/* store the page address in pbe */
830		pg_addr = rdma_block_iter_dma_address(&biter);
831		pbe->pa_lo = cpu_to_le32(pg_addr);
832		pbe->pa_hi = cpu_to_le32(upper_32_bits(pg_addr));
833		pbe_cnt += 1;
834		pbe++;
835
836		/* if the given pbl is full of pbes,
837		 * move to the next pbl.
838		 */
839		if (pbe_cnt == (mr->hwmr.pbl_size / sizeof(u64))) {
840			pbl_tbl++;
841			pbe = (struct ocrdma_pbe *)pbl_tbl->va;
842			pbe_cnt = 0;
843		}
844	}
845}
846
847struct ib_mr *ocrdma_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 len,
848				 u64 usr_addr, int acc, struct ib_udata *udata)
849{
850	int status = -ENOMEM;
851	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
852	struct ocrdma_mr *mr;
853	struct ocrdma_pd *pd;
854
855	pd = get_ocrdma_pd(ibpd);
856
857	if (acc & IB_ACCESS_REMOTE_WRITE && !(acc & IB_ACCESS_LOCAL_WRITE))
858		return ERR_PTR(-EINVAL);
859
860	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
861	if (!mr)
862		return ERR_PTR(status);
863	mr->umem = ib_umem_get(ibpd->device, start, len, acc);
864	if (IS_ERR(mr->umem)) {
865		status = -EFAULT;
866		goto umem_err;
867	}
868	status = ocrdma_get_pbl_info(
869		dev, mr, ib_umem_num_dma_blocks(mr->umem, PAGE_SIZE));
870	if (status)
871		goto umem_err;
872
873	mr->hwmr.pbe_size = PAGE_SIZE;
874	mr->hwmr.va = usr_addr;
875	mr->hwmr.len = len;
876	mr->hwmr.remote_wr = (acc & IB_ACCESS_REMOTE_WRITE) ? 1 : 0;
877	mr->hwmr.remote_rd = (acc & IB_ACCESS_REMOTE_READ) ? 1 : 0;
878	mr->hwmr.local_wr = (acc & IB_ACCESS_LOCAL_WRITE) ? 1 : 0;
879	mr->hwmr.local_rd = 1;
880	mr->hwmr.remote_atomic = (acc & IB_ACCESS_REMOTE_ATOMIC) ? 1 : 0;
881	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
882	if (status)
883		goto umem_err;
884	build_user_pbes(dev, mr);
885	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, acc);
886	if (status)
887		goto mbx_err;
888	mr->ibmr.lkey = mr->hwmr.lkey;
889	if (mr->hwmr.remote_wr || mr->hwmr.remote_rd)
890		mr->ibmr.rkey = mr->hwmr.lkey;
891
892	return &mr->ibmr;
893
894mbx_err:
895	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
896umem_err:
897	kfree(mr);
898	return ERR_PTR(status);
899}
900
901int ocrdma_dereg_mr(struct ib_mr *ib_mr, struct ib_udata *udata)
902{
903	struct ocrdma_mr *mr = get_ocrdma_mr(ib_mr);
904	struct ocrdma_dev *dev = get_ocrdma_dev(ib_mr->device);
905
906	(void) ocrdma_mbx_dealloc_lkey(dev, mr->hwmr.fr_mr, mr->hwmr.lkey);
907
908	kfree(mr->pages);
909	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
910
911	/* it could be user registered memory. */
912	ib_umem_release(mr->umem);
913	kfree(mr);
914
915	/* Don't stop cleanup, in case FW is unresponsive */
916	if (dev->mqe_ctx.fw_error_state) {
917		pr_err("%s(%d) fw not responding.\n",
918		       __func__, dev->id);
919	}
920	return 0;
921}
922
923static int ocrdma_copy_cq_uresp(struct ocrdma_dev *dev, struct ocrdma_cq *cq,
924				struct ib_udata *udata)
925{
926	int status;
927	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
928		udata, struct ocrdma_ucontext, ibucontext);
929	struct ocrdma_create_cq_uresp uresp;
930
931	/* this must be a user-space flow! */
932	if (!udata)
933		return -EINVAL;
934
935	memset(&uresp, 0, sizeof(uresp));
936	uresp.cq_id = cq->id;
937	uresp.page_size = PAGE_ALIGN(cq->len);
938	uresp.num_pages = 1;
939	uresp.max_hw_cqe = cq->max_hw_cqe;
940	uresp.page_addr[0] = virt_to_phys(cq->va);
941	uresp.db_page_addr =  ocrdma_get_db_addr(dev, uctx->cntxt_pd->id);
942	uresp.db_page_size = dev->nic_info.db_page_size;
943	uresp.phase_change = cq->phase_change ? 1 : 0;
944	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
945	if (status) {
946		pr_err("%s(%d) copy error cqid=0x%x.\n",
947		       __func__, dev->id, cq->id);
948		goto err;
949	}
950	status = ocrdma_add_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
951	if (status)
952		goto err;
953	status = ocrdma_add_mmap(uctx, uresp.page_addr[0], uresp.page_size);
954	if (status) {
955		ocrdma_del_mmap(uctx, uresp.db_page_addr, uresp.db_page_size);
956		goto err;
957	}
958	cq->ucontext = uctx;
959err:
960	return status;
961}
962
963int ocrdma_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr,
964		     struct ib_udata *udata)
965{
966	struct ib_device *ibdev = ibcq->device;
967	int entries = attr->cqe;
968	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
969	struct ocrdma_dev *dev = get_ocrdma_dev(ibdev);
970	struct ocrdma_ucontext *uctx = rdma_udata_to_drv_context(
971		udata, struct ocrdma_ucontext, ibucontext);
972	u16 pd_id = 0;
973	int status;
974	struct ocrdma_create_cq_ureq ureq;
975
976	if (attr->flags)
977		return -EINVAL;
978
979	if (udata) {
980		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
981			return -EFAULT;
982	} else
983		ureq.dpp_cq = 0;
984
985	spin_lock_init(&cq->cq_lock);
986	spin_lock_init(&cq->comp_handler_lock);
987	INIT_LIST_HEAD(&cq->sq_head);
988	INIT_LIST_HEAD(&cq->rq_head);
989
990	if (udata)
991		pd_id = uctx->cntxt_pd->id;
992
993	status = ocrdma_mbx_create_cq(dev, cq, entries, ureq.dpp_cq, pd_id);
994	if (status)
995		return status;
996
997	if (udata) {
998		status = ocrdma_copy_cq_uresp(dev, cq, udata);
999		if (status)
1000			goto ctx_err;
1001	}
1002	cq->phase = OCRDMA_CQE_VALID;
1003	dev->cq_tbl[cq->id] = cq;
1004	return 0;
1005
1006ctx_err:
1007	ocrdma_mbx_destroy_cq(dev, cq);
1008	return status;
1009}
1010
1011int ocrdma_resize_cq(struct ib_cq *ibcq, int new_cnt,
1012		     struct ib_udata *udata)
1013{
1014	int status = 0;
1015	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1016
1017	if (new_cnt < 1 || new_cnt > cq->max_hw_cqe) {
1018		status = -EINVAL;
1019		return status;
1020	}
1021	ibcq->cqe = new_cnt;
1022	return status;
1023}
1024
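/* Count the CQEs that are still valid in the CQ and ring the CQ
 * doorbell for that many entries, holding cq_lock to sync with any
 * polling context.
 */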
1025static void ocrdma_flush_cq(struct ocrdma_cq *cq)
1026{
1027	int cqe_cnt;
1028	int valid_count = 0;
1029	unsigned long flags;
1030
1031	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
1032	struct ocrdma_cqe *cqe = NULL;
1033
1034	cqe = cq->va;
1035	cqe_cnt = cq->cqe_cnt;
1036
1037	/* The last irq might have scheduled a polling thread;
1038	 * sync up with it before hard flushing.
1039	 */
1040	spin_lock_irqsave(&cq->cq_lock, flags);
1041	while (cqe_cnt) {
1042		if (is_cqe_valid(cq, cqe))
1043			valid_count++;
1044		cqe++;
1045		cqe_cnt--;
1046	}
1047	ocrdma_ring_cq_db(dev, cq->id, false, false, valid_count);
1048	spin_unlock_irqrestore(&cq->cq_lock, flags);
1049}
1050
1051int ocrdma_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata)
1052{
1053	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
1054	struct ocrdma_eq *eq = NULL;
1055	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
1056	int pdid = 0;
1057	u32 irq, indx;
1058
1059	dev->cq_tbl[cq->id] = NULL;
1060	indx = ocrdma_get_eq_table_index(dev, cq->eqn);
1061
1062	eq = &dev->eq_tbl[indx];
1063	irq = ocrdma_get_irq(dev, eq);
1064	synchronize_irq(irq);
1065	ocrdma_flush_cq(cq);
1066
1067	ocrdma_mbx_destroy_cq(dev, cq);
1068	if (cq->ucontext) {
1069		pdid = cq->ucontext->cntxt_pd->id;
1070		ocrdma_del_mmap(cq->ucontext, (u64) cq->pa,
1071				PAGE_ALIGN(cq->len));
1072		ocrdma_del_mmap(cq->ucontext,
1073				ocrdma_get_db_addr(dev, pdid),
1074				dev->nic_info.db_page_size);
1075	}
1076	return 0;
1077}
1078
1079static int ocrdma_add_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1080{
1081	int status = -EINVAL;
1082
1083	if (qp->id < OCRDMA_MAX_QP && dev->qp_tbl[qp->id] == NULL) {
1084		dev->qp_tbl[qp->id] = qp;
1085		status = 0;
1086	}
1087	return status;
1088}
1089
1090static void ocrdma_del_qpn_map(struct ocrdma_dev *dev, struct ocrdma_qp *qp)
1091{
1092	dev->qp_tbl[qp->id] = NULL;
1093}
1094
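/* Validate QP creation attributes against device limits and the
 * GSI/special QP restrictions before the QP is created in firmware.
 */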
1095static int ocrdma_check_qp_params(struct ib_pd *ibpd, struct ocrdma_dev *dev,
1096				  struct ib_qp_init_attr *attrs,
1097				  struct ib_udata *udata)
1098{
1099	if ((attrs->qp_type != IB_QPT_GSI) &&
1100	    (attrs->qp_type != IB_QPT_RC) &&
1101	    (attrs->qp_type != IB_QPT_UC) &&
1102	    (attrs->qp_type != IB_QPT_UD)) {
1103		pr_err("%s(%d) unsupported qp type=0x%x requested\n",
1104		       __func__, dev->id, attrs->qp_type);
1105		return -EOPNOTSUPP;
1106	}
1107	/* Skip the check for QP1 to support CM size of 128 */
1108	if ((attrs->qp_type != IB_QPT_GSI) &&
1109	    (attrs->cap.max_send_wr > dev->attr.max_wqe)) {
1110		pr_err("%s(%d) unsupported send_wr=0x%x requested\n",
1111		       __func__, dev->id, attrs->cap.max_send_wr);
1112		pr_err("%s(%d) supported send_wr=0x%x\n",
1113		       __func__, dev->id, dev->attr.max_wqe);
1114		return -EINVAL;
1115	}
1116	if (!attrs->srq && (attrs->cap.max_recv_wr > dev->attr.max_rqe)) {
1117		pr_err("%s(%d) unsupported recv_wr=0x%x requested\n",
1118		       __func__, dev->id, attrs->cap.max_recv_wr);
1119		pr_err("%s(%d) supported recv_wr=0x%x\n",
1120		       __func__, dev->id, dev->attr.max_rqe);
1121		return -EINVAL;
1122	}
1123	if (attrs->cap.max_inline_data > dev->attr.max_inline_data) {
1124		pr_err("%s(%d) unsupported inline data size=0x%x requested\n",
1125		       __func__, dev->id, attrs->cap.max_inline_data);
1126		pr_err("%s(%d) supported inline data size=0x%x\n",
1127		       __func__, dev->id, dev->attr.max_inline_data);
1128		return -EINVAL;
1129	}
1130	if (attrs->cap.max_send_sge > dev->attr.max_send_sge) {
1131		pr_err("%s(%d) unsupported send_sge=0x%x requested\n",
1132		       __func__, dev->id, attrs->cap.max_send_sge);
1133		pr_err("%s(%d) supported send_sge=0x%x\n",
1134		       __func__, dev->id, dev->attr.max_send_sge);
1135		return -EINVAL;
1136	}
1137	if (attrs->cap.max_recv_sge > dev->attr.max_recv_sge) {
1138		pr_err("%s(%d) unsupported recv_sge=0x%x requested\n",
1139		       __func__, dev->id, attrs->cap.max_recv_sge);
1140		pr_err("%s(%d) supported recv_sge=0x%x\n",
1141		       __func__, dev->id, dev->attr.max_recv_sge);
1142		return -EINVAL;
1143	}
1144	/* unprivileged user space cannot create special QPs */
1145	if (udata && attrs->qp_type == IB_QPT_GSI) {
1146		pr_err("%s(%d) Userspace can't create special QPs of type=0x%x\n",
1147		       __func__, dev->id, attrs->qp_type);
1149		return -EINVAL;
1150	}
1151	/* allow creating only one GSI type of QP */
1152	if (attrs->qp_type == IB_QPT_GSI && dev->gsi_qp_created) {
1153		pr_err("%s(%d) GSI special QPs already created.\n",
1154		       __func__, dev->id);
1155		return -EINVAL;
1156	}
1157	/* verify consumer QPs are not trying to use GSI QP's CQ */
1158	if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created)) {
1159		if ((dev->gsi_sqcq == get_ocrdma_cq(attrs->send_cq)) ||
1160			(dev->gsi_rqcq == get_ocrdma_cq(attrs->recv_cq))) {
1161			pr_err("%s(%d) Consumer QP cannot use GSI CQs.\n",
1162				__func__, dev->id);
1163			return -EINVAL;
1164		}
1165	}
1166	return 0;
1167}
1168
1169static int ocrdma_copy_qp_uresp(struct ocrdma_qp *qp,
1170				struct ib_udata *udata, int dpp_offset,
1171				int dpp_credit_lmt, int srq)
1172{
1173	int status;
1174	u64 usr_db;
1175	struct ocrdma_create_qp_uresp uresp;
1176	struct ocrdma_pd *pd = qp->pd;
1177	struct ocrdma_dev *dev = get_ocrdma_dev(pd->ibpd.device);
1178
1179	memset(&uresp, 0, sizeof(uresp));
1180	usr_db = dev->nic_info.unmapped_db +
1181			(pd->id * dev->nic_info.db_page_size);
1182	uresp.qp_id = qp->id;
1183	uresp.sq_dbid = qp->sq.dbid;
1184	uresp.num_sq_pages = 1;
1185	uresp.sq_page_size = PAGE_ALIGN(qp->sq.len);
1186	uresp.sq_page_addr[0] = virt_to_phys(qp->sq.va);
1187	uresp.num_wqe_allocated = qp->sq.max_cnt;
1188	if (!srq) {
1189		uresp.rq_dbid = qp->rq.dbid;
1190		uresp.num_rq_pages = 1;
1191		uresp.rq_page_size = PAGE_ALIGN(qp->rq.len);
1192		uresp.rq_page_addr[0] = virt_to_phys(qp->rq.va);
1193		uresp.num_rqe_allocated = qp->rq.max_cnt;
1194	}
1195	uresp.db_page_addr = usr_db;
1196	uresp.db_page_size = dev->nic_info.db_page_size;
1197	uresp.db_sq_offset = OCRDMA_DB_GEN2_SQ_OFFSET;
1198	uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1199	uresp.db_shift = OCRDMA_DB_RQ_SHIFT;
1200
1201	if (qp->dpp_enabled) {
1202		uresp.dpp_credit = dpp_credit_lmt;
1203		uresp.dpp_offset = dpp_offset;
1204	}
1205	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1206	if (status) {
1207		pr_err("%s(%d) user copy error.\n", __func__, dev->id);
1208		goto err;
1209	}
1210	status = ocrdma_add_mmap(pd->uctx, uresp.sq_page_addr[0],
1211				 uresp.sq_page_size);
1212	if (status)
1213		goto err;
1214
1215	if (!srq) {
1216		status = ocrdma_add_mmap(pd->uctx, uresp.rq_page_addr[0],
1217					 uresp.rq_page_size);
1218		if (status)
1219			goto rq_map_err;
1220	}
1221	return status;
1222rq_map_err:
1223	ocrdma_del_mmap(pd->uctx, uresp.sq_page_addr[0], uresp.sq_page_size);
1224err:
1225	return status;
1226}
1227
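/* Compute the kernel virtual doorbell addresses for the QP's SQ and RQ;
 * the doorbell offsets differ by ASIC generation (SKH-R uses the GEN2
 * offsets).
 */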
1228static void ocrdma_set_qp_db(struct ocrdma_dev *dev, struct ocrdma_qp *qp,
1229			     struct ocrdma_pd *pd)
1230{
1231	if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1232		qp->sq_db = dev->nic_info.db +
1233			(pd->id * dev->nic_info.db_page_size) +
1234			OCRDMA_DB_GEN2_SQ_OFFSET;
1235		qp->rq_db = dev->nic_info.db +
1236			(pd->id * dev->nic_info.db_page_size) +
1237			OCRDMA_DB_GEN2_RQ_OFFSET;
1238	} else {
1239		qp->sq_db = dev->nic_info.db +
1240			(pd->id * dev->nic_info.db_page_size) +
1241			OCRDMA_DB_SQ_OFFSET;
1242		qp->rq_db = dev->nic_info.db +
1243			(pd->id * dev->nic_info.db_page_size) +
1244			OCRDMA_DB_RQ_OFFSET;
1245	}
1246}
1247
1248static int ocrdma_alloc_wr_id_tbl(struct ocrdma_qp *qp)
1249{
1250	qp->wqe_wr_id_tbl =
1251	    kcalloc(qp->sq.max_cnt, sizeof(*(qp->wqe_wr_id_tbl)),
1252		    GFP_KERNEL);
1253	if (qp->wqe_wr_id_tbl == NULL)
1254		return -ENOMEM;
1255	qp->rqe_wr_id_tbl =
1256	    kcalloc(qp->rq.max_cnt, sizeof(u64), GFP_KERNEL);
1257	if (qp->rqe_wr_id_tbl == NULL)
1258		return -ENOMEM;
1259
1260	return 0;
1261}
1262
1263static void ocrdma_set_qp_init_params(struct ocrdma_qp *qp,
1264				      struct ocrdma_pd *pd,
1265				      struct ib_qp_init_attr *attrs)
1266{
1267	qp->pd = pd;
1268	spin_lock_init(&qp->q_lock);
1269	INIT_LIST_HEAD(&qp->sq_entry);
1270	INIT_LIST_HEAD(&qp->rq_entry);
1271
1272	qp->qp_type = attrs->qp_type;
1273	qp->cap_flags = OCRDMA_QP_INB_RD | OCRDMA_QP_INB_WR;
1274	qp->max_inline_data = attrs->cap.max_inline_data;
1275	qp->sq.max_sges = attrs->cap.max_send_sge;
1276	qp->rq.max_sges = attrs->cap.max_recv_sge;
1277	qp->state = OCRDMA_QPS_RST;
1278	qp->signaled = (attrs->sq_sig_type == IB_SIGNAL_ALL_WR) ? true : false;
1279}
1280
1281static void ocrdma_store_gsi_qp_cq(struct ocrdma_dev *dev,
1282				   struct ib_qp_init_attr *attrs)
1283{
1284	if (attrs->qp_type == IB_QPT_GSI) {
1285		dev->gsi_qp_created = 1;
1286		dev->gsi_sqcq = get_ocrdma_cq(attrs->send_cq);
1287		dev->gsi_rqcq = get_ocrdma_cq(attrs->recv_cq);
1288	}
1289}
1290
1291struct ib_qp *ocrdma_create_qp(struct ib_pd *ibpd,
1292			       struct ib_qp_init_attr *attrs,
1293			       struct ib_udata *udata)
1294{
1295	int status;
1296	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
1297	struct ocrdma_qp *qp;
1298	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
1299	struct ocrdma_create_qp_ureq ureq;
1300	u16 dpp_credit_lmt, dpp_offset;
1301
1302	status = ocrdma_check_qp_params(ibpd, dev, attrs, udata);
1303	if (status)
1304		goto gen_err;
1305
1306	memset(&ureq, 0, sizeof(ureq));
1307	if (udata) {
1308		if (ib_copy_from_udata(&ureq, udata, sizeof(ureq)))
1309			return ERR_PTR(-EFAULT);
1310	}
1311	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
1312	if (!qp) {
1313		status = -ENOMEM;
1314		goto gen_err;
1315	}
1316	ocrdma_set_qp_init_params(qp, pd, attrs);
1317	if (udata == NULL)
1318		qp->cap_flags |= (OCRDMA_QP_MW_BIND | OCRDMA_QP_LKEY0 |
1319					OCRDMA_QP_FAST_REG);
1320
1321	mutex_lock(&dev->dev_lock);
1322	status = ocrdma_mbx_create_qp(qp, attrs, ureq.enable_dpp_cq,
1323					ureq.dpp_cq_id,
1324					&dpp_offset, &dpp_credit_lmt);
1325	if (status)
1326		goto mbx_err;
1327
1328	/* user-space QPs' wr_id tables are managed by the library */
1329	if (udata == NULL) {
1330		status = ocrdma_alloc_wr_id_tbl(qp);
1331		if (status)
1332			goto map_err;
1333	}
1334
1335	status = ocrdma_add_qpn_map(dev, qp);
1336	if (status)
1337		goto map_err;
1338	ocrdma_set_qp_db(dev, qp, pd);
1339	if (udata) {
1340		status = ocrdma_copy_qp_uresp(qp, udata, dpp_offset,
1341					      dpp_credit_lmt,
1342					      (attrs->srq != NULL));
1343		if (status)
1344			goto cpy_err;
1345	}
1346	ocrdma_store_gsi_qp_cq(dev, attrs);
1347	qp->ibqp.qp_num = qp->id;
1348	mutex_unlock(&dev->dev_lock);
1349	return &qp->ibqp;
1350
1351cpy_err:
1352	ocrdma_del_qpn_map(dev, qp);
1353map_err:
1354	ocrdma_mbx_destroy_qp(dev, qp);
1355mbx_err:
1356	mutex_unlock(&dev->dev_lock);
1357	kfree(qp->wqe_wr_id_tbl);
1358	kfree(qp->rqe_wr_id_tbl);
1359	kfree(qp);
1360	pr_err("%s(%d) error=%d\n", __func__, dev->id, status);
1361gen_err:
1362	return ERR_PTR(status);
1363}
1364
1365int _ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1366		      int attr_mask)
1367{
1368	int status = 0;
1369	struct ocrdma_qp *qp;
1370	struct ocrdma_dev *dev;
1371	enum ib_qp_state old_qps;
1372
1373	qp = get_ocrdma_qp(ibqp);
1374	dev = get_ocrdma_dev(ibqp->device);
1375	if (attr_mask & IB_QP_STATE)
1376		status = ocrdma_qp_state_change(qp, attr->qp_state, &old_qps);
1377	/* if the new and previous states are the same, the hw
1378	 * doesn't need to know about it.
1379	 */
1380	if (status < 0)
1381		return status;
1382	return ocrdma_mbx_modify_qp(dev, qp, attr, attr_mask);
1383}
1384
1385int ocrdma_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1386		     int attr_mask, struct ib_udata *udata)
1387{
1388	unsigned long flags;
1389	int status = -EINVAL;
1390	struct ocrdma_qp *qp;
1391	struct ocrdma_dev *dev;
1392	enum ib_qp_state old_qps, new_qps;
1393
1394	qp = get_ocrdma_qp(ibqp);
1395	dev = get_ocrdma_dev(ibqp->device);
1396
1397	/* synchronize with multiple contexts changing or retrieving qp state */
1398	mutex_lock(&dev->dev_lock);
1399	/* synchronize with wqe/rqe posting and cqe processing contexts */
1400	spin_lock_irqsave(&qp->q_lock, flags);
1401	old_qps = get_ibqp_state(qp->state);
1402	if (attr_mask & IB_QP_STATE)
1403		new_qps = attr->qp_state;
1404	else
1405		new_qps = old_qps;
1406	spin_unlock_irqrestore(&qp->q_lock, flags);
1407
1408	if (!ib_modify_qp_is_ok(old_qps, new_qps, ibqp->qp_type, attr_mask)) {
1409		pr_err("%s(%d) invalid attribute mask=0x%x specified for\n"
1410		       "qpn=0x%x of type=0x%x old_qps=0x%x, new_qps=0x%x\n",
1411		       __func__, dev->id, attr_mask, qp->id, ibqp->qp_type,
1412		       old_qps, new_qps);
1413		goto param_err;
1414	}
1415
1416	status = _ocrdma_modify_qp(ibqp, attr, attr_mask);
1417	if (status > 0)
1418		status = 0;
1419param_err:
1420	mutex_unlock(&dev->dev_lock);
1421	return status;
1422}
1423
1424static enum ib_mtu ocrdma_mtu_int_to_enum(u16 mtu)
1425{
1426	switch (mtu) {
1427	case 256:
1428		return IB_MTU_256;
1429	case 512:
1430		return IB_MTU_512;
1431	case 1024:
1432		return IB_MTU_1024;
1433	case 2048:
1434		return IB_MTU_2048;
1435	case 4096:
1436		return IB_MTU_4096;
1437	default:
1438		return IB_MTU_1024;
1439	}
1440}
1441
1442static int ocrdma_to_ib_qp_acc_flags(int qp_cap_flags)
1443{
1444	int ib_qp_acc_flags = 0;
1445
1446	if (qp_cap_flags & OCRDMA_QP_INB_WR)
1447		ib_qp_acc_flags |= IB_ACCESS_REMOTE_WRITE;
1448	if (qp_cap_flags & OCRDMA_QP_INB_RD)
1449		ib_qp_acc_flags |= IB_ACCESS_LOCAL_WRITE;
1450	return ib_qp_acc_flags;
1451}
1452
1453int ocrdma_query_qp(struct ib_qp *ibqp,
1454		    struct ib_qp_attr *qp_attr,
1455		    int attr_mask, struct ib_qp_init_attr *qp_init_attr)
1456{
1457	int status;
1458	u32 qp_state;
1459	struct ocrdma_qp_params params;
1460	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
1461	struct ocrdma_dev *dev = get_ocrdma_dev(ibqp->device);
1462
1463	memset(&params, 0, sizeof(params));
1464	mutex_lock(&dev->dev_lock);
1465	status = ocrdma_mbx_query_qp(dev, qp, &params);
1466	mutex_unlock(&dev->dev_lock);
1467	if (status)
1468		goto mbx_err;
1469	if (qp->qp_type == IB_QPT_UD)
1470		qp_attr->qkey = params.qkey;
1471	qp_attr->path_mtu =
1472		ocrdma_mtu_int_to_enum(params.path_mtu_pkey_indx &
1473				OCRDMA_QP_PARAMS_PATH_MTU_MASK) >>
1474				OCRDMA_QP_PARAMS_PATH_MTU_SHIFT;
1475	qp_attr->path_mig_state = IB_MIG_MIGRATED;
1476	qp_attr->rq_psn = params.hop_lmt_rq_psn & OCRDMA_QP_PARAMS_RQ_PSN_MASK;
1477	qp_attr->sq_psn = params.tclass_sq_psn & OCRDMA_QP_PARAMS_SQ_PSN_MASK;
1478	qp_attr->dest_qp_num =
1479	    params.ack_to_rnr_rtc_dest_qpn & OCRDMA_QP_PARAMS_DEST_QPN_MASK;
1480
1481	qp_attr->qp_access_flags = ocrdma_to_ib_qp_acc_flags(qp->cap_flags);
1482	qp_attr->cap.max_send_wr = qp->sq.max_cnt - 1;
1483	qp_attr->cap.max_recv_wr = qp->rq.max_cnt - 1;
1484	qp_attr->cap.max_send_sge = qp->sq.max_sges;
1485	qp_attr->cap.max_recv_sge = qp->rq.max_sges;
1486	qp_attr->cap.max_inline_data = qp->max_inline_data;
1487	qp_init_attr->cap = qp_attr->cap;
1488	qp_attr->ah_attr.type = RDMA_AH_ATTR_TYPE_ROCE;
1489
1490	rdma_ah_set_grh(&qp_attr->ah_attr, NULL,
1491			params.rnt_rc_sl_fl &
1492			  OCRDMA_QP_PARAMS_FLOW_LABEL_MASK,
1493			qp->sgid_idx,
1494			(params.hop_lmt_rq_psn &
1495			 OCRDMA_QP_PARAMS_HOP_LMT_MASK) >>
1496			 OCRDMA_QP_PARAMS_HOP_LMT_SHIFT,
1497			(params.tclass_sq_psn &
1498			 OCRDMA_QP_PARAMS_TCLASS_MASK) >>
1499			 OCRDMA_QP_PARAMS_TCLASS_SHIFT);
1500	rdma_ah_set_dgid_raw(&qp_attr->ah_attr, &params.dgid[0]);
1501
1502	rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
1503	rdma_ah_set_sl(&qp_attr->ah_attr, (params.rnt_rc_sl_fl &
1504					   OCRDMA_QP_PARAMS_SL_MASK) >>
1505					   OCRDMA_QP_PARAMS_SL_SHIFT);
1506	qp_attr->timeout = (params.ack_to_rnr_rtc_dest_qpn &
1507			    OCRDMA_QP_PARAMS_ACK_TIMEOUT_MASK) >>
1508				OCRDMA_QP_PARAMS_ACK_TIMEOUT_SHIFT;
1509	qp_attr->rnr_retry = (params.ack_to_rnr_rtc_dest_qpn &
1510			      OCRDMA_QP_PARAMS_RNR_RETRY_CNT_MASK) >>
1511				OCRDMA_QP_PARAMS_RNR_RETRY_CNT_SHIFT;
1512	qp_attr->retry_cnt =
1513	    (params.rnt_rc_sl_fl & OCRDMA_QP_PARAMS_RETRY_CNT_MASK) >>
1514		OCRDMA_QP_PARAMS_RETRY_CNT_SHIFT;
1515	qp_attr->min_rnr_timer = 0;
1516	qp_attr->pkey_index = 0;
1517	qp_attr->port_num = 1;
1518	rdma_ah_set_path_bits(&qp_attr->ah_attr, 0);
1519	rdma_ah_set_static_rate(&qp_attr->ah_attr, 0);
1520	qp_attr->alt_pkey_index = 0;
1521	qp_attr->alt_port_num = 0;
1522	qp_attr->alt_timeout = 0;
1523	memset(&qp_attr->alt_ah_attr, 0, sizeof(qp_attr->alt_ah_attr));
1524	qp_state = (params.max_sge_recv_flags & OCRDMA_QP_PARAMS_STATE_MASK) >>
1525		    OCRDMA_QP_PARAMS_STATE_SHIFT;
1526	qp_attr->qp_state = get_ibqp_state(qp_state);
1527	qp_attr->cur_qp_state = qp_attr->qp_state;
1528	qp_attr->sq_draining = (qp_state == OCRDMA_QPS_SQ_DRAINING) ? 1 : 0;
1529	qp_attr->max_dest_rd_atomic =
1530	    params.max_ord_ird >> OCRDMA_QP_PARAMS_MAX_ORD_SHIFT;
1531	qp_attr->max_rd_atomic =
1532	    params.max_ord_ird & OCRDMA_QP_PARAMS_MAX_IRD_MASK;
1533	qp_attr->en_sqd_async_notify = (params.max_sge_recv_flags &
1534				OCRDMA_QP_PARAMS_FLAGS_SQD_ASYNC) ? 1 : 0;
1535	/* Sync driver QP state with FW */
1536	ocrdma_qp_state_change(qp, qp_attr->qp_state, NULL);
1537mbx_err:
1538	return status;
1539}
1540
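/* Toggle the allocation bit for SRQ RQE index idx in idx_bit_fields
 * (32 indices per word).
 */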
1541static void ocrdma_srq_toggle_bit(struct ocrdma_srq *srq, unsigned int idx)
1542{
1543	unsigned int i = idx / 32;
1544	u32 mask = (1U << (idx % 32));
1545
1546	srq->idx_bit_fields[i] ^= mask;
1547}
1548
1549static int ocrdma_hwq_free_cnt(struct ocrdma_qp_hwq_info *q)
1550{
1551	return ((q->max_wqe_idx - q->head) + q->tail) % q->max_cnt;
1552}
1553
1554static int is_hw_sq_empty(struct ocrdma_qp *qp)
1555{
1556	return (qp->sq.tail == qp->sq.head);
1557}
1558
1559static int is_hw_rq_empty(struct ocrdma_qp *qp)
1560{
1561	return (qp->rq.tail == qp->rq.head);
1562}
1563
1564static void *ocrdma_hwq_head(struct ocrdma_qp_hwq_info *q)
1565{
1566	return q->va + (q->head * q->entry_size);
1567}
1568
1569static void *ocrdma_hwq_head_from_idx(struct ocrdma_qp_hwq_info *q,
1570				      u32 idx)
1571{
1572	return q->va + (idx * q->entry_size);
1573}
1574
1575static void ocrdma_hwq_inc_head(struct ocrdma_qp_hwq_info *q)
1576{
1577	q->head = (q->head + 1) & q->max_wqe_idx;
1578}
1579
1580static void ocrdma_hwq_inc_tail(struct ocrdma_qp_hwq_info *q)
1581{
1582	q->tail = (q->tail + 1) & q->max_wqe_idx;
1583}
1584
1585/* discard the cqe for a given QP */
1586static void ocrdma_discard_cqes(struct ocrdma_qp *qp, struct ocrdma_cq *cq)
1587{
1588	unsigned long cq_flags;
1589	unsigned long flags;
1590	int discard_cnt = 0;
1591	u32 cur_getp, stop_getp;
1592	struct ocrdma_cqe *cqe;
1593	u32 qpn = 0, wqe_idx = 0;
1594
1595	spin_lock_irqsave(&cq->cq_lock, cq_flags);
1596
1597	/* traverse the CQEs in the hw CQ,
1598	 * find the CQEs that match the given qp,
1599	 * and mark them discarded by clearing their qpn.
1600	 * The doorbell is rung in poll_cq(), as we
1601	 * don't complete out-of-order cqes.
1602	 */
1603
1604	cur_getp = cq->getp;
1605	/* find up to where we reap the cq. */
1606	stop_getp = cur_getp;
1607	do {
1608		if (is_hw_sq_empty(qp) && (!qp->srq && is_hw_rq_empty(qp)))
1609			break;
1610
1611		cqe = cq->va + cur_getp;
1612		/* exit when (a) we are done reaping the whole hw cq, or
1613		 *           (b) the qp's sq/rq becomes empty,
1614		 * whichever happens first.
1615		 */
1616		qpn = cqe->cmn.qpn & OCRDMA_CQE_QPN_MASK;
1617		/* if previously discarded cqe found, skip that too. */
1618		/* check for matching qp */
1619		if (qpn == 0 || qpn != qp->id)
1620			goto skip_cqe;
1621
1622		if (is_cqe_for_sq(cqe)) {
1623			ocrdma_hwq_inc_tail(&qp->sq);
1624		} else {
1625			if (qp->srq) {
1626				wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
1627					OCRDMA_CQE_BUFTAG_SHIFT) &
1628					qp->srq->rq.max_wqe_idx;
1629				BUG_ON(wqe_idx < 1);
1630				spin_lock_irqsave(&qp->srq->q_lock, flags);
1631				ocrdma_hwq_inc_tail(&qp->srq->rq);
1632				ocrdma_srq_toggle_bit(qp->srq, wqe_idx - 1);
1633				spin_unlock_irqrestore(&qp->srq->q_lock, flags);
1634
1635			} else {
1636				ocrdma_hwq_inc_tail(&qp->rq);
1637			}
1638		}
1639		/* mark cqe discarded so that it is not picked up later
1640		 * in the poll_cq().
1641		 */
1642		discard_cnt += 1;
1643		cqe->cmn.qpn = 0;
1644skip_cqe:
1645		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
1646	} while (cur_getp != stop_getp);
1647	spin_unlock_irqrestore(&cq->cq_lock, cq_flags);
1648}
1649
1650void ocrdma_del_flush_qp(struct ocrdma_qp *qp)
1651{
1652	int found = false;
1653	unsigned long flags;
1654	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
1655	/* sync with any active CQ poll */
1656
1657	spin_lock_irqsave(&dev->flush_q_lock, flags);
1658	found = ocrdma_is_qp_in_sq_flushlist(qp->sq_cq, qp);
1659	if (found)
1660		list_del(&qp->sq_entry);
1661	if (!qp->srq) {
1662		found = ocrdma_is_qp_in_rq_flushlist(qp->rq_cq, qp);
1663		if (found)
1664			list_del(&qp->rq_entry);
1665	}
1666	spin_unlock_irqrestore(&dev->flush_q_lock, flags);
1667}
1668
1669int ocrdma_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata)
1670{
1671	struct ocrdma_pd *pd;
1672	struct ocrdma_qp *qp;
1673	struct ocrdma_dev *dev;
1674	struct ib_qp_attr attrs;
1675	int attr_mask;
1676	unsigned long flags;
1677
1678	qp = get_ocrdma_qp(ibqp);
1679	dev = get_ocrdma_dev(ibqp->device);
1680
1681	pd = qp->pd;
1682
1683	/* change the QP state to ERROR */
1684	if (qp->state != OCRDMA_QPS_RST) {
1685		attrs.qp_state = IB_QPS_ERR;
1686		attr_mask = IB_QP_STATE;
1687		_ocrdma_modify_qp(ibqp, &attrs, attr_mask);
1688	}
1689	/* ensure that CQEs for a newly created QP (whose id may be the
1690	 * same as that of the QP just being destroyed) don't get
1691	 * discarded until the old CQEs are discarded.
1692	 */
1693	mutex_lock(&dev->dev_lock);
1694	(void) ocrdma_mbx_destroy_qp(dev, qp);
1695
1696	/*
1697	 * acquire the CQ lock while destroy is in progress, in order to
1698	 * protect against processing in-flight CQEs for this QP.
1699	 */
1700	spin_lock_irqsave(&qp->sq_cq->cq_lock, flags);
1701	if (qp->rq_cq && (qp->rq_cq != qp->sq_cq)) {
1702		spin_lock(&qp->rq_cq->cq_lock);
1703		ocrdma_del_qpn_map(dev, qp);
1704		spin_unlock(&qp->rq_cq->cq_lock);
1705	} else {
1706		ocrdma_del_qpn_map(dev, qp);
1707	}
1708	spin_unlock_irqrestore(&qp->sq_cq->cq_lock, flags);
1709
1710	if (!pd->uctx) {
1711		ocrdma_discard_cqes(qp, qp->sq_cq);
1712		ocrdma_discard_cqes(qp, qp->rq_cq);
1713	}
1714	mutex_unlock(&dev->dev_lock);
1715
1716	if (pd->uctx) {
1717		ocrdma_del_mmap(pd->uctx, (u64) qp->sq.pa,
1718				PAGE_ALIGN(qp->sq.len));
1719		if (!qp->srq)
1720			ocrdma_del_mmap(pd->uctx, (u64) qp->rq.pa,
1721					PAGE_ALIGN(qp->rq.len));
1722	}
1723
1724	ocrdma_del_flush_qp(qp);
1725
1726	kfree(qp->wqe_wr_id_tbl);
1727	kfree(qp->rqe_wr_id_tbl);
1728	kfree(qp);
1729	return 0;
1730}
1731
1732static int ocrdma_copy_srq_uresp(struct ocrdma_dev *dev, struct ocrdma_srq *srq,
1733				struct ib_udata *udata)
1734{
1735	int status;
1736	struct ocrdma_create_srq_uresp uresp;
1737
1738	memset(&uresp, 0, sizeof(uresp));
1739	uresp.rq_dbid = srq->rq.dbid;
1740	uresp.num_rq_pages = 1;
1741	uresp.rq_page_addr[0] = virt_to_phys(srq->rq.va);
1742	uresp.rq_page_size = srq->rq.len;
1743	uresp.db_page_addr = dev->nic_info.unmapped_db +
1744	    (srq->pd->id * dev->nic_info.db_page_size);
1745	uresp.db_page_size = dev->nic_info.db_page_size;
1746	uresp.num_rqe_allocated = srq->rq.max_cnt;
1747	if (ocrdma_get_asic_type(dev) == OCRDMA_ASIC_GEN_SKH_R) {
1748		uresp.db_rq_offset = OCRDMA_DB_GEN2_RQ_OFFSET;
1749		uresp.db_shift = 24;
1750	} else {
1751		uresp.db_rq_offset = OCRDMA_DB_RQ_OFFSET;
1752		uresp.db_shift = 16;
1753	}
1754
1755	status = ib_copy_to_udata(udata, &uresp, sizeof(uresp));
1756	if (status)
1757		return status;
1758	status = ocrdma_add_mmap(srq->pd->uctx, uresp.rq_page_addr[0],
1759				 uresp.rq_page_size);
1760	if (status)
1761		return status;
1762	return status;
1763}
1764
1765int ocrdma_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init_attr,
1766		      struct ib_udata *udata)
1767{
1768	int status;
1769	struct ocrdma_pd *pd = get_ocrdma_pd(ibsrq->pd);
1770	struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
1771	struct ocrdma_srq *srq = get_ocrdma_srq(ibsrq);
1772
1773	if (init_attr->attr.max_sge > dev->attr.max_recv_sge)
1774		return -EINVAL;
1775	if (init_attr->attr.max_wr > dev->attr.max_rqe)
1776		return -EINVAL;
1777
1778	spin_lock_init(&srq->q_lock);
1779	srq->pd = pd;
1780	srq->db = dev->nic_info.db + (pd->id * dev->nic_info.db_page_size);
1781	status = ocrdma_mbx_create_srq(dev, srq, init_attr, pd);
1782	if (status)
1783		return status;
1784
1785	if (!udata) {
1786		srq->rqe_wr_id_tbl = kcalloc(srq->rq.max_cnt, sizeof(u64),
1787					     GFP_KERNEL);
1788		if (!srq->rqe_wr_id_tbl) {
1789			status = -ENOMEM;
1790			goto arm_err;
1791		}
1792
1793		srq->bit_fields_len = (srq->rq.max_cnt / 32) +
1794		    (srq->rq.max_cnt % 32 ? 1 : 0);
1795		srq->idx_bit_fields =
1796		    kmalloc_array(srq->bit_fields_len, sizeof(u32),
1797				  GFP_KERNEL);
1798		if (!srq->idx_bit_fields) {
1799			status = -ENOMEM;
1800			goto arm_err;
1801		}
1802		memset(srq->idx_bit_fields, 0xff,
1803		       srq->bit_fields_len * sizeof(u32));
1804	}
1805
1806	if (init_attr->attr.srq_limit) {
1807		status = ocrdma_mbx_modify_srq(srq, &init_attr->attr);
1808		if (status)
1809			goto arm_err;
1810	}
1811
1812	if (udata) {
1813		status = ocrdma_copy_srq_uresp(dev, srq, udata);
1814		if (status)
1815			goto arm_err;
1816	}
1817
1818	return 0;
1819
1820arm_err:
1821	ocrdma_mbx_destroy_srq(dev, srq);
1822	kfree(srq->rqe_wr_id_tbl);
1823	kfree(srq->idx_bit_fields);
1824	return status;
1825}
1826
1827int ocrdma_modify_srq(struct ib_srq *ibsrq,
1828		      struct ib_srq_attr *srq_attr,
1829		      enum ib_srq_attr_mask srq_attr_mask,
1830		      struct ib_udata *udata)
1831{
1832	int status;
1833	struct ocrdma_srq *srq;
1834
1835	srq = get_ocrdma_srq(ibsrq);
1836	if (srq_attr_mask & IB_SRQ_MAX_WR)
1837		status = -EINVAL;
1838	else
1839		status = ocrdma_mbx_modify_srq(srq, srq_attr);
1840	return status;
1841}
1842
1843int ocrdma_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *srq_attr)
1844{
1845	int status;
1846	struct ocrdma_srq *srq;
1847
1848	srq = get_ocrdma_srq(ibsrq);
1849	status = ocrdma_mbx_query_srq(srq, srq_attr);
1850	return status;
1851}
1852
1853int ocrdma_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata)
1854{
1855	struct ocrdma_srq *srq;
1856	struct ocrdma_dev *dev = get_ocrdma_dev(ibsrq->device);
1857
1858	srq = get_ocrdma_srq(ibsrq);
1859
1860	ocrdma_mbx_destroy_srq(dev, srq);
1861
1862	if (srq->pd->uctx)
1863		ocrdma_del_mmap(srq->pd->uctx, (u64) srq->rq.pa,
1864				PAGE_ALIGN(srq->rq.len));
1865
1866	kfree(srq->idx_bit_fields);
1867	kfree(srq->rqe_wr_id_tbl);
1868	return 0;
1869}
1870
1871/* unprivileged verbs and their support functions. */
1872static void ocrdma_build_ud_hdr(struct ocrdma_qp *qp,
1873				struct ocrdma_hdr_wqe *hdr,
1874				const struct ib_send_wr *wr)
1875{
1876	struct ocrdma_ewqe_ud_hdr *ud_hdr =
1877		(struct ocrdma_ewqe_ud_hdr *)(hdr + 1);
1878	struct ocrdma_ah *ah = get_ocrdma_ah(ud_wr(wr)->ah);
1879
1880	ud_hdr->rsvd_dest_qpn = ud_wr(wr)->remote_qpn;
1881	if (qp->qp_type == IB_QPT_GSI)
1882		ud_hdr->qkey = qp->qkey;
1883	else
1884		ud_hdr->qkey = ud_wr(wr)->remote_qkey;
1885	ud_hdr->rsvd_ahid = ah->id;
1886	ud_hdr->hdr_type = ah->hdr_type;
1887	if (ah->av->valid & OCRDMA_AV_VLAN_VALID)
1888		hdr->cw |= (OCRDMA_FLAG_AH_VLAN_PR << OCRDMA_WQE_FLAGS_SHIFT);
1889}
1890
1891static void ocrdma_build_sges(struct ocrdma_hdr_wqe *hdr,
1892			      struct ocrdma_sge *sge, int num_sge,
1893			      struct ib_sge *sg_list)
1894{
1895	int i;
1896
1897	for (i = 0; i < num_sge; i++) {
1898		sge[i].lrkey = sg_list[i].lkey;
1899		sge[i].addr_lo = sg_list[i].addr;
1900		sge[i].addr_hi = upper_32_bits(sg_list[i].addr);
1901		sge[i].len = sg_list[i].length;
1902		hdr->total_len += sg_list[i].length;
1903	}
1904	if (num_sge == 0)
1905		memset(sge, 0, sizeof(*sge));
1906}
1907
1908static inline uint32_t ocrdma_sglist_len(struct ib_sge *sg_list, int num_sge)
1909{
1910	uint32_t total_len = 0, i;
1911
1912	for (i = 0; i < num_sge; i++)
1913		total_len += sg_list[i].length;
1914	return total_len;
1915}
1916
1917
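/* For inline sends the payload is copied directly into the WQE; otherwise
 * SGEs are built that point at the caller's buffers. In both cases the WQE
 * size field of the control word is updated to match.
 */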
1918static int ocrdma_build_inline_sges(struct ocrdma_qp *qp,
1919				    struct ocrdma_hdr_wqe *hdr,
1920				    struct ocrdma_sge *sge,
1921				    const struct ib_send_wr *wr, u32 wqe_size)
1922{
1923	int i;
1924	char *dpp_addr;
1925
1926	if (wr->send_flags & IB_SEND_INLINE && qp->qp_type != IB_QPT_UD) {
1927		hdr->total_len = ocrdma_sglist_len(wr->sg_list, wr->num_sge);
1928		if (unlikely(hdr->total_len > qp->max_inline_data)) {
			pr_err("%s() supported_len=0x%x, unsupported len req=0x%x\n",
			       __func__, qp->max_inline_data, hdr->total_len);
1932			return -EINVAL;
1933		}
1934		dpp_addr = (char *)sge;
1935		for (i = 0; i < wr->num_sge; i++) {
1936			memcpy(dpp_addr,
1937			       (void *)(unsigned long)wr->sg_list[i].addr,
1938			       wr->sg_list[i].length);
1939			dpp_addr += wr->sg_list[i].length;
1940		}
1941
1942		wqe_size += roundup(hdr->total_len, OCRDMA_WQE_ALIGN_BYTES);
		if (hdr->total_len == 0)
1944			wqe_size += sizeof(struct ocrdma_sge);
1945		hdr->cw |= (OCRDMA_TYPE_INLINE << OCRDMA_WQE_TYPE_SHIFT);
1946	} else {
1947		ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
1948		if (wr->num_sge)
1949			wqe_size += (wr->num_sge * sizeof(struct ocrdma_sge));
1950		else
1951			wqe_size += sizeof(struct ocrdma_sge);
1952		hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
1953	}
1954	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
1955	return 0;
1956}
1957
1958static int ocrdma_build_send(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1959			     const struct ib_send_wr *wr)
1960{
1961	int status;
1962	struct ocrdma_sge *sge;
1963	u32 wqe_size = sizeof(*hdr);
1964
1965	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
1966		ocrdma_build_ud_hdr(qp, hdr, wr);
1967		sge = (struct ocrdma_sge *)(hdr + 2);
1968		wqe_size += sizeof(struct ocrdma_ewqe_ud_hdr);
1969	} else {
1970		sge = (struct ocrdma_sge *)(hdr + 1);
1971	}
1972
1973	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1974	return status;
1975}
1976
1977static int ocrdma_build_write(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1978			      const struct ib_send_wr *wr)
1979{
1980	int status;
1981	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1982	struct ocrdma_sge *sge = ext_rw + 1;
1983	u32 wqe_size = sizeof(*hdr) + sizeof(*ext_rw);
1984
1985	status = ocrdma_build_inline_sges(qp, hdr, sge, wr, wqe_size);
1986	if (status)
1987		return status;
1988	ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
1989	ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
1990	ext_rw->lrkey = rdma_wr(wr)->rkey;
1991	ext_rw->len = hdr->total_len;
1992	return 0;
1993}
1994
1995static void ocrdma_build_read(struct ocrdma_qp *qp, struct ocrdma_hdr_wqe *hdr,
1996			      const struct ib_send_wr *wr)
1997{
1998	struct ocrdma_sge *ext_rw = (struct ocrdma_sge *)(hdr + 1);
1999	struct ocrdma_sge *sge = ext_rw + 1;
2000	u32 wqe_size = ((wr->num_sge + 1) * sizeof(struct ocrdma_sge)) +
2001	    sizeof(struct ocrdma_hdr_wqe);
2002
2003	ocrdma_build_sges(hdr, sge, wr->num_sge, wr->sg_list);
2004	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2005	hdr->cw |= (OCRDMA_READ << OCRDMA_WQE_OPCODE_SHIFT);
2006	hdr->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2007
2008	ext_rw->addr_lo = rdma_wr(wr)->remote_addr;
2009	ext_rw->addr_hi = upper_32_bits(rdma_wr(wr)->remote_addr);
2010	ext_rw->lrkey = rdma_wr(wr)->rkey;
2011	ext_rw->len = hdr->total_len;
2012}
2013
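/* Return the page size encoding used in the fast-register WQE: the value is
 * the power-of-two exponent relative to a 4K base (0 => 4K ... 16 => 256M).
 */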
2014static int get_encoded_page_size(int pg_sz)
2015{
	/* Max encodable page size is 256M (4096 << 16) */
2017	int i = 0;
2018	for (; i < 17; i++)
2019		if (pg_sz == (4096 << i))
2020			break;
2021	return i;
2022}
2023
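/* Build a fast-register (IB_WR_REG_MR) WQE: program the access flags, key,
 * length, iova and first-byte offset, then copy the MR's page addresses into
 * the physical buffer list (PBL) entries.
 */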
2024static int ocrdma_build_reg(struct ocrdma_qp *qp,
2025			    struct ocrdma_hdr_wqe *hdr,
2026			    const struct ib_reg_wr *wr)
2027{
2028	u64 fbo;
2029	struct ocrdma_ewqe_fr *fast_reg = (struct ocrdma_ewqe_fr *)(hdr + 1);
2030	struct ocrdma_mr *mr = get_ocrdma_mr(wr->mr);
2031	struct ocrdma_pbl *pbl_tbl = mr->hwmr.pbl_table;
2032	struct ocrdma_pbe *pbe;
2033	u32 wqe_size = sizeof(*fast_reg) + sizeof(*hdr);
2034	int num_pbes = 0, i;
2035
2036	wqe_size = roundup(wqe_size, OCRDMA_WQE_ALIGN_BYTES);
2037
2038	hdr->cw |= (OCRDMA_FR_MR << OCRDMA_WQE_OPCODE_SHIFT);
2039	hdr->cw |= ((wqe_size / OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT);
2040
2041	if (wr->access & IB_ACCESS_LOCAL_WRITE)
2042		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_LOCAL_WR;
2043	if (wr->access & IB_ACCESS_REMOTE_WRITE)
2044		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_WR;
2045	if (wr->access & IB_ACCESS_REMOTE_READ)
2046		hdr->rsvd_lkey_flags |= OCRDMA_LKEY_FLAG_REMOTE_RD;
2047	hdr->lkey = wr->key;
2048	hdr->total_len = mr->ibmr.length;
2049
2050	fbo = mr->ibmr.iova - mr->pages[0];
2051
2052	fast_reg->va_hi = upper_32_bits(mr->ibmr.iova);
2053	fast_reg->va_lo = (u32) (mr->ibmr.iova & 0xffffffff);
2054	fast_reg->fbo_hi = upper_32_bits(fbo);
2055	fast_reg->fbo_lo = (u32) fbo & 0xffffffff;
2056	fast_reg->num_sges = mr->npages;
2057	fast_reg->size_sge = get_encoded_page_size(mr->ibmr.page_size);
2058
2059	pbe = pbl_tbl->va;
2060	for (i = 0; i < mr->npages; i++) {
2061		u64 buf_addr = mr->pages[i];
2062
2063		pbe->pa_lo = cpu_to_le32((u32) (buf_addr & PAGE_MASK));
2064		pbe->pa_hi = cpu_to_le32((u32) upper_32_bits(buf_addr));
2065		num_pbes += 1;
2066		pbe++;
2067
		/* if the current pbl is full, move to the next pbl;
		 * only advance while more pages remain to be stored.
		 */
		if ((num_pbes % (mr->hwmr.pbl_size / sizeof(u64))) == 0 &&
		    (i + 1) < mr->npages) {
2072			pbl_tbl++;
2073			pbe = (struct ocrdma_pbe *)pbl_tbl->va;
2074		}
2075	}
2076
2077	return 0;
2078}
2079
2080static void ocrdma_ring_sq_db(struct ocrdma_qp *qp)
2081{
2082	u32 val = qp->sq.dbid | (1 << OCRDMA_DB_SQ_SHIFT);
2083
2084	iowrite32(val, qp->sq_db);
2085}
2086
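/* Post a chain of send work requests. The QP must be in RTS or SQD; each WR
 * is validated, built into a WQE at the SQ head, made visible to the device
 * with a write barrier and then announced through the SQ doorbell.
 */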
2087int ocrdma_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr,
2088		     const struct ib_send_wr **bad_wr)
2089{
2090	int status = 0;
2091	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2092	struct ocrdma_hdr_wqe *hdr;
2093	unsigned long flags;
2094
2095	spin_lock_irqsave(&qp->q_lock, flags);
2096	if (qp->state != OCRDMA_QPS_RTS && qp->state != OCRDMA_QPS_SQD) {
2097		spin_unlock_irqrestore(&qp->q_lock, flags);
2098		*bad_wr = wr;
2099		return -EINVAL;
2100	}
2101
2102	while (wr) {
2103		if (qp->qp_type == IB_QPT_UD &&
2104		    (wr->opcode != IB_WR_SEND &&
2105		     wr->opcode != IB_WR_SEND_WITH_IMM)) {
2106			*bad_wr = wr;
2107			status = -EINVAL;
2108			break;
2109		}
2110		if (ocrdma_hwq_free_cnt(&qp->sq) == 0 ||
2111		    wr->num_sge > qp->sq.max_sges) {
2112			*bad_wr = wr;
2113			status = -ENOMEM;
2114			break;
2115		}
2116		hdr = ocrdma_hwq_head(&qp->sq);
2117		hdr->cw = 0;
2118		if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2119			hdr->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2120		if (wr->send_flags & IB_SEND_FENCE)
2121			hdr->cw |=
2122			    (OCRDMA_FLAG_FENCE_L << OCRDMA_WQE_FLAGS_SHIFT);
2123		if (wr->send_flags & IB_SEND_SOLICITED)
2124			hdr->cw |=
2125			    (OCRDMA_FLAG_SOLICIT << OCRDMA_WQE_FLAGS_SHIFT);
2126		hdr->total_len = 0;
2127		switch (wr->opcode) {
2128		case IB_WR_SEND_WITH_IMM:
2129			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2130			hdr->immdt = ntohl(wr->ex.imm_data);
2131			fallthrough;
2132		case IB_WR_SEND:
2133			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
			status = ocrdma_build_send(qp, hdr, wr);
2135			break;
2136		case IB_WR_SEND_WITH_INV:
2137			hdr->cw |= (OCRDMA_FLAG_INV << OCRDMA_WQE_FLAGS_SHIFT);
2138			hdr->cw |= (OCRDMA_SEND << OCRDMA_WQE_OPCODE_SHIFT);
2139			hdr->lkey = wr->ex.invalidate_rkey;
2140			status = ocrdma_build_send(qp, hdr, wr);
2141			break;
2142		case IB_WR_RDMA_WRITE_WITH_IMM:
2143			hdr->cw |= (OCRDMA_FLAG_IMM << OCRDMA_WQE_FLAGS_SHIFT);
2144			hdr->immdt = ntohl(wr->ex.imm_data);
2145			fallthrough;
2146		case IB_WR_RDMA_WRITE:
2147			hdr->cw |= (OCRDMA_WRITE << OCRDMA_WQE_OPCODE_SHIFT);
2148			status = ocrdma_build_write(qp, hdr, wr);
2149			break;
2150		case IB_WR_RDMA_READ:
2151			ocrdma_build_read(qp, hdr, wr);
2152			break;
2153		case IB_WR_LOCAL_INV:
2154			hdr->cw |=
2155			    (OCRDMA_LKEY_INV << OCRDMA_WQE_OPCODE_SHIFT);
2156			hdr->cw |= ((sizeof(struct ocrdma_hdr_wqe) +
2157					sizeof(struct ocrdma_sge)) /
2158				OCRDMA_WQE_STRIDE) << OCRDMA_WQE_SIZE_SHIFT;
2159			hdr->lkey = wr->ex.invalidate_rkey;
2160			break;
2161		case IB_WR_REG_MR:
2162			status = ocrdma_build_reg(qp, hdr, reg_wr(wr));
2163			break;
2164		default:
2165			status = -EINVAL;
2166			break;
2167		}
2168		if (status) {
2169			*bad_wr = wr;
2170			break;
2171		}
2172		if (wr->send_flags & IB_SEND_SIGNALED || qp->signaled)
2173			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 1;
2174		else
2175			qp->wqe_wr_id_tbl[qp->sq.head].signaled = 0;
2176		qp->wqe_wr_id_tbl[qp->sq.head].wrid = wr->wr_id;
2177		ocrdma_cpu_to_le32(hdr, ((hdr->cw >> OCRDMA_WQE_SIZE_SHIFT) &
2178				   OCRDMA_WQE_SIZE_MASK) * OCRDMA_WQE_STRIDE);
2179		/* make sure wqe is written before adapter can access it */
2180		wmb();
2181		/* inform hw to start processing it */
2182		ocrdma_ring_sq_db(qp);
2183
2184		/* update pointer, counter for next wr */
2185		ocrdma_hwq_inc_head(&qp->sq);
2186		wr = wr->next;
2187	}
2188	spin_unlock_irqrestore(&qp->q_lock, flags);
2189	return status;
2190}
2191
2192static void ocrdma_ring_rq_db(struct ocrdma_qp *qp)
2193{
2194	u32 val = qp->rq.dbid | (1 << OCRDMA_DB_RQ_SHIFT);
2195
2196	iowrite32(val, qp->rq_db);
2197}
2198
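/* Build a receive WQE from a work request. The tag is only meaningful for
 * SRQs, where it indexes the wr_id shadow table.
 */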
2199static void ocrdma_build_rqe(struct ocrdma_hdr_wqe *rqe,
2200			     const struct ib_recv_wr *wr, u16 tag)
2201{
2202	u32 wqe_size = 0;
2203	struct ocrdma_sge *sge;
2204	if (wr->num_sge)
2205		wqe_size = (wr->num_sge * sizeof(*sge)) + sizeof(*rqe);
2206	else
2207		wqe_size = sizeof(*sge) + sizeof(*rqe);
2208
2209	rqe->cw = ((wqe_size / OCRDMA_WQE_STRIDE) <<
2210				OCRDMA_WQE_SIZE_SHIFT);
2211	rqe->cw |= (OCRDMA_FLAG_SIG << OCRDMA_WQE_FLAGS_SHIFT);
2212	rqe->cw |= (OCRDMA_TYPE_LKEY << OCRDMA_WQE_TYPE_SHIFT);
2213	rqe->total_len = 0;
2214	rqe->rsvd_tag = tag;
2215	sge = (struct ocrdma_sge *)(rqe + 1);
2216	ocrdma_build_sges(rqe, sge, wr->num_sge, wr->sg_list);
2217	ocrdma_cpu_to_le32(rqe, wqe_size);
2218}
2219
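/* Post a chain of receive work requests on the QP's RQ, ringing the RQ
 * doorbell once per posted RQE.
 */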
2220int ocrdma_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr,
2221		     const struct ib_recv_wr **bad_wr)
2222{
2223	int status = 0;
2224	unsigned long flags;
2225	struct ocrdma_qp *qp = get_ocrdma_qp(ibqp);
2226	struct ocrdma_hdr_wqe *rqe;
2227
2228	spin_lock_irqsave(&qp->q_lock, flags);
2229	if (qp->state == OCRDMA_QPS_RST || qp->state == OCRDMA_QPS_ERR) {
2230		spin_unlock_irqrestore(&qp->q_lock, flags);
2231		*bad_wr = wr;
2232		return -EINVAL;
2233	}
2234	while (wr) {
2235		if (ocrdma_hwq_free_cnt(&qp->rq) == 0 ||
2236		    wr->num_sge > qp->rq.max_sges) {
2237			*bad_wr = wr;
2238			status = -ENOMEM;
2239			break;
2240		}
2241		rqe = ocrdma_hwq_head(&qp->rq);
2242		ocrdma_build_rqe(rqe, wr, 0);
2243
2244		qp->rqe_wr_id_tbl[qp->rq.head] = wr->wr_id;
2245		/* make sure rqe is written before adapter can access it */
2246		wmb();
2247
2248		/* inform hw to start processing it */
2249		ocrdma_ring_rq_db(qp);
2250
2251		/* update pointer, counter for next wr */
2252		ocrdma_hwq_inc_head(&qp->rq);
2253		wr = wr->next;
2254	}
2255	spin_unlock_irqrestore(&qp->q_lock, flags);
2256	return status;
2257}
2258
2259/* cqe for srq's rqe can potentially arrive out of order.
2260 * index gives the entry in the shadow table where to store
2261 * the wr_id. tag/index is returned in cqe to reference back
2262 * for a given rqe.
2263 */
2264static int ocrdma_srq_get_idx(struct ocrdma_srq *srq)
2265{
2266	int row = 0;
2267	int indx = 0;
2268
2269	for (row = 0; row < srq->bit_fields_len; row++) {
2270		if (srq->idx_bit_fields[row]) {
2271			indx = ffs(srq->idx_bit_fields[row]);
2272			indx = (row * 32) + (indx - 1);
2273			BUG_ON(indx >= srq->rq.max_cnt);
2274			ocrdma_srq_toggle_bit(srq, indx);
2275			break;
2276		}
2277	}
2278
2279	BUG_ON(row == srq->bit_fields_len);
2280	return indx + 1; /* Use from index 1 */
2281}
2282
2283static void ocrdma_ring_srq_db(struct ocrdma_srq *srq)
2284{
2285	u32 val = srq->rq.dbid | (1 << 16);
2286
2287	iowrite32(val, srq->db + OCRDMA_DB_GEN2_SRQ_OFFSET);
2288}
2289
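/* Post receive work requests on an SRQ. A free index (tag) taken from the
 * bitmap is stored in each RQE so that its completion can be matched back to
 * the right wr_id even when CQEs arrive out of order.
 */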
2290int ocrdma_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr,
2291			 const struct ib_recv_wr **bad_wr)
2292{
2293	int status = 0;
2294	unsigned long flags;
2295	struct ocrdma_srq *srq;
2296	struct ocrdma_hdr_wqe *rqe;
2297	u16 tag;
2298
2299	srq = get_ocrdma_srq(ibsrq);
2300
2301	spin_lock_irqsave(&srq->q_lock, flags);
2302	while (wr) {
2303		if (ocrdma_hwq_free_cnt(&srq->rq) == 0 ||
2304		    wr->num_sge > srq->rq.max_sges) {
2305			status = -ENOMEM;
2306			*bad_wr = wr;
2307			break;
2308		}
2309		tag = ocrdma_srq_get_idx(srq);
2310		rqe = ocrdma_hwq_head(&srq->rq);
2311		ocrdma_build_rqe(rqe, wr, tag);
2312
2313		srq->rqe_wr_id_tbl[tag] = wr->wr_id;
2314		/* make sure rqe is written before adapter can perform DMA */
2315		wmb();
2316		/* inform hw to start processing it */
2317		ocrdma_ring_srq_db(srq);
2318		/* update pointer, counter for next wr */
2319		ocrdma_hwq_inc_head(&srq->rq);
2320		wr = wr->next;
2321	}
2322	spin_unlock_irqrestore(&srq->q_lock, flags);
2323	return status;
2324}
2325
2326static enum ib_wc_status ocrdma_to_ibwc_err(u16 status)
2327{
2328	enum ib_wc_status ibwc_status;
2329
2330	switch (status) {
2331	case OCRDMA_CQE_GENERAL_ERR:
2332		ibwc_status = IB_WC_GENERAL_ERR;
2333		break;
2334	case OCRDMA_CQE_LOC_LEN_ERR:
2335		ibwc_status = IB_WC_LOC_LEN_ERR;
2336		break;
2337	case OCRDMA_CQE_LOC_QP_OP_ERR:
2338		ibwc_status = IB_WC_LOC_QP_OP_ERR;
2339		break;
2340	case OCRDMA_CQE_LOC_EEC_OP_ERR:
2341		ibwc_status = IB_WC_LOC_EEC_OP_ERR;
2342		break;
2343	case OCRDMA_CQE_LOC_PROT_ERR:
2344		ibwc_status = IB_WC_LOC_PROT_ERR;
2345		break;
2346	case OCRDMA_CQE_WR_FLUSH_ERR:
2347		ibwc_status = IB_WC_WR_FLUSH_ERR;
2348		break;
2349	case OCRDMA_CQE_MW_BIND_ERR:
2350		ibwc_status = IB_WC_MW_BIND_ERR;
2351		break;
2352	case OCRDMA_CQE_BAD_RESP_ERR:
2353		ibwc_status = IB_WC_BAD_RESP_ERR;
2354		break;
2355	case OCRDMA_CQE_LOC_ACCESS_ERR:
2356		ibwc_status = IB_WC_LOC_ACCESS_ERR;
2357		break;
2358	case OCRDMA_CQE_REM_INV_REQ_ERR:
2359		ibwc_status = IB_WC_REM_INV_REQ_ERR;
2360		break;
2361	case OCRDMA_CQE_REM_ACCESS_ERR:
2362		ibwc_status = IB_WC_REM_ACCESS_ERR;
2363		break;
2364	case OCRDMA_CQE_REM_OP_ERR:
2365		ibwc_status = IB_WC_REM_OP_ERR;
2366		break;
2367	case OCRDMA_CQE_RETRY_EXC_ERR:
2368		ibwc_status = IB_WC_RETRY_EXC_ERR;
2369		break;
2370	case OCRDMA_CQE_RNR_RETRY_EXC_ERR:
2371		ibwc_status = IB_WC_RNR_RETRY_EXC_ERR;
2372		break;
2373	case OCRDMA_CQE_LOC_RDD_VIOL_ERR:
2374		ibwc_status = IB_WC_LOC_RDD_VIOL_ERR;
2375		break;
2376	case OCRDMA_CQE_REM_INV_RD_REQ_ERR:
2377		ibwc_status = IB_WC_REM_INV_RD_REQ_ERR;
2378		break;
2379	case OCRDMA_CQE_REM_ABORT_ERR:
2380		ibwc_status = IB_WC_REM_ABORT_ERR;
2381		break;
2382	case OCRDMA_CQE_INV_EECN_ERR:
2383		ibwc_status = IB_WC_INV_EECN_ERR;
2384		break;
2385	case OCRDMA_CQE_INV_EEC_STATE_ERR:
2386		ibwc_status = IB_WC_INV_EEC_STATE_ERR;
2387		break;
2388	case OCRDMA_CQE_FATAL_ERR:
2389		ibwc_status = IB_WC_FATAL_ERR;
2390		break;
2391	case OCRDMA_CQE_RESP_TIMEOUT_ERR:
2392		ibwc_status = IB_WC_RESP_TIMEOUT_ERR;
2393		break;
2394	default:
2395		ibwc_status = IB_WC_GENERAL_ERR;
2396		break;
2397	}
2398	return ibwc_status;
2399}
2400
2401static void ocrdma_update_wc(struct ocrdma_qp *qp, struct ib_wc *ibwc,
2402		      u32 wqe_idx)
2403{
2404	struct ocrdma_hdr_wqe *hdr;
2405	struct ocrdma_sge *rw;
2406	int opcode;
2407
2408	hdr = ocrdma_hwq_head_from_idx(&qp->sq, wqe_idx);
2409
2410	ibwc->wr_id = qp->wqe_wr_id_tbl[wqe_idx].wrid;
2411	/* Undo the hdr->cw swap */
2412	opcode = le32_to_cpu(hdr->cw) & OCRDMA_WQE_OPCODE_MASK;
2413	switch (opcode) {
2414	case OCRDMA_WRITE:
2415		ibwc->opcode = IB_WC_RDMA_WRITE;
2416		break;
2417	case OCRDMA_READ:
2418		rw = (struct ocrdma_sge *)(hdr + 1);
2419		ibwc->opcode = IB_WC_RDMA_READ;
2420		ibwc->byte_len = rw->len;
2421		break;
2422	case OCRDMA_SEND:
2423		ibwc->opcode = IB_WC_SEND;
2424		break;
2425	case OCRDMA_FR_MR:
2426		ibwc->opcode = IB_WC_REG_MR;
2427		break;
2428	case OCRDMA_LKEY_INV:
2429		ibwc->opcode = IB_WC_LOCAL_INV;
2430		break;
2431	default:
2432		ibwc->status = IB_WC_GENERAL_ERR;
		pr_err("%s() invalid opcode received = 0x%x\n",
		       __func__, opcode);
2435		break;
2436	}
2437}
2438
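/* Overwrite the status of a CQE with WR_FLUSH_ERR so that the remaining
 * WQEs/RQEs of a flushed QP are reported as flushed. UD receive CQEs carry
 * their status at a different bit offset and are handled separately.
 */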
2439static void ocrdma_set_cqe_status_flushed(struct ocrdma_qp *qp,
2440						struct ocrdma_cqe *cqe)
2441{
2442	if (is_cqe_for_sq(cqe)) {
2443		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2444				cqe->flags_status_srcqpn) &
2445					~OCRDMA_CQE_STATUS_MASK);
2446		cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2447				cqe->flags_status_srcqpn) |
2448				(OCRDMA_CQE_WR_FLUSH_ERR <<
2449					OCRDMA_CQE_STATUS_SHIFT));
2450	} else {
2451		if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2452			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2453					cqe->flags_status_srcqpn) &
2454						~OCRDMA_CQE_UD_STATUS_MASK);
2455			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2456					cqe->flags_status_srcqpn) |
2457					(OCRDMA_CQE_WR_FLUSH_ERR <<
2458						OCRDMA_CQE_UD_STATUS_SHIFT));
2459		} else {
2460			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2461					cqe->flags_status_srcqpn) &
2462						~OCRDMA_CQE_STATUS_MASK);
2463			cqe->flags_status_srcqpn = cpu_to_le32(le32_to_cpu(
2464					cqe->flags_status_srcqpn) |
2465					(OCRDMA_CQE_WR_FLUSH_ERR <<
2466						OCRDMA_CQE_STATUS_SHIFT));
2467		}
2468	}
2469}
2470
2471static bool ocrdma_update_err_cqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2472				  struct ocrdma_qp *qp, int status)
2473{
2474	bool expand = false;
2475
2476	ibwc->byte_len = 0;
2477	ibwc->qp = &qp->ibqp;
2478	ibwc->status = ocrdma_to_ibwc_err(status);
2479
2480	ocrdma_flush_qp(qp);
2481	ocrdma_qp_state_change(qp, IB_QPS_ERR, NULL);
2482
	/* if WQEs/RQEs are still pending for which flush CQEs must be
	 * returned, trigger expansion of this CQE.
	 */
2486	if (!is_hw_rq_empty(qp) || !is_hw_sq_empty(qp)) {
2487		expand = true;
2488		ocrdma_set_cqe_status_flushed(qp, cqe);
2489	}
2490	return expand;
2491}
2492
static bool ocrdma_update_err_rcqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2494				  struct ocrdma_qp *qp, int status)
2495{
2496	ibwc->opcode = IB_WC_RECV;
2497	ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2498	ocrdma_hwq_inc_tail(&qp->rq);
2499
2500	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2501}
2502
static bool ocrdma_update_err_scqe(struct ib_wc *ibwc, struct ocrdma_cqe *cqe,
2504				  struct ocrdma_qp *qp, int status)
2505{
2506	ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2507	ocrdma_hwq_inc_tail(&qp->sq);
2508
2509	return ocrdma_update_err_cqe(ibwc, cqe, qp, status);
2510}
2511
2512
2513static bool ocrdma_poll_err_scqe(struct ocrdma_qp *qp,
2514				 struct ocrdma_cqe *cqe, struct ib_wc *ibwc,
2515				 bool *polled, bool *stop)
2516{
2517	bool expand;
2518	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2519	int status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2520		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2521	if (status < OCRDMA_MAX_CQE_ERR)
2522		atomic_inc(&dev->cqe_err_stats[status]);
2523
	/* when the hw SQ is empty but the RQ is not, keep the cqe
	 * so that the CQ event is raised again.
	 */
2527	if (is_hw_sq_empty(qp) && !is_hw_rq_empty(qp)) {
		/* when the SQ and RQ share the same CQ, it is safe to
		 * return flush cqes for the RQEs here.
		 */
2531		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2532			*polled = true;
2533			status = OCRDMA_CQE_WR_FLUSH_ERR;
2534			expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2535		} else {
			/* stop processing further cqes; this cqe is kept to
			 * trigger the cq event on the buddy cq of the RQ.
			 * When the QP is destroyed, this cqe is removed
			 * from the cq's hardware queue.
			 */
2541			*polled = false;
2542			*stop = true;
2543			expand = false;
2544		}
2545	} else if (is_hw_sq_empty(qp)) {
2546		/* Do nothing */
2547		expand = false;
2548		*polled = false;
2549		*stop = false;
2550	} else {
2551		*polled = true;
2552		expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2553	}
2554	return expand;
2555}
2556
2557static bool ocrdma_poll_success_scqe(struct ocrdma_qp *qp,
2558				     struct ocrdma_cqe *cqe,
2559				     struct ib_wc *ibwc, bool *polled)
2560{
2561	bool expand = false;
2562	int tail = qp->sq.tail;
2563	u32 wqe_idx;
2564
2565	if (!qp->wqe_wr_id_tbl[tail].signaled) {
2566		*polled = false;    /* WC cannot be consumed yet */
2567	} else {
2568		ibwc->status = IB_WC_SUCCESS;
2569		ibwc->wc_flags = 0;
2570		ibwc->qp = &qp->ibqp;
2571		ocrdma_update_wc(qp, ibwc, tail);
2572		*polled = true;
2573	}
2574	wqe_idx = (le32_to_cpu(cqe->wq.wqeidx) &
2575			OCRDMA_CQE_WQEIDX_MASK) & qp->sq.max_wqe_idx;
2576	if (tail != wqe_idx)
2577		expand = true; /* Coalesced CQE can't be consumed yet */
2578
2579	ocrdma_hwq_inc_tail(&qp->sq);
2580	return expand;
2581}
2582
2583static bool ocrdma_poll_scqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2584			     struct ib_wc *ibwc, bool *polled, bool *stop)
2585{
2586	int status;
2587	bool expand;
2588
2589	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2590		OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2591
2592	if (status == OCRDMA_CQE_SUCCESS)
2593		expand = ocrdma_poll_success_scqe(qp, cqe, ibwc, polled);
2594	else
2595		expand = ocrdma_poll_err_scqe(qp, cqe, ibwc, polled, stop);
2596	return expand;
2597}
2598
2599static int ocrdma_update_ud_rcqe(struct ocrdma_dev *dev, struct ib_wc *ibwc,
2600				 struct ocrdma_cqe *cqe)
2601{
2602	int status;
2603	u16 hdr_type = 0;
2604
2605	status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2606		OCRDMA_CQE_UD_STATUS_MASK) >> OCRDMA_CQE_UD_STATUS_SHIFT;
2607	ibwc->src_qp = le32_to_cpu(cqe->flags_status_srcqpn) &
2608						OCRDMA_CQE_SRCQP_MASK;
2609	ibwc->pkey_index = 0;
2610	ibwc->wc_flags = IB_WC_GRH;
2611	ibwc->byte_len = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2612			  OCRDMA_CQE_UD_XFER_LEN_SHIFT) &
2613			  OCRDMA_CQE_UD_XFER_LEN_MASK;
2614
2615	if (ocrdma_is_udp_encap_supported(dev)) {
2616		hdr_type = (le32_to_cpu(cqe->ud.rxlen_pkey) >>
2617			    OCRDMA_CQE_UD_L3TYPE_SHIFT) &
2618			    OCRDMA_CQE_UD_L3TYPE_MASK;
2619		ibwc->wc_flags |= IB_WC_WITH_NETWORK_HDR_TYPE;
2620		ibwc->network_hdr_type = hdr_type;
2621	}
2622
2623	return status;
2624}
2625
2626static void ocrdma_update_free_srq_cqe(struct ib_wc *ibwc,
2627				       struct ocrdma_cqe *cqe,
2628				       struct ocrdma_qp *qp)
2629{
2630	unsigned long flags;
2631	struct ocrdma_srq *srq;
2632	u32 wqe_idx;
2633
2634	srq = get_ocrdma_srq(qp->ibqp.srq);
2635	wqe_idx = (le32_to_cpu(cqe->rq.buftag_qpn) >>
2636		OCRDMA_CQE_BUFTAG_SHIFT) & srq->rq.max_wqe_idx;
2637	BUG_ON(wqe_idx < 1);
2638
2639	ibwc->wr_id = srq->rqe_wr_id_tbl[wqe_idx];
2640	spin_lock_irqsave(&srq->q_lock, flags);
2641	ocrdma_srq_toggle_bit(srq, wqe_idx - 1);
2642	spin_unlock_irqrestore(&srq->q_lock, flags);
2643	ocrdma_hwq_inc_tail(&srq->rq);
2644}
2645
2646static bool ocrdma_poll_err_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2647				struct ib_wc *ibwc, bool *polled, bool *stop,
2648				int status)
2649{
2650	bool expand;
2651	struct ocrdma_dev *dev = get_ocrdma_dev(qp->ibqp.device);
2652
2653	if (status < OCRDMA_MAX_CQE_ERR)
2654		atomic_inc(&dev->cqe_err_stats[status]);
2655
	/* when the hw RQ is empty but the SQ is not, keep the cqe
	 * so that the CQ event is raised again.
	 */
2659	if (is_hw_rq_empty(qp) && !is_hw_sq_empty(qp)) {
2660		if (!qp->srq && (qp->sq_cq == qp->rq_cq)) {
2661			*polled = true;
2662			status = OCRDMA_CQE_WR_FLUSH_ERR;
2663			expand = ocrdma_update_err_scqe(ibwc, cqe, qp, status);
2664		} else {
2665			*polled = false;
2666			*stop = true;
2667			expand = false;
2668		}
2669	} else if (is_hw_rq_empty(qp)) {
2670		/* Do nothing */
2671		expand = false;
2672		*polled = false;
2673		*stop = false;
2674	} else {
2675		*polled = true;
2676		expand = ocrdma_update_err_rcqe(ibwc, cqe, qp, status);
2677	}
2678	return expand;
2679}
2680
2681static void ocrdma_poll_success_rcqe(struct ocrdma_qp *qp,
2682				     struct ocrdma_cqe *cqe, struct ib_wc *ibwc)
2683{
2684	struct ocrdma_dev *dev;
2685
2686	dev = get_ocrdma_dev(qp->ibqp.device);
2687	ibwc->opcode = IB_WC_RECV;
2688	ibwc->qp = &qp->ibqp;
2689	ibwc->status = IB_WC_SUCCESS;
2690
2691	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI)
2692		ocrdma_update_ud_rcqe(dev, ibwc, cqe);
2693	else
2694		ibwc->byte_len = le32_to_cpu(cqe->rq.rxlen);
2695
2696	if (is_cqe_imm(cqe)) {
2697		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2698		ibwc->wc_flags |= IB_WC_WITH_IMM;
2699	} else if (is_cqe_wr_imm(cqe)) {
2700		ibwc->opcode = IB_WC_RECV_RDMA_WITH_IMM;
2701		ibwc->ex.imm_data = htonl(le32_to_cpu(cqe->rq.lkey_immdt));
2702		ibwc->wc_flags |= IB_WC_WITH_IMM;
2703	} else if (is_cqe_invalidated(cqe)) {
2704		ibwc->ex.invalidate_rkey = le32_to_cpu(cqe->rq.lkey_immdt);
2705		ibwc->wc_flags |= IB_WC_WITH_INVALIDATE;
2706	}
2707	if (qp->ibqp.srq) {
2708		ocrdma_update_free_srq_cqe(ibwc, cqe, qp);
2709	} else {
2710		ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2711		ocrdma_hwq_inc_tail(&qp->rq);
2712	}
2713}
2714
2715static bool ocrdma_poll_rcqe(struct ocrdma_qp *qp, struct ocrdma_cqe *cqe,
2716			     struct ib_wc *ibwc, bool *polled, bool *stop)
2717{
2718	int status;
2719	bool expand = false;
2720
2721	ibwc->wc_flags = 0;
2722	if (qp->qp_type == IB_QPT_UD || qp->qp_type == IB_QPT_GSI) {
2723		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2724					OCRDMA_CQE_UD_STATUS_MASK) >>
2725					OCRDMA_CQE_UD_STATUS_SHIFT;
2726	} else {
2727		status = (le32_to_cpu(cqe->flags_status_srcqpn) &
2728			     OCRDMA_CQE_STATUS_MASK) >> OCRDMA_CQE_STATUS_SHIFT;
2729	}
2730
2731	if (status == OCRDMA_CQE_SUCCESS) {
2732		*polled = true;
2733		ocrdma_poll_success_rcqe(qp, cqe, ibwc);
2734	} else {
2735		expand = ocrdma_poll_err_rcqe(qp, cqe, ibwc, polled, stop,
2736					      status);
2737	}
2738	return expand;
2739}
2740
2741static void ocrdma_change_cq_phase(struct ocrdma_cq *cq, struct ocrdma_cqe *cqe,
2742				   u16 cur_getp)
2743{
2744	if (cq->phase_change) {
2745		if (cur_getp == 0)
2746			cq->phase = (~cq->phase & OCRDMA_CQE_VALID);
2747	} else {
2748		/* clear valid bit */
2749		cqe->flags_status_srcqpn = 0;
2750	}
2751}
2752
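/* Walk the hardware CQ. For each valid CQE the poll helpers report through
 * polled whether a work completion was produced, through expand whether the
 * same CQE must be processed again (coalesced/flush completions), and
 * through stop whether polling must end with the CQE left in place for the
 * buddy CQ.
 */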
2753static int ocrdma_poll_hwcq(struct ocrdma_cq *cq, int num_entries,
2754			    struct ib_wc *ibwc)
2755{
2756	u16 qpn = 0;
2757	int i = 0;
2758	bool expand = false;
2759	int polled_hw_cqes = 0;
2760	struct ocrdma_qp *qp = NULL;
2761	struct ocrdma_dev *dev = get_ocrdma_dev(cq->ibcq.device);
2762	struct ocrdma_cqe *cqe;
	u16 cur_getp;
	bool polled = false;
	bool stop = false;
2764
2765	cur_getp = cq->getp;
2766	while (num_entries) {
2767		cqe = cq->va + cur_getp;
2768		/* check whether valid cqe or not */
2769		if (!is_cqe_valid(cq, cqe))
2770			break;
2771		qpn = (le32_to_cpu(cqe->cmn.qpn) & OCRDMA_CQE_QPN_MASK);
2772		/* ignore discarded cqe */
2773		if (qpn == 0)
2774			goto skip_cqe;
2775		qp = dev->qp_tbl[qpn];
2776		BUG_ON(qp == NULL);
2777
2778		if (is_cqe_for_sq(cqe)) {
2779			expand = ocrdma_poll_scqe(qp, cqe, ibwc, &polled,
2780						  &stop);
2781		} else {
2782			expand = ocrdma_poll_rcqe(qp, cqe, ibwc, &polled,
2783						  &stop);
2784		}
2785		if (expand)
2786			goto expand_cqe;
2787		if (stop)
2788			goto stop_cqe;
2789		/* clear qpn to avoid duplicate processing by discard_cqe() */
2790		cqe->cmn.qpn = 0;
2791skip_cqe:
2792		polled_hw_cqes += 1;
2793		cur_getp = (cur_getp + 1) % cq->max_hw_cqe;
2794		ocrdma_change_cq_phase(cq, cqe, cur_getp);
2795expand_cqe:
2796		if (polled) {
2797			num_entries -= 1;
2798			i += 1;
2799			ibwc = ibwc + 1;
2800			polled = false;
2801		}
2802	}
2803stop_cqe:
2804	cq->getp = cur_getp;
2805
2806	if (polled_hw_cqes)
2807		ocrdma_ring_cq_db(dev, cq->id, false, false, polled_hw_cqes);
2808
2809	return i;
2810}
2811
2812/* insert error cqe if the QP's SQ or RQ's CQ matches the CQ under poll. */
2813static int ocrdma_add_err_cqe(struct ocrdma_cq *cq, int num_entries,
2814			      struct ocrdma_qp *qp, struct ib_wc *ibwc)
2815{
2816	int err_cqes = 0;
2817
2818	while (num_entries) {
2819		if (is_hw_sq_empty(qp) && is_hw_rq_empty(qp))
2820			break;
2821		if (!is_hw_sq_empty(qp) && qp->sq_cq == cq) {
2822			ocrdma_update_wc(qp, ibwc, qp->sq.tail);
2823			ocrdma_hwq_inc_tail(&qp->sq);
2824		} else if (!is_hw_rq_empty(qp) && qp->rq_cq == cq) {
2825			ibwc->wr_id = qp->rqe_wr_id_tbl[qp->rq.tail];
2826			ocrdma_hwq_inc_tail(&qp->rq);
2827		} else {
2828			return err_cqes;
2829		}
2830		ibwc->byte_len = 0;
2831		ibwc->status = IB_WC_WR_FLUSH_ERR;
2832		ibwc = ibwc + 1;
2833		err_cqes += 1;
2834		num_entries -= 1;
2835	}
2836	return err_cqes;
2837}
2838
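/* Poll work completions: first drain the adapter CQ, then, if entries are
 * still available, synthesize flush completions for QPs queued on this CQ
 * whose SQ or RQ still has pending entries.
 */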
2839int ocrdma_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc)
2840{
2841	int cqes_to_poll = num_entries;
2842	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2843	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2844	int num_os_cqe = 0, err_cqes = 0;
2845	struct ocrdma_qp *qp;
2846	unsigned long flags;
2847
2848	/* poll cqes from adapter CQ */
2849	spin_lock_irqsave(&cq->cq_lock, flags);
2850	num_os_cqe = ocrdma_poll_hwcq(cq, cqes_to_poll, wc);
2851	spin_unlock_irqrestore(&cq->cq_lock, flags);
2852	cqes_to_poll -= num_os_cqe;
2853
2854	if (cqes_to_poll) {
2855		wc = wc + num_os_cqe;
		/* the adapter returns a single error cqe when a qp moves to
		 * the error state. So insert flush-error cqes for the pending
		 * WQEs and RQEs of every QP whose SQ or RQ uses this CQ.
		 */
2861		spin_lock_irqsave(&dev->flush_q_lock, flags);
2862		list_for_each_entry(qp, &cq->sq_head, sq_entry) {
2863			if (cqes_to_poll == 0)
2864				break;
2865			err_cqes = ocrdma_add_err_cqe(cq, cqes_to_poll, qp, wc);
2866			cqes_to_poll -= err_cqes;
2867			num_os_cqe += err_cqes;
2868			wc = wc + err_cqes;
2869		}
2870		spin_unlock_irqrestore(&dev->flush_q_lock, flags);
2871	}
2872	return num_os_cqe;
2873}
2874
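/* Request a completion notification: re-arm the CQ doorbell and, for
 * IB_CQ_SOLICITED, ask for solicited events only.
 */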
2875int ocrdma_arm_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags cq_flags)
2876{
2877	struct ocrdma_cq *cq = get_ocrdma_cq(ibcq);
2878	struct ocrdma_dev *dev = get_ocrdma_dev(ibcq->device);
2879	u16 cq_id;
2880	unsigned long flags;
2881	bool arm_needed = false, sol_needed = false;
2882
2883	cq_id = cq->id;
2884
2885	spin_lock_irqsave(&cq->cq_lock, flags);
2886	if (cq_flags & IB_CQ_NEXT_COMP || cq_flags & IB_CQ_SOLICITED)
2887		arm_needed = true;
2888	if (cq_flags & IB_CQ_SOLICITED)
2889		sol_needed = true;
2890
2891	ocrdma_ring_cq_db(dev, cq_id, arm_needed, sol_needed, 0);
2892	spin_unlock_irqrestore(&cq->cq_lock, flags);
2893
2894	return 0;
2895}
2896
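/* Allocate a fast-registration MR sized for max_num_sg pages. A typical
 * consumer flow (a sketch of the generic verbs usage, not driver code) is
 * roughly:
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_num_sg);
 *	n = ib_map_mr_sg(mr, sgl, sg_nents, NULL, PAGE_SIZE);
 *	reg_wr.wr.opcode = IB_WR_REG_MR;
 *	reg_wr.mr = mr;
 *	reg_wr.key = mr->rkey;
 *	ib_post_send(qp, &reg_wr.wr, &bad_wr);
 *
 * ocrdma_map_mr_sg() below and ocrdma_build_reg() above implement the
 * mapping and the REG_MR WQE for this flow.
 */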
2897struct ib_mr *ocrdma_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type,
2898			      u32 max_num_sg)
2899{
2900	int status;
2901	struct ocrdma_mr *mr;
2902	struct ocrdma_pd *pd = get_ocrdma_pd(ibpd);
2903	struct ocrdma_dev *dev = get_ocrdma_dev(ibpd->device);
2904
2905	if (mr_type != IB_MR_TYPE_MEM_REG)
2906		return ERR_PTR(-EINVAL);
2907
2908	if (max_num_sg > dev->attr.max_pages_per_frmr)
2909		return ERR_PTR(-EINVAL);
2910
2911	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
2912	if (!mr)
2913		return ERR_PTR(-ENOMEM);
2914
2915	mr->pages = kcalloc(max_num_sg, sizeof(u64), GFP_KERNEL);
2916	if (!mr->pages) {
2917		status = -ENOMEM;
2918		goto pl_err;
2919	}
2920
2921	status = ocrdma_get_pbl_info(dev, mr, max_num_sg);
2922	if (status)
2923		goto pbl_err;
2924	mr->hwmr.fr_mr = 1;
2925	mr->hwmr.remote_rd = 0;
2926	mr->hwmr.remote_wr = 0;
2927	mr->hwmr.local_rd = 0;
2928	mr->hwmr.local_wr = 0;
2929	mr->hwmr.mw_bind = 0;
2930	status = ocrdma_build_pbl_tbl(dev, &mr->hwmr);
2931	if (status)
2932		goto pbl_err;
2933	status = ocrdma_reg_mr(dev, &mr->hwmr, pd->id, 0);
2934	if (status)
2935		goto mbx_err;
2936	mr->ibmr.rkey = mr->hwmr.lkey;
2937	mr->ibmr.lkey = mr->hwmr.lkey;
2938	dev->stag_arr[(mr->hwmr.lkey >> 8) & (OCRDMA_MAX_STAG - 1)] =
2939		(unsigned long) mr;
2940	return &mr->ibmr;
2941mbx_err:
2942	ocrdma_free_mr_pbl_tbl(dev, &mr->hwmr);
2943pbl_err:
2944	kfree(mr->pages);
2945pl_err:
2946	kfree(mr);
	return ERR_PTR(status);
2948}
2949
2950static int ocrdma_set_page(struct ib_mr *ibmr, u64 addr)
2951{
2952	struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
2953
2954	if (unlikely(mr->npages == mr->hwmr.num_pbes))
2955		return -ENOMEM;
2956
2957	mr->pages[mr->npages++] = addr;
2958
2959	return 0;
2960}
2961
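/* Map a scatterlist into the MR's page array via ocrdma_set_page(); the
 * collected pages are consumed later by ocrdma_build_reg() when the REG_MR
 * WQE is posted.
 */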
2962int ocrdma_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2963		     unsigned int *sg_offset)
2964{
2965	struct ocrdma_mr *mr = get_ocrdma_mr(ibmr);
2966
2967	mr->npages = 0;
2968
2969	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, ocrdma_set_page);
2970}
2971