/*
 * Copyright (c) 2016 Hisilicon Limited.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include <linux/math.h>
#include "hns_roce_device.h"
#include "hns_roce_cmd.h"
#include "hns_roce_hem.h"

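/*
 * An MR key is the MPT index rotated left by 8 bits, and key_to_hw_index()
 * performs the inverse rotation; the two helpers are exact inverses of each
 * other.
 */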
static u32 hw_index_to_key(int ind)
{
	return ((u32)ind >> 24) | ((u32)ind << 8);
}

unsigned long key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}

static int alloc_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int err;
	int id;

	/* Allocate a key for mr from mr_table */
	id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
			     GFP_KERNEL);
	if (id < 0) {
		ibdev_err(ibdev, "failed to alloc id for MR key, id(%d)\n", id);
		return -ENOMEM;
	}

	mr->key = hw_index_to_key(id); /* MR key */

	err = hns_roce_table_get(hr_dev, &hr_dev->mr_table.mtpt_table,
				 (unsigned long)id);
	if (err) {
		ibdev_err(ibdev, "failed to alloc mtpt, ret = %d.\n", err);
		goto err_free_bitmap;
	}

	return 0;
err_free_bitmap:
	ida_free(&mtpt_ida->ida, id);
	return err;
}

static void free_mr_key(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	unsigned long obj = key_to_hw_index(mr->key);

	hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table, obj);
	ida_free(&hr_dev->mr_table.mtpt_ida.ida, (int)obj);
}

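/*
 * Build the PBL (page buffer list) MTR that describes the MR's data buffer.
 * For fast MRs (FRMR) only the MTT is allocated here; the page addresses are
 * filled in later by hns_roce_map_mr_sg().
 */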
static int alloc_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr,
			struct ib_udata *udata, u64 start)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	bool is_fast = mr->type == MR_TYPE_FRMR;
	struct hns_roce_buf_attr buf_attr = {};
	int err;

	mr->pbl_hop_num = is_fast ? 1 : hr_dev->caps.pbl_hop_num;
	buf_attr.page_shift = is_fast ? PAGE_SHIFT :
			      hr_dev->caps.pbl_buf_pg_sz + PAGE_SHIFT;
	buf_attr.region[0].size = mr->size;
	buf_attr.region[0].hopnum = mr->pbl_hop_num;
	buf_attr.region_count = 1;
	buf_attr.user_access = mr->access;
	/* a fast MR's buffer is allocated before mapping, not at creation */
	buf_attr.mtt_only = is_fast;

	err = hns_roce_mtr_create(hr_dev, &mr->pbl_mtr, &buf_attr,
				  hr_dev->caps.pbl_ba_pg_sz + PAGE_SHIFT,
				  udata, start);
	if (err)
		ibdev_err(ibdev, "failed to alloc pbl mtr, ret = %d.\n", err);
	else
		mr->npages = mr->pbl_mtr.hem_cfg.buf_pg_count;

	return err;
}

static void free_mr_pbl(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	hns_roce_mtr_destroy(hr_dev, &mr->pbl_mtr);
}

static void hns_roce_mr_free(struct hns_roce_dev *hr_dev, struct hns_roce_mr *mr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	int ret;

	if (mr->enabled) {
		ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
					      key_to_hw_index(mr->key) &
					      (hr_dev->caps.num_mtpts - 1));
		if (ret)
			ibdev_warn(ibdev, "failed to destroy mpt, ret = %d.\n",
				   ret);
	}

	free_mr_pbl(hr_dev, mr);
	free_mr_key(hr_dev, mr);
}

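/*
 * Write the MPT entry for an MR through a mailbox and ask the hardware to
 * create the corresponding MPT context.
 */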
static int hns_roce_mr_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mr *mr)
{
	unsigned long mtpt_idx = key_to_hw_index(mr->key);
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	int ret;

	/* Allocate mailbox memory */
	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox))
		return PTR_ERR(mailbox);

	if (mr->type != MR_TYPE_FRMR)
		ret = hr_dev->hw->write_mtpt(hr_dev, mailbox->buf, mr);
	else
		ret = hr_dev->hw->frmr_write_mtpt(hr_dev, mailbox->buf, mr);
	if (ret) {
		dev_err(dev, "failed to write mtpt, ret = %d.\n", ret);
		goto err_page;
	}

	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "failed to create mpt, ret = %d.\n", ret);
		goto err_page;
	}

	mr->enabled = 1;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return ret;
}

void hns_roce_init_mr_table(struct hns_roce_dev *hr_dev)
{
	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;

	ida_init(&mtpt_ida->ida);
	mtpt_ida->max = hr_dev->caps.num_mtpts - 1;
	mtpt_ida->min = hr_dev->caps.reserved_mrws;
}

struct ib_mr *hns_roce_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct hns_roce_mr *mr;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_DMA;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->access = acc;

	/* Allocate memory region key */
	hns_roce_hem_list_init(&mr->pbl_mtr.hem_list);
	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_free;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;

	return &mr->ibmr;
err_mr:
	free_mr_key(hr_dev, mr);

err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

struct ib_mr *hns_roce_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				   u64 virt_addr, int access_flags,
				   struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct hns_roce_mr *mr;
	int ret;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->iova = virt_addr;
	mr->size = length;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->access = access_flags;
	mr->type = MR_TYPE_MR;

	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_alloc_mr;

	ret = alloc_mr_pbl(hr_dev, mr, udata, start);
	if (ret)
		goto err_alloc_key;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_alloc_pbl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;

	return &mr->ibmr;

err_alloc_pbl:
	free_mr_pbl(hr_dev, mr);
err_alloc_key:
	free_mr_key(hr_dev, mr);
err_alloc_mr:
	kfree(mr);
	return ERR_PTR(ret);
}

struct ib_mr *hns_roce_rereg_user_mr(struct ib_mr *ibmr, int flags, u64 start,
				     u64 length, u64 virt_addr,
				     int mr_access_flags, struct ib_pd *pd,
				     struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ib_dev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_cmd_mailbox *mailbox;
	unsigned long mtpt_idx;
	int ret;

	if (!mr->enabled)
		return ERR_PTR(-EINVAL);

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox))
		return ERR_CAST(mailbox);

	mtpt_idx = key_to_hw_index(mr->key) & (hr_dev->caps.num_mtpts - 1);

	ret = hns_roce_cmd_mbox(hr_dev, 0, mailbox->dma, HNS_ROCE_CMD_QUERY_MPT,
				mtpt_idx);
	if (ret)
		goto free_cmd_mbox;

	ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
				      mtpt_idx);
	if (ret)
		ibdev_warn(ib_dev, "failed to destroy MPT, ret = %d.\n", ret);

	mr->enabled = 0;
	mr->iova = virt_addr;
	mr->size = length;

	if (flags & IB_MR_REREG_PD)
		mr->pd = to_hr_pd(pd)->pdn;

	if (flags & IB_MR_REREG_ACCESS)
		mr->access = mr_access_flags;

	if (flags & IB_MR_REREG_TRANS) {
		free_mr_pbl(hr_dev, mr);
		ret = alloc_mr_pbl(hr_dev, mr, udata, start);
		if (ret) {
			ibdev_err(ib_dev, "failed to alloc mr PBL, ret = %d.\n",
				  ret);
			goto free_cmd_mbox;
		}
	}

	ret = hr_dev->hw->rereg_write_mtpt(hr_dev, mr, flags, mailbox->buf);
	if (ret) {
		ibdev_err(ib_dev, "failed to write mtpt, ret = %d.\n", ret);
		goto free_cmd_mbox;
	}

	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
				     mtpt_idx);
	if (ret) {
		ibdev_err(ib_dev, "failed to create MPT, ret = %d.\n", ret);
		goto free_cmd_mbox;
	}

	mr->enabled = 1;

free_cmd_mbox:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	if (ret)
		return ERR_PTR(ret);
	return NULL;
}

int hns_roce_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	if (hr_dev->hw->dereg_mr)
		hr_dev->hw->dereg_mr(hr_dev);

	hns_roce_mr_free(hr_dev, mr);
	kfree(mr);

	return 0;
}

struct ib_mr *hns_roce_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
				u32 max_num_sg)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(pd->device);
	struct device *dev = hr_dev->dev;
	struct hns_roce_mr *mr;
	int ret;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	if (max_num_sg > HNS_ROCE_FRMR_MAX_PA) {
		dev_err(dev, "max_num_sg larger than %d\n",
			HNS_ROCE_FRMR_MAX_PA);
		return ERR_PTR(-EINVAL);
	}

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->type = MR_TYPE_FRMR;
	mr->pd = to_hr_pd(pd)->pdn;
	mr->size = max_num_sg * (1 << PAGE_SHIFT);

	/* Allocate memory region key */
	ret = alloc_mr_key(hr_dev, mr);
	if (ret)
		goto err_free;

	ret = alloc_mr_pbl(hr_dev, mr, NULL, 0);
	if (ret)
		goto err_key;

	ret = hns_roce_mr_enable(hr_dev, mr);
	if (ret)
		goto err_pbl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->key;
	mr->ibmr.length = mr->size;

	return &mr->ibmr;

err_pbl:
	free_mr_pbl(hr_dev, mr);
err_key:
	free_mr_key(hr_dev, mr);
err_free:
	kfree(mr);
	return ERR_PTR(ret);
}

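/* Callback for ib_sg_to_pages(): record one page address of a fast MR. */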
static int hns_roce_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct hns_roce_mr *mr = to_hr_mr(ibmr);

	if (likely(mr->npages < mr->pbl_mtr.hem_cfg.buf_pg_count)) {
		mr->page_list[mr->npages++] = addr;
		return 0;
	}

	return -ENOBUFS;
}

int hns_roce_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		       unsigned int *sg_offset)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmr->device);
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_mr *mr = to_hr_mr(ibmr);
	struct hns_roce_mtr *mtr = &mr->pbl_mtr;
	int ret = 0;

	mr->npages = 0;
	mr->page_list = kvcalloc(mr->pbl_mtr.hem_cfg.buf_pg_count,
				 sizeof(dma_addr_t), GFP_KERNEL);
	if (!mr->page_list)
		return ret;

	ret = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, hns_roce_set_page);
	if (ret < 1) {
		ibdev_err(ibdev, "failed to store sg pages %u %u, cnt = %d.\n",
			  mr->npages, mr->pbl_mtr.hem_cfg.buf_pg_count, ret);
		goto err_page_list;
	}

	mtr->hem_cfg.region[0].offset = 0;
	mtr->hem_cfg.region[0].count = mr->npages;
	mtr->hem_cfg.region[0].hopnum = mr->pbl_hop_num;
	mtr->hem_cfg.region_count = 1;
	ret = hns_roce_mtr_map(hr_dev, mtr, mr->page_list, mr->npages);
	if (ret) {
		ibdev_err(ibdev, "failed to map sg mtr, ret = %d.\n", ret);
		ret = 0;
	} else {
		mr->pbl_mtr.hem_cfg.buf_pg_shift = (u32)ilog2(ibmr->page_size);
		ret = mr->npages;
	}

err_page_list:
	kvfree(mr->page_list);
	mr->page_list = NULL;

	return ret;
}

static void hns_roce_mw_free(struct hns_roce_dev *hr_dev,
			     struct hns_roce_mw *mw)
{
	struct device *dev = hr_dev->dev;
	int ret;

	if (mw->enabled) {
		ret = hns_roce_destroy_hw_ctx(hr_dev, HNS_ROCE_CMD_DESTROY_MPT,
					      key_to_hw_index(mw->rkey) &
					      (hr_dev->caps.num_mtpts - 1));
		if (ret)
			dev_warn(dev, "MW DESTROY_MPT failed (%d)\n", ret);

		hns_roce_table_put(hr_dev, &hr_dev->mr_table.mtpt_table,
				   key_to_hw_index(mw->rkey));
	}

	ida_free(&hr_dev->mr_table.mtpt_ida.ida,
		 (int)key_to_hw_index(mw->rkey));
}

static int hns_roce_mw_enable(struct hns_roce_dev *hr_dev,
			      struct hns_roce_mw *mw)
{
	struct hns_roce_mr_table *mr_table = &hr_dev->mr_table;
	struct hns_roce_cmd_mailbox *mailbox;
	struct device *dev = hr_dev->dev;
	unsigned long mtpt_idx = key_to_hw_index(mw->rkey);
	int ret;

	/* prepare HEM entry memory */
	ret = hns_roce_table_get(hr_dev, &mr_table->mtpt_table, mtpt_idx);
	if (ret)
		return ret;

	mailbox = hns_roce_alloc_cmd_mailbox(hr_dev);
	if (IS_ERR(mailbox)) {
		ret = PTR_ERR(mailbox);
		goto err_table;
	}

	ret = hr_dev->hw->mw_write_mtpt(mailbox->buf, mw);
	if (ret) {
		dev_err(dev, "failed to write mtpt for MW, ret = %d.\n", ret);
		goto err_page;
	}

	ret = hns_roce_create_hw_ctx(hr_dev, mailbox, HNS_ROCE_CMD_CREATE_MPT,
				     mtpt_idx & (hr_dev->caps.num_mtpts - 1));
	if (ret) {
		dev_err(dev, "MW CREATE_MPT failed (%d)\n", ret);
		goto err_page;
	}

	mw->enabled = 1;

	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

	return 0;

err_page:
	hns_roce_free_cmd_mailbox(hr_dev, mailbox);

err_table:
	hns_roce_table_put(hr_dev, &mr_table->mtpt_table, mtpt_idx);

	return ret;
}

int hns_roce_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_ida *mtpt_ida = &hr_dev->mr_table.mtpt_ida;
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_mw *mw = to_hr_mw(ibmw);
	int ret;
	int id;

	/* Allocate a key for mw from mr_table */
	id = ida_alloc_range(&mtpt_ida->ida, mtpt_ida->min, mtpt_ida->max,
			     GFP_KERNEL);
	if (id < 0) {
		ibdev_err(ibdev, "failed to alloc id for MW key, id(%d)\n", id);
		return -ENOMEM;
	}

	mw->rkey = hw_index_to_key(id);

	ibmw->rkey = mw->rkey;
	mw->pdn = to_hr_pd(ibmw->pd)->pdn;
	mw->pbl_hop_num = hr_dev->caps.pbl_hop_num;
	mw->pbl_ba_pg_sz = hr_dev->caps.pbl_ba_pg_sz;
	mw->pbl_buf_pg_sz = hr_dev->caps.pbl_buf_pg_sz;

	ret = hns_roce_mw_enable(hr_dev, mw);
	if (ret)
		goto err_mw;

	return 0;

err_mw:
	hns_roce_mw_free(hr_dev, mw);
	return ret;
}

int hns_roce_dealloc_mw(struct ib_mw *ibmw)
{
	struct hns_roce_dev *hr_dev = to_hr_dev(ibmw->device);
	struct hns_roce_mw *mw = to_hr_mw(ibmw);

	hns_roce_mw_free(hr_dev, mw);
	return 0;
}

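/*
 * Write the DMA addresses of one buffer region into its MTT entries.
 * Returns the number of pages written, or -ENOBUFS if no MTT entry can be
 * found for the region.
 */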
static int mtr_map_region(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			  struct hns_roce_buf_region *region, dma_addr_t *pages,
			  int max_count)
{
	int count, npage;
	int offset, end;
	__le64 *mtts;
	u64 addr;
	int i;

	offset = region->offset;
	end = offset + region->count;
	npage = 0;
	while (offset < end && npage < max_count) {
		count = 0;
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  offset, &count);
		if (!mtts)
			return -ENOBUFS;

		for (i = 0; i < count && npage < max_count; i++) {
			addr = pages[npage];

			mtts[i] = cpu_to_le64(addr);
			npage++;
		}
		offset += count;
	}

	return npage;
}

static inline bool mtr_has_mtt(struct hns_roce_buf_attr *attr)
{
	int i;

	for (i = 0; i < attr->region_count; i++)
		if (attr->region[i].hopnum != HNS_ROCE_HOP_NUM_0 &&
		    attr->region[i].hopnum > 0)
			return true;

	/* Because the mtr has only one root base address, a hopnum of 0 means
	 * the root base address equals the first buffer address; thus all
	 * allocated memory must lie in a contiguous space accessed in direct
	 * mode.
	 */
	return false;
}

static inline size_t mtr_bufs_size(struct hns_roce_buf_attr *attr)
{
	size_t size = 0;
	int i;

	for (i = 0; i < attr->region_count; i++)
		size += attr->region[i].size;

	return size;
}

/*
 * Check whether the given pages lie in a contiguous address space.
 * Returns 0 on success, or the index of the first non-contiguous page.
 */
static inline int mtr_check_direct_pages(dma_addr_t *pages, int page_count,
					 unsigned int page_shift)
{
	size_t page_size = 1 << page_shift;
	int i;

	for (i = 1; i < page_count; i++)
		if (pages[i] - pages[i - 1] != page_size)
			return i;

	return 0;
}

static void mtr_free_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	/* release user buffers */
	if (mtr->umem) {
		ib_umem_release(mtr->umem);
		mtr->umem = NULL;
	}

	/* release kernel buffers */
	if (mtr->kmem) {
		hns_roce_buf_free(hr_dev, mtr->kmem);
		mtr->kmem = NULL;
	}
}

static int mtr_alloc_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			  struct hns_roce_buf_attr *buf_attr,
			  struct ib_udata *udata, unsigned long user_addr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	size_t total_size;

	total_size = mtr_bufs_size(buf_attr);

	if (udata) {
		mtr->kmem = NULL;
		mtr->umem = ib_umem_get(ibdev, user_addr, total_size,
					buf_attr->user_access);
		if (IS_ERR_OR_NULL(mtr->umem)) {
			ibdev_err(ibdev, "failed to get umem, ret = %ld.\n",
				  PTR_ERR(mtr->umem));
			return -ENOMEM;
		}
	} else {
		mtr->umem = NULL;
		mtr->kmem = hns_roce_buf_alloc(hr_dev, total_size,
					       buf_attr->page_shift,
					       mtr->hem_cfg.is_direct ?
					       HNS_ROCE_BUF_DIRECT : 0);
		if (IS_ERR(mtr->kmem)) {
			ibdev_err(ibdev, "failed to alloc kmem, ret = %ld.\n",
				  PTR_ERR(mtr->kmem));
			return PTR_ERR(mtr->kmem);
		}
	}

	return 0;
}

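/*
 * Collect the DMA addresses of the umem or kmem buffer into a temporary
 * array and write them to the MTT via hns_roce_mtr_map().
 */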
static int mtr_map_bufs(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			int page_count, unsigned int page_shift)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	dma_addr_t *pages;
	int npage;
	int ret;

	/* alloc a tmp array to store the buffer's dma addresses */
	pages = kvcalloc(page_count, sizeof(dma_addr_t), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	if (mtr->umem)
		npage = hns_roce_get_umem_bufs(hr_dev, pages, page_count,
					       mtr->umem, page_shift);
	else
		npage = hns_roce_get_kmem_bufs(hr_dev, pages, page_count,
					       mtr->kmem, page_shift);

	if (npage != page_count) {
		ibdev_err(ibdev, "failed to get mtr page %d != %d.\n", npage,
			  page_count);
		ret = -ENOBUFS;
		goto err_alloc_list;
	}

	if (mtr->hem_cfg.is_direct && npage > 1) {
		ret = mtr_check_direct_pages(pages, npage, page_shift);
		if (ret) {
			ibdev_err(ibdev, "failed to check %s page: %d / %d.\n",
				  mtr->umem ? "umtr" : "kmtr", ret, npage);
			ret = -ENOBUFS;
			goto err_alloc_list;
		}
	}

	ret = hns_roce_mtr_map(hr_dev, mtr, pages, page_count);
	if (ret)
		ibdev_err(ibdev, "failed to map mtr pages, ret = %d.\n", ret);

err_alloc_list:
	kvfree(pages);

	return ret;
}

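/*
 * Write the given page addresses into the MTT of each region. In direct
 * mode there is no MTT, so only the root BA is recorded.
 */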
int hns_roce_mtr_map(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		     dma_addr_t *pages, unsigned int page_cnt)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	struct hns_roce_buf_region *r;
	unsigned int i, mapped_cnt;
	int ret = 0;

	/*
	 * Only use the first page address as the root ba when hopnum is 0,
	 * because the addresses of all pages are consecutive in this case.
	 */
	if (mtr->hem_cfg.is_direct) {
		mtr->hem_cfg.root_ba = pages[0];
		return 0;
	}

	for (i = 0, mapped_cnt = 0; i < mtr->hem_cfg.region_count &&
	     mapped_cnt < page_cnt; i++) {
		r = &mtr->hem_cfg.region[i];
		/* if hopnum is 0, no need to map pages in this region */
		if (!r->hopnum) {
			mapped_cnt += r->count;
			continue;
		}

		if (r->offset + r->count > page_cnt) {
			ret = -EINVAL;
			ibdev_err(ibdev,
				  "failed to check mtr%u count %u + %u > %u.\n",
				  i, r->offset, r->count, page_cnt);
			return ret;
		}

		ret = mtr_map_region(hr_dev, mtr, r, &pages[r->offset],
				     page_cnt - mapped_cnt);
		if (ret < 0) {
			ibdev_err(ibdev,
				  "failed to map mtr%u offset %u, ret = %d.\n",
				  i, r->offset, ret);
			return ret;
		}
		mapped_cnt += ret;
		ret = 0;
	}

	if (mapped_cnt < page_cnt) {
		ret = -ENOBUFS;
		ibdev_err(ibdev, "failed to map mtr pages count: %u < %u.\n",
			  mapped_cnt, page_cnt);
	}

	return ret;
}

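/*
 * Copy up to mtt_max MTT entries starting at the given offset into mtt_buf
 * and return the number copied; the root BA is reported through base_addr.
 */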
int hns_roce_mtr_find(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
		      u32 offset, u64 *mtt_buf, int mtt_max, u64 *base_addr)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	int mtt_count, left;
	u32 start_index;
	int total = 0;
	__le64 *mtts;
	u32 npage;
	u64 addr;

	if (!mtt_buf || mtt_max < 1)
		goto done;

	/* no mtt memory in direct mode, so just return the buffer address */
	if (cfg->is_direct) {
		start_index = offset >> HNS_HW_PAGE_SHIFT;
		for (mtt_count = 0; mtt_count < cfg->region_count &&
		     total < mtt_max; mtt_count++) {
			npage = cfg->region[mtt_count].offset;
			if (npage < start_index)
				continue;

			addr = cfg->root_ba + (npage << HNS_HW_PAGE_SHIFT);
			mtt_buf[total] = addr;

			total++;
		}

		goto done;
	}

	start_index = offset >> cfg->buf_pg_shift;
	left = mtt_max;
	while (left > 0) {
		mtt_count = 0;
		mtts = hns_roce_hem_list_find_mtt(hr_dev, &mtr->hem_list,
						  start_index + total,
						  &mtt_count);
		if (!mtts || !mtt_count)
			goto done;

		npage = min(mtt_count, left);
		left -= npage;
		for (mtt_count = 0; mtt_count < npage; mtt_count++)
			mtt_buf[total++] = le64_to_cpu(mtts[mtt_count]);
	}

done:
	if (base_addr)
		*base_addr = cfg->root_ba;

	return total;
}

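/*
 * Split the buffer described by attr into pages for each region and fill
 * cfg accordingly. Returns the total page count and reports the chosen
 * buffer page shift through buf_page_shift.
 */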
static int mtr_init_buf_cfg(struct hns_roce_dev *hr_dev,
			    struct hns_roce_buf_attr *attr,
			    struct hns_roce_hem_cfg *cfg,
			    unsigned int *buf_page_shift, u64 unaligned_size)
{
	struct hns_roce_buf_region *r;
	u64 first_region_padding;
	int page_cnt, region_cnt;
	unsigned int page_shift;
	size_t buf_size;

	/* If mtt is disabled, all pages must be within a contiguous range */
	cfg->is_direct = !mtr_has_mtt(attr);
	buf_size = mtr_bufs_size(attr);
	if (cfg->is_direct) {
		/* When the HEM buffer uses 0-level addressing, the page size
		 * equals the whole buffer size. The buffer is split into
		 * small units only to check whether adjacent units are
		 * contiguous; the unit size is fixed at 4K per the hns
		 * ROCEE's requirement.
		 */
		page_shift = HNS_HW_PAGE_SHIFT;

		/* The ROCEE requires the page size to be 4K * 2 ^ N. */
		cfg->buf_pg_count = 1;
		cfg->buf_pg_shift = HNS_HW_PAGE_SHIFT +
			order_base_2(DIV_ROUND_UP(buf_size, HNS_HW_PAGE_SIZE));
		first_region_padding = 0;
	} else {
		page_shift = attr->page_shift;
		cfg->buf_pg_count = DIV_ROUND_UP(buf_size + unaligned_size,
						 1 << page_shift);
		cfg->buf_pg_shift = page_shift;
		first_region_padding = unaligned_size;
	}

	/* Convert the buffer size into a page index and page count for each
	 * region; the buffer's offset is appended to the first region.
	 */
	for (page_cnt = 0, region_cnt = 0; region_cnt < attr->region_count &&
	     region_cnt < ARRAY_SIZE(cfg->region); region_cnt++) {
		r = &cfg->region[region_cnt];
		r->offset = page_cnt;
		buf_size = hr_hw_page_align(attr->region[region_cnt].size +
					    first_region_padding);
		r->count = DIV_ROUND_UP(buf_size, 1 << page_shift);
		first_region_padding = 0;
		page_cnt += r->count;
		r->hopnum = to_hr_hem_hopnum(attr->region[region_cnt].hopnum,
					     r->count);
	}

	cfg->region_count = region_cnt;
	*buf_page_shift = page_shift;

	return page_cnt;
}

static u64 cal_pages_per_l1ba(unsigned int ba_per_bt, unsigned int hopnum)
{
	return int_pow(ba_per_bt, hopnum - 1);
}

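/*
 * Pick the smallest supported BA page size, starting from pg_shift, for
 * which the base addresses of all regions fit into a single root BT page;
 * returns 0 if no such page size exists.
 */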
static unsigned int cal_best_bt_pg_sz(struct hns_roce_dev *hr_dev,
				      struct hns_roce_mtr *mtr,
				      unsigned int pg_shift)
{
	unsigned long cap = hr_dev->caps.page_size_cap;
	struct hns_roce_buf_region *re;
	unsigned int pgs_per_l1ba;
	unsigned int ba_per_bt;
	unsigned int ba_num;
	int i;

	for_each_set_bit_from(pg_shift, &cap, sizeof(cap) * BITS_PER_BYTE) {
		if (!(BIT(pg_shift) & cap))
			continue;

		ba_per_bt = BIT(pg_shift) / BA_BYTE_LEN;
		ba_num = 0;
		for (i = 0; i < mtr->hem_cfg.region_count; i++) {
			re = &mtr->hem_cfg.region[i];
			if (re->hopnum == 0)
				continue;

			pgs_per_l1ba = cal_pages_per_l1ba(ba_per_bt, re->hopnum);
			ba_num += DIV_ROUND_UP(re->count, pgs_per_l1ba);
		}

		if (ba_num <= ba_per_bt)
			return pg_shift;
	}

	return 0;
}

static int mtr_alloc_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			 unsigned int ba_page_shift)
{
	struct hns_roce_hem_cfg *cfg = &mtr->hem_cfg;
	int ret;

	hns_roce_hem_list_init(&mtr->hem_list);
	if (!cfg->is_direct) {
		ba_page_shift = cal_best_bt_pg_sz(hr_dev, mtr, ba_page_shift);
		if (!ba_page_shift)
			return -ERANGE;

		ret = hns_roce_hem_list_request(hr_dev, &mtr->hem_list,
						cfg->region, cfg->region_count,
						ba_page_shift);
		if (ret)
			return ret;
		cfg->root_ba = mtr->hem_list.root_ba;
		cfg->ba_pg_shift = ba_page_shift;
	} else {
		cfg->ba_pg_shift = cfg->buf_pg_shift;
	}

	return 0;
}

static void mtr_free_mtt(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);
}

/**
 * hns_roce_mtr_create - Create hns memory translate region.
 *
 * @hr_dev: RoCE device struct pointer
 * @mtr: memory translate region
 * @buf_attr: buffer attribute for creating mtr
 * @ba_page_shift: page shift for multi-hop base address table
 * @udata: user space context; if NULL, the mtr is for kernel space
 * @user_addr: userspace virtual address to start at
 */
int hns_roce_mtr_create(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr,
			struct hns_roce_buf_attr *buf_attr,
			unsigned int ba_page_shift, struct ib_udata *udata,
			unsigned long user_addr)
{
	struct ib_device *ibdev = &hr_dev->ib_dev;
	unsigned int buf_page_shift = 0;
	int buf_page_cnt;
	int ret;

	buf_page_cnt = mtr_init_buf_cfg(hr_dev, buf_attr, &mtr->hem_cfg,
					&buf_page_shift,
					udata ? user_addr & ~PAGE_MASK : 0);
	if (buf_page_cnt < 1 || buf_page_shift < HNS_HW_PAGE_SHIFT) {
		ibdev_err(ibdev, "failed to init mtr cfg, count %d shift %u.\n",
			  buf_page_cnt, buf_page_shift);
		return -EINVAL;
	}

	ret = mtr_alloc_mtt(hr_dev, mtr, ba_page_shift);
	if (ret) {
		ibdev_err(ibdev, "failed to alloc mtr mtt, ret = %d.\n", ret);
		return ret;
	}

	/* The caller has its own buffer list and invokes hns_roce_mtr_map()
	 * later to finish the MTT configuration.
	 */
	if (buf_attr->mtt_only) {
		mtr->umem = NULL;
		mtr->kmem = NULL;
		return 0;
	}

	ret = mtr_alloc_bufs(hr_dev, mtr, buf_attr, udata, user_addr);
	if (ret) {
		ibdev_err(ibdev, "failed to alloc mtr bufs, ret = %d.\n", ret);
		goto err_alloc_mtt;
	}

	/* Write the buffer's DMA addresses to the MTT */
	ret = mtr_map_bufs(hr_dev, mtr, buf_page_cnt, buf_page_shift);
	if (ret)
		ibdev_err(ibdev, "failed to map mtr bufs, ret = %d.\n", ret);
	else
		return 0;

	mtr_free_bufs(hr_dev, mtr);
err_alloc_mtt:
	mtr_free_mtt(hr_dev, mtr);
	return ret;
}

void hns_roce_mtr_destroy(struct hns_roce_dev *hr_dev, struct hns_roce_mtr *mtr)
{
	/* release multi-hop addressing resource */
	hns_roce_hem_list_release(hr_dev, &mtr->hem_list);

	/* free buffers */
	mtr_free_bufs(hr_dev, mtr);
}