/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/slab.h>
#include <rdma/ib_user_verbs.h>

#include "mlx4_ib.h"

static u32 convert_access(int acc)
{
	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC       : 0) |
	       (acc & IB_ACCESS_REMOTE_WRITE  ? MLX4_PERM_REMOTE_WRITE : 0) |
	       (acc & IB_ACCESS_REMOTE_READ   ? MLX4_PERM_REMOTE_READ  : 0) |
	       (acc & IB_ACCESS_LOCAL_WRITE   ? MLX4_PERM_LOCAL_WRITE  : 0) |
	       (acc & IB_ACCESS_MW_BIND       ? MLX4_PERM_BIND_MW      : 0) |
	       MLX4_PERM_LOCAL_READ;
}
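
/*
 * Illustrative example (not part of the original source): local read access
 * is always granted, so e.g.
 *
 *	convert_access(IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ)
 *
 * yields MLX4_PERM_LOCAL_READ | MLX4_PERM_LOCAL_WRITE | MLX4_PERM_REMOTE_READ.
 */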

static enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type)
{
	switch (type) {
	case IB_MW_TYPE_1:	return MLX4_MW_TYPE_1;
	case IB_MW_TYPE_2:	return MLX4_MW_TYPE_2;
	default:		return -1;
	}
}

struct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx4_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0,
			    ~0ull, convert_access(acc), 0, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

enum {
	MLX4_MAX_MTT_SHIFT = 31
};

static int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev,
					struct mlx4_mtt *mtt,
					u64 mtt_size, u64 mtt_shift, u64 len,
					u64 cur_start_addr, u64 *pages,
					int *start_index, int *npages)
{
	u64 cur_end_addr = cur_start_addr + len;
	u64 cur_end_addr_aligned = 0;
	u64 mtt_entries;
	int err = 0;
	int k;

	len += (cur_start_addr & (mtt_size - 1ULL));
	cur_end_addr_aligned = round_up(cur_end_addr, mtt_size);
	len += (cur_end_addr_aligned - cur_end_addr);
	if (len & (mtt_size - 1ULL)) {
		pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n",
			len, mtt_size);
		return -EINVAL;
	}

	mtt_entries = (len >> mtt_shift);

	/*
	 * Align the MTT start address to the mtt_size.
	 * Required to handle cases when the MR starts in the middle of an MTT
	 * record. Was not required in old code since the physical addresses
	 * provided by the dma subsystem were page aligned, which was also the
	 * MTT size.
	 */
	cur_start_addr = round_down(cur_start_addr, mtt_size);
	/* A new block is started ... */
	for (k = 0; k < mtt_entries; ++k) {
		pages[*npages] = cur_start_addr + (mtt_size * k);
		(*npages)++;
		/*
		 * Be friendly to mlx4_write_mtt() and pass it chunks of
		 * appropriate size.
		 */
		if (*npages == PAGE_SIZE / sizeof(u64)) {
			err = mlx4_write_mtt(dev->dev, mtt, *start_index,
					     *npages, pages);
			if (err)
				return err;

			(*start_index) += *npages;
			*npages = 0;
		}
	}

	return 0;
}

static inline u64 alignment_of(u64 ptr)
{
	return ilog2(ptr & (~(ptr - 1)));
}
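
/*
 * Illustrative example (not part of the original source): alignment_of()
 * returns the index of the lowest set bit, i.e. the largest power-of-two
 * alignment of the address.  For instance alignment_of(0x202000) == 13,
 * since the lowest set bit is 0x2000.
 */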

static int mlx4_ib_umem_calc_block_mtt(u64 next_block_start,
				       u64 current_block_end,
				       u64 block_shift)
{
	/* Check whether the new block is as well aligned as the previous
	 * block: its start address must have zeros in all bits below
	 * block_shift.
	 */
	if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the previous block - reduce
		 * the mtt size accordingly, down to the lowest set bit of
		 * the new block's start address.
		 */
		block_shift = alignment_of(next_block_start);

	/*
	 * Check whether the end of the previous block is as well aligned
	 * as the start of the block.
	 */
	if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0)
		/*
		 * It is not as well aligned as the start of the block -
		 * reduce the mtt size accordingly.
		 */
		block_shift = alignment_of(current_block_end);

	return block_shift;
}
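
/*
 * Illustrative example (numbers are hypothetical, not from the original
 * source): with block_shift = 16, a previous block ending at 0x13000 and a
 * new block starting at 0x140000, the end-of-block check reduces block_shift
 * to alignment_of(0x13000) = 12, i.e. the MTT entry size drops to 4 KB so
 * both blocks can still be described exactly.
 */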

int mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt,
			   struct ib_umem *umem)
{
	u64 *pages;
	u64 len = 0;
	int err = 0;
	u64 mtt_size;
	u64 cur_start_addr = 0;
	u64 mtt_shift;
	int start_index = 0;
	int npages = 0;
	struct scatterlist *sg;
	int i;

	pages = (u64 *) __get_free_page(GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	mtt_shift = mtt->page_shift;
	mtt_size = 1ULL << mtt_shift;

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		if (cur_start_addr + len == sg_dma_address(sg)) {
			/* still the same block */
			len += sg_dma_len(sg);
			continue;
		}
		/*
		 * A new block is started ...
		 * If len is misaligned, write an extra mtt entry to cover the
		 * misaligned area (round up the division)
		 */
		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
						   mtt_shift, len,
						   cur_start_addr,
						   pages, &start_index,
						   &npages);
		if (err)
			goto out;

		cur_start_addr = sg_dma_address(sg);
		len = sg_dma_len(sg);
	}

	/* Handle the last block */
	if (len > 0) {
		/*
		 * If len is misaligned, write an extra mtt entry to cover
		 * the misaligned area (round up the division)
		 */
		err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size,
						   mtt_shift, len,
						   cur_start_addr, pages,
						   &start_index, &npages);
		if (err)
			goto out;
	}

	if (npages)
		err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages);

out:
	free_page((unsigned long) pages);
	return err;
}

/*
 * Calculate the optimal mtt size based on contiguous pages.
 * The function also returns the number of pages that are not aligned to the
 * calculated mtt_size, to be added to the total number of pages. For that we
 * check the first and last chunk lengths and, if either is not aligned to
 * mtt_size, increment the non_aligned_pages count. All chunks in the middle
 * are already handled as part of the mtt shift calculation for both their
 * start and end addresses.
 */
int mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va,
				       int *num_of_mtts)
{
	u64 block_shift = MLX4_MAX_MTT_SHIFT;
	u64 min_shift = PAGE_SHIFT;
	u64 last_block_aligned_end = 0;
	u64 current_block_start = 0;
	u64 first_block_start = 0;
	u64 current_block_len = 0;
	u64 last_block_end = 0;
	struct scatterlist *sg;
	u64 current_block_end;
	u64 misalignment_bits;
	u64 next_block_start;
	u64 total_len = 0;
	int i;

	*num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE);

	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) {
		/*
		 * Initialization - save the first chunk start as the
		 * current_block_start - block means contiguous pages.
		 */
		if (current_block_len == 0 && current_block_start == 0) {
			current_block_start = sg_dma_address(sg);
			first_block_start = current_block_start;
			/*
			 * Find the bits that are different between the physical
			 * address and the virtual address for the start of the
			 * MR.
			 * umem_get aligned the start_va to a page boundary.
			 * Therefore, we need to align the start_va to the same
			 * boundary.
			 * misalignment_bits is needed to handle the case of a
			 * single memory region. In this case, the rest of the
			 * logic will not reduce the block size. If we use a
			 * block size which is bigger than the alignment of the
			 * misalignment bits, we might use the virtual page
			 * number instead of the physical page number, resulting
			 * in access to the wrong data.
			 */
			misalignment_bits =
				(start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^
				current_block_start;
			block_shift = min(alignment_of(misalignment_bits),
					  block_shift);
		}

		/*
		 * Go over the scatter entries and check if they continue the
		 * previous scatter entry.
		 */
		next_block_start = sg_dma_address(sg);
		current_block_end = current_block_start + current_block_len;
		/* If we have a split (non-contig.) between two blocks */
		if (current_block_end != next_block_start) {
			block_shift = mlx4_ib_umem_calc_block_mtt
					(next_block_start,
					 current_block_end,
					 block_shift);

			/*
			 * If we reached the minimum shift for a 4k page, stop
			 * the loop.
			 */
			if (block_shift <= min_shift)
				goto end;

			/*
			 * Account for the block that just ended; the
			 * misalignment of the first and last blocks is added
			 * to total_len after the loop.
			 */
			total_len += current_block_len;

			/* Start a new block */
			current_block_start = next_block_start;
			current_block_len = sg_dma_len(sg);
			continue;
		}
		/* The scatter entry is another part of the current block,
		 * increase the block size.
		 * An entry in the scatterlist can be larger than 4k (page)
		 * because the DMA mapping may merge contiguous blocks.
		 */
		current_block_len += sg_dma_len(sg);
	}

	/* Account for the last block in the total len */
	total_len += current_block_len;
	/* Add to the first block the misalignment that it suffers from. */
	total_len += (first_block_start & ((1ULL << block_shift) - 1ULL));
	last_block_end = current_block_start + current_block_len;
	last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift);
	total_len += (last_block_aligned_end - last_block_end);

	if (total_len & ((1ULL << block_shift) - 1ULL))
		pr_warn("misaligned total length detected (%llu, %llu)!\n",
			total_len, block_shift);

	*num_of_mtts = total_len >> block_shift;
end:
	if (block_shift < min_shift) {
		/*
		 * If the shift is less than the minimum, warn and fall back
		 * to the minimum shift.
		 */
		pr_warn("umem_calc_optimal_mtt_size - unexpected shift %llu\n", block_shift);

		block_shift = min_shift;
	}
	return block_shift;
}
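
/*
 * Illustrative example (numbers are hypothetical, not from the original
 * source): an MR mapped at start_va 0x7f0000100000 whose umem consists of two
 * DMA chunks, 0x100000-0x13ffff and 0x200000-0x23ffff.  The gap between the
 * chunks limits the shift to alignment_of(0x200000) = 21 and then to
 * alignment_of(0x140000) = 18, so the function returns a block shift of 18
 * (256 KB MTT entries) and sets *num_of_mtts = (2 * 0x40000) >> 18 = 2.
 */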

static struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start,
					u64 length, int access_flags)
{
	/*
	 * Force registering the memory as writable if the underlying pages
	 * are writable.  This is so rereg can change the access permissions
	 * from readable to writable without having to run through
	 * ib_umem_get again.
	 */
	if (!ib_access_writable(access_flags)) {
		unsigned long untagged_start = untagged_addr(start);
		struct vm_area_struct *vma;

		mmap_read_lock(current->mm);
		/*
		 * FIXME: Ideally this would iterate over all the vmas that
		 * cover the memory, but for now it requires a single vma to
		 * entirely cover the MR to support RO mappings.
		 */
		vma = find_vma(current->mm, untagged_start);
		if (vma && vma->vm_end >= untagged_start + length &&
		    vma->vm_start <= untagged_start) {
			if (vma->vm_flags & VM_WRITE)
				access_flags |= IB_ACCESS_LOCAL_WRITE;
		} else {
			access_flags |= IB_ACCESS_LOCAL_WRITE;
		}

		mmap_read_unlock(current->mm);
	}

	return ib_umem_get(device, start, length, access_flags);
}

struct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int shift;
	int err;
	int n;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags);
	if (IS_ERR(mr->umem)) {
		err = PTR_ERR(mr->umem);
		goto err_free;
	}

	shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length,
			    convert_access(access_flags), n, shift, &mr->mmr);
	if (err)
		goto err_umem;

	err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem);
	if (err)
		goto err_mr;

	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_mr;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.page_size = 1U << shift;

	return &mr->ibmr;

err_mr:
	(void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr);

err_umem:
	ib_umem_release(mr->umem);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

int mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags,
			  u64 start, u64 length, u64 virt_addr,
			  int mr_access_flags, struct ib_pd *pd,
			  struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(mr->device);
	struct mlx4_ib_mr *mmr = to_mmr(mr);
	struct mlx4_mpt_entry *mpt_entry;
	struct mlx4_mpt_entry **pmpt_entry = &mpt_entry;
	int err;

	/* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs,
	 * we assume that the calls can't run concurrently. Otherwise, a
	 * race exists.
	 */
	err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry);
	if (err)
		return err;

	if (flags & IB_MR_REREG_PD) {
		err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry,
					   to_mpd(pd)->pdn);
		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_ACCESS) {
		if (ib_access_writable(mr_access_flags) &&
		    !mmr->umem->writable) {
			err = -EPERM;
			goto release_mpt_entry;
		}

		err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry,
					       convert_access(mr_access_flags));
		if (err)
			goto release_mpt_entry;
	}

	if (flags & IB_MR_REREG_TRANS) {
		int shift;
		int n;

		mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
		ib_umem_release(mmr->umem);
		mmr->umem = mlx4_get_umem_mr(mr->device, start, length,
					     mr_access_flags);
		if (IS_ERR(mmr->umem)) {
			err = PTR_ERR(mmr->umem);
			/* Prevent mlx4_ib_dereg_mr from freeing an invalid pointer */
			mmr->umem = NULL;
			goto release_mpt_entry;
		}
		n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE);
		shift = PAGE_SHIFT;

		err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr,
					      virt_addr, length, n, shift,
					      *pmpt_entry);
		if (err) {
			ib_umem_release(mmr->umem);
			goto release_mpt_entry;
		}
		mmr->mmr.iova       = virt_addr;
		mmr->mmr.size       = length;

		err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem);
		if (err) {
			mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr);
			ib_umem_release(mmr->umem);
			goto release_mpt_entry;
		}
	}

	/* If we couldn't transfer the MR to the HCA, just remember to
	 * return a failure. But dereg_mr will free the resources.
	 */
	err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry);
	if (!err && flags & IB_MR_REREG_ACCESS)
		mmr->mmr.access = mr_access_flags;

release_mpt_entry:
	mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry);

	return err;
}

static int
mlx4_alloc_priv_pages(struct ib_device *device,
		      struct mlx4_ib_mr *mr,
		      int max_pages)
{
	int ret;

	/* Ensure that size is aligned to DMA cacheline
	 * requirements.
	 * max_pages is limited to MLX4_MAX_FAST_REG_PAGES
	 * so page_map_size will never cross PAGE_SIZE.
	 */
	mr->page_map_size = roundup(max_pages * sizeof(u64),
				    MLX4_MR_PAGES_ALIGN);

	/* Prevent cross page boundary allocation. */
	mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL);
	if (!mr->pages)
		return -ENOMEM;

	mr->page_map = dma_map_single(device->dev.parent, mr->pages,
				      mr->page_map_size, DMA_TO_DEVICE);

	if (dma_mapping_error(device->dev.parent, mr->page_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;

err:
	free_page((unsigned long)mr->pages);
	return ret;
}

static void
mlx4_free_priv_pages(struct mlx4_ib_mr *mr)
{
	if (mr->pages) {
		struct ib_device *device = mr->ibmr.device;

		dma_unmap_single(device->dev.parent, mr->page_map,
				 mr->page_map_size, DMA_TO_DEVICE);
		free_page((unsigned long)mr->pages);
		mr->pages = NULL;
	}
}

int mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int ret;

	mlx4_free_priv_pages(mr);

	ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr);
	if (ret)
		return ret;
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);

	return 0;
}

int mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata)
{
	struct mlx4_ib_dev *dev = to_mdev(ibmw->device);
	struct mlx4_ib_mw *mw = to_mmw(ibmw);
	int err;

	err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn,
			    to_mlx4_type(ibmw->type), &mw->mmw);
	if (err)
		return err;

	err = mlx4_mw_enable(dev->dev, &mw->mmw);
	if (err)
		goto err_mw;

	ibmw->rkey = mw->mmw.key;
	return 0;

err_mw:
	mlx4_mw_free(dev->dev, &mw->mmw);
	return err;
}

int mlx4_ib_dealloc_mw(struct ib_mw *ibmw)
{
	struct mlx4_ib_mw *mw = to_mmw(ibmw);

	mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw);
	return 0;
}

struct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx4_ib_dev *dev = to_mdev(pd->device);
	struct mlx4_ib_mr *mr;
	int err;

	if (mr_type != IB_MR_TYPE_MEM_REG ||
	    max_num_sg > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_num_sg, 0, &mr->mmr);
	if (err)
		goto err_free;

	err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
	if (err)
		goto err_free_mr;

	mr->max_pages = max_num_sg;
	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_free_pl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free_pl:
	mr->ibmr.device = pd->device;
	mlx4_free_priv_pages(mr);
err_free_mr:
	(void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);

	if (unlikely(mr->npages == mr->max_pages))
		return -ENOMEM;

	mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);

	return 0;
}

int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int rc;

	mr->npages = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
				   mr->page_map_size, DMA_TO_DEVICE);

	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
				      mr->page_map_size, DMA_TO_DEVICE);

	return rc;
}
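
/*
 * Illustrative usage sketch (not part of this file; the local variable names
 * are hypothetical): an upper-layer protocol typically drives the
 * fast-registration path above through the core verbs, roughly as:
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, MLX4_MAX_FAST_REG_PAGES);
 *	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
 *	// then post an IB_WR_REG_MR work request that references 'mr'
 *
 * which reaches mlx4_ib_alloc_mr() and mlx4_ib_map_mr_sg() through the
 * device's ib_device_ops.
 */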