/*
 * Copyright (c) 2007 Cisco Systems, Inc. All rights reserved.
 * Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 *  - Redistributions of source code must retain the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer.
 *
 *  - Redistributions in binary form must reproduce the above
 *    copyright notice, this list of conditions and the following
 *    disclaimer in the documentation and/or other materials
 *    provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 2862306a36Sopenharmony_ci * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 2962306a36Sopenharmony_ci * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 3062306a36Sopenharmony_ci * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 3162306a36Sopenharmony_ci * SOFTWARE. 3262306a36Sopenharmony_ci */ 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci#include <linux/slab.h> 3562306a36Sopenharmony_ci#include <rdma/ib_user_verbs.h> 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_ci#include "mlx4_ib.h" 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_cistatic u32 convert_access(int acc) 4062306a36Sopenharmony_ci{ 4162306a36Sopenharmony_ci return (acc & IB_ACCESS_REMOTE_ATOMIC ? MLX4_PERM_ATOMIC : 0) | 4262306a36Sopenharmony_ci (acc & IB_ACCESS_REMOTE_WRITE ? MLX4_PERM_REMOTE_WRITE : 0) | 4362306a36Sopenharmony_ci (acc & IB_ACCESS_REMOTE_READ ? MLX4_PERM_REMOTE_READ : 0) | 4462306a36Sopenharmony_ci (acc & IB_ACCESS_LOCAL_WRITE ? MLX4_PERM_LOCAL_WRITE : 0) | 4562306a36Sopenharmony_ci (acc & IB_ACCESS_MW_BIND ? 
MLX4_PERM_BIND_MW : 0) | 4662306a36Sopenharmony_ci MLX4_PERM_LOCAL_READ; 4762306a36Sopenharmony_ci} 4862306a36Sopenharmony_ci 4962306a36Sopenharmony_cistatic enum mlx4_mw_type to_mlx4_type(enum ib_mw_type type) 5062306a36Sopenharmony_ci{ 5162306a36Sopenharmony_ci switch (type) { 5262306a36Sopenharmony_ci case IB_MW_TYPE_1: return MLX4_MW_TYPE_1; 5362306a36Sopenharmony_ci case IB_MW_TYPE_2: return MLX4_MW_TYPE_2; 5462306a36Sopenharmony_ci default: return -1; 5562306a36Sopenharmony_ci } 5662306a36Sopenharmony_ci} 5762306a36Sopenharmony_ci 5862306a36Sopenharmony_cistruct ib_mr *mlx4_ib_get_dma_mr(struct ib_pd *pd, int acc) 5962306a36Sopenharmony_ci{ 6062306a36Sopenharmony_ci struct mlx4_ib_mr *mr; 6162306a36Sopenharmony_ci int err; 6262306a36Sopenharmony_ci 6362306a36Sopenharmony_ci mr = kzalloc(sizeof(*mr), GFP_KERNEL); 6462306a36Sopenharmony_ci if (!mr) 6562306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 6662306a36Sopenharmony_ci 6762306a36Sopenharmony_ci err = mlx4_mr_alloc(to_mdev(pd->device)->dev, to_mpd(pd)->pdn, 0, 6862306a36Sopenharmony_ci ~0ull, convert_access(acc), 0, 0, &mr->mmr); 6962306a36Sopenharmony_ci if (err) 7062306a36Sopenharmony_ci goto err_free; 7162306a36Sopenharmony_ci 7262306a36Sopenharmony_ci err = mlx4_mr_enable(to_mdev(pd->device)->dev, &mr->mmr); 7362306a36Sopenharmony_ci if (err) 7462306a36Sopenharmony_ci goto err_mr; 7562306a36Sopenharmony_ci 7662306a36Sopenharmony_ci mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; 7762306a36Sopenharmony_ci mr->umem = NULL; 7862306a36Sopenharmony_ci 7962306a36Sopenharmony_ci return &mr->ibmr; 8062306a36Sopenharmony_ci 8162306a36Sopenharmony_cierr_mr: 8262306a36Sopenharmony_ci (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_cierr_free: 8562306a36Sopenharmony_ci kfree(mr); 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci return ERR_PTR(err); 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_cienum { 9162306a36Sopenharmony_ci 
MLX4_MAX_MTT_SHIFT = 31 9262306a36Sopenharmony_ci}; 9362306a36Sopenharmony_ci 9462306a36Sopenharmony_cistatic int mlx4_ib_umem_write_mtt_block(struct mlx4_ib_dev *dev, 9562306a36Sopenharmony_ci struct mlx4_mtt *mtt, 9662306a36Sopenharmony_ci u64 mtt_size, u64 mtt_shift, u64 len, 9762306a36Sopenharmony_ci u64 cur_start_addr, u64 *pages, 9862306a36Sopenharmony_ci int *start_index, int *npages) 9962306a36Sopenharmony_ci{ 10062306a36Sopenharmony_ci u64 cur_end_addr = cur_start_addr + len; 10162306a36Sopenharmony_ci u64 cur_end_addr_aligned = 0; 10262306a36Sopenharmony_ci u64 mtt_entries; 10362306a36Sopenharmony_ci int err = 0; 10462306a36Sopenharmony_ci int k; 10562306a36Sopenharmony_ci 10662306a36Sopenharmony_ci len += (cur_start_addr & (mtt_size - 1ULL)); 10762306a36Sopenharmony_ci cur_end_addr_aligned = round_up(cur_end_addr, mtt_size); 10862306a36Sopenharmony_ci len += (cur_end_addr_aligned - cur_end_addr); 10962306a36Sopenharmony_ci if (len & (mtt_size - 1ULL)) { 11062306a36Sopenharmony_ci pr_warn("write_block: len %llx is not aligned to mtt_size %llx\n", 11162306a36Sopenharmony_ci len, mtt_size); 11262306a36Sopenharmony_ci return -EINVAL; 11362306a36Sopenharmony_ci } 11462306a36Sopenharmony_ci 11562306a36Sopenharmony_ci mtt_entries = (len >> mtt_shift); 11662306a36Sopenharmony_ci 11762306a36Sopenharmony_ci /* 11862306a36Sopenharmony_ci * Align the MTT start address to the mtt_size. 11962306a36Sopenharmony_ci * Required to handle cases when the MR starts in the middle of an MTT 12062306a36Sopenharmony_ci * record. Was not required in old code since the physical addresses 12162306a36Sopenharmony_ci * provided by the dma subsystem were page aligned, which was also the 12262306a36Sopenharmony_ci * MTT size. 12362306a36Sopenharmony_ci */ 12462306a36Sopenharmony_ci cur_start_addr = round_down(cur_start_addr, mtt_size); 12562306a36Sopenharmony_ci /* A new block is started ... 
*/ 12662306a36Sopenharmony_ci for (k = 0; k < mtt_entries; ++k) { 12762306a36Sopenharmony_ci pages[*npages] = cur_start_addr + (mtt_size * k); 12862306a36Sopenharmony_ci (*npages)++; 12962306a36Sopenharmony_ci /* 13062306a36Sopenharmony_ci * Be friendly to mlx4_write_mtt() and pass it chunks of 13162306a36Sopenharmony_ci * appropriate size. 13262306a36Sopenharmony_ci */ 13362306a36Sopenharmony_ci if (*npages == PAGE_SIZE / sizeof(u64)) { 13462306a36Sopenharmony_ci err = mlx4_write_mtt(dev->dev, mtt, *start_index, 13562306a36Sopenharmony_ci *npages, pages); 13662306a36Sopenharmony_ci if (err) 13762306a36Sopenharmony_ci return err; 13862306a36Sopenharmony_ci 13962306a36Sopenharmony_ci (*start_index) += *npages; 14062306a36Sopenharmony_ci *npages = 0; 14162306a36Sopenharmony_ci } 14262306a36Sopenharmony_ci } 14362306a36Sopenharmony_ci 14462306a36Sopenharmony_ci return 0; 14562306a36Sopenharmony_ci} 14662306a36Sopenharmony_ci 14762306a36Sopenharmony_cistatic inline u64 alignment_of(u64 ptr) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci return ilog2(ptr & (~(ptr - 1))); 15062306a36Sopenharmony_ci} 15162306a36Sopenharmony_ci 15262306a36Sopenharmony_cistatic int mlx4_ib_umem_calc_block_mtt(u64 next_block_start, 15362306a36Sopenharmony_ci u64 current_block_end, 15462306a36Sopenharmony_ci u64 block_shift) 15562306a36Sopenharmony_ci{ 15662306a36Sopenharmony_ci /* Check whether the alignment of the new block is aligned as well as 15762306a36Sopenharmony_ci * the previous block. 15862306a36Sopenharmony_ci * Block address must start with zeros till size of entity_size. 15962306a36Sopenharmony_ci */ 16062306a36Sopenharmony_ci if ((next_block_start & ((1ULL << block_shift) - 1ULL)) != 0) 16162306a36Sopenharmony_ci /* 16262306a36Sopenharmony_ci * It is not as well aligned as the previous block-reduce the 16362306a36Sopenharmony_ci * mtt size accordingly. Here we take the last right bit which 16462306a36Sopenharmony_ci * is 1. 
16562306a36Sopenharmony_ci */ 16662306a36Sopenharmony_ci block_shift = alignment_of(next_block_start); 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci /* 16962306a36Sopenharmony_ci * Check whether the alignment of the end of previous block - is it 17062306a36Sopenharmony_ci * aligned as well as the start of the block 17162306a36Sopenharmony_ci */ 17262306a36Sopenharmony_ci if (((current_block_end) & ((1ULL << block_shift) - 1ULL)) != 0) 17362306a36Sopenharmony_ci /* 17462306a36Sopenharmony_ci * It is not as well aligned as the start of the block - 17562306a36Sopenharmony_ci * reduce the mtt size accordingly. 17662306a36Sopenharmony_ci */ 17762306a36Sopenharmony_ci block_shift = alignment_of(current_block_end); 17862306a36Sopenharmony_ci 17962306a36Sopenharmony_ci return block_shift; 18062306a36Sopenharmony_ci} 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ciint mlx4_ib_umem_write_mtt(struct mlx4_ib_dev *dev, struct mlx4_mtt *mtt, 18362306a36Sopenharmony_ci struct ib_umem *umem) 18462306a36Sopenharmony_ci{ 18562306a36Sopenharmony_ci u64 *pages; 18662306a36Sopenharmony_ci u64 len = 0; 18762306a36Sopenharmony_ci int err = 0; 18862306a36Sopenharmony_ci u64 mtt_size; 18962306a36Sopenharmony_ci u64 cur_start_addr = 0; 19062306a36Sopenharmony_ci u64 mtt_shift; 19162306a36Sopenharmony_ci int start_index = 0; 19262306a36Sopenharmony_ci int npages = 0; 19362306a36Sopenharmony_ci struct scatterlist *sg; 19462306a36Sopenharmony_ci int i; 19562306a36Sopenharmony_ci 19662306a36Sopenharmony_ci pages = (u64 *) __get_free_page(GFP_KERNEL); 19762306a36Sopenharmony_ci if (!pages) 19862306a36Sopenharmony_ci return -ENOMEM; 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci mtt_shift = mtt->page_shift; 20162306a36Sopenharmony_ci mtt_size = 1ULL << mtt_shift; 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_ci for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { 20462306a36Sopenharmony_ci if (cur_start_addr + len == sg_dma_address(sg)) { 20562306a36Sopenharmony_ci /* 
still the same block */ 20662306a36Sopenharmony_ci len += sg_dma_len(sg); 20762306a36Sopenharmony_ci continue; 20862306a36Sopenharmony_ci } 20962306a36Sopenharmony_ci /* 21062306a36Sopenharmony_ci * A new block is started ... 21162306a36Sopenharmony_ci * If len is malaligned, write an extra mtt entry to cover the 21262306a36Sopenharmony_ci * misaligned area (round up the division) 21362306a36Sopenharmony_ci */ 21462306a36Sopenharmony_ci err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, 21562306a36Sopenharmony_ci mtt_shift, len, 21662306a36Sopenharmony_ci cur_start_addr, 21762306a36Sopenharmony_ci pages, &start_index, 21862306a36Sopenharmony_ci &npages); 21962306a36Sopenharmony_ci if (err) 22062306a36Sopenharmony_ci goto out; 22162306a36Sopenharmony_ci 22262306a36Sopenharmony_ci cur_start_addr = sg_dma_address(sg); 22362306a36Sopenharmony_ci len = sg_dma_len(sg); 22462306a36Sopenharmony_ci } 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_ci /* Handle the last block */ 22762306a36Sopenharmony_ci if (len > 0) { 22862306a36Sopenharmony_ci /* 22962306a36Sopenharmony_ci * If len is malaligned, write an extra mtt entry to cover 23062306a36Sopenharmony_ci * the misaligned area (round up the division) 23162306a36Sopenharmony_ci */ 23262306a36Sopenharmony_ci err = mlx4_ib_umem_write_mtt_block(dev, mtt, mtt_size, 23362306a36Sopenharmony_ci mtt_shift, len, 23462306a36Sopenharmony_ci cur_start_addr, pages, 23562306a36Sopenharmony_ci &start_index, &npages); 23662306a36Sopenharmony_ci if (err) 23762306a36Sopenharmony_ci goto out; 23862306a36Sopenharmony_ci } 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci if (npages) 24162306a36Sopenharmony_ci err = mlx4_write_mtt(dev->dev, mtt, start_index, npages, pages); 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ciout: 24462306a36Sopenharmony_ci free_page((unsigned long) pages); 24562306a36Sopenharmony_ci return err; 24662306a36Sopenharmony_ci} 24762306a36Sopenharmony_ci 24862306a36Sopenharmony_ci/* 
24962306a36Sopenharmony_ci * Calculate optimal mtt size based on contiguous pages. 25062306a36Sopenharmony_ci * Function will return also the number of pages that are not aligned to the 25162306a36Sopenharmony_ci * calculated mtt_size to be added to total number of pages. For that we should 25262306a36Sopenharmony_ci * check the first chunk length & last chunk length and if not aligned to 25362306a36Sopenharmony_ci * mtt_size we should increment the non_aligned_pages number. All chunks in the 25462306a36Sopenharmony_ci * middle already handled as part of mtt shift calculation for both their start 25562306a36Sopenharmony_ci * & end addresses. 25662306a36Sopenharmony_ci */ 25762306a36Sopenharmony_ciint mlx4_ib_umem_calc_optimal_mtt_size(struct ib_umem *umem, u64 start_va, 25862306a36Sopenharmony_ci int *num_of_mtts) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci u64 block_shift = MLX4_MAX_MTT_SHIFT; 26162306a36Sopenharmony_ci u64 min_shift = PAGE_SHIFT; 26262306a36Sopenharmony_ci u64 last_block_aligned_end = 0; 26362306a36Sopenharmony_ci u64 current_block_start = 0; 26462306a36Sopenharmony_ci u64 first_block_start = 0; 26562306a36Sopenharmony_ci u64 current_block_len = 0; 26662306a36Sopenharmony_ci u64 last_block_end = 0; 26762306a36Sopenharmony_ci struct scatterlist *sg; 26862306a36Sopenharmony_ci u64 current_block_end; 26962306a36Sopenharmony_ci u64 misalignment_bits; 27062306a36Sopenharmony_ci u64 next_block_start; 27162306a36Sopenharmony_ci u64 total_len = 0; 27262306a36Sopenharmony_ci int i; 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_ci *num_of_mtts = ib_umem_num_dma_blocks(umem, PAGE_SIZE); 27562306a36Sopenharmony_ci 27662306a36Sopenharmony_ci for_each_sgtable_dma_sg(&umem->sgt_append.sgt, sg, i) { 27762306a36Sopenharmony_ci /* 27862306a36Sopenharmony_ci * Initialization - save the first chunk start as the 27962306a36Sopenharmony_ci * current_block_start - block means contiguous pages. 
28062306a36Sopenharmony_ci */ 28162306a36Sopenharmony_ci if (current_block_len == 0 && current_block_start == 0) { 28262306a36Sopenharmony_ci current_block_start = sg_dma_address(sg); 28362306a36Sopenharmony_ci first_block_start = current_block_start; 28462306a36Sopenharmony_ci /* 28562306a36Sopenharmony_ci * Find the bits that are different between the physical 28662306a36Sopenharmony_ci * address and the virtual address for the start of the 28762306a36Sopenharmony_ci * MR. 28862306a36Sopenharmony_ci * umem_get aligned the start_va to a page boundary. 28962306a36Sopenharmony_ci * Therefore, we need to align the start va to the same 29062306a36Sopenharmony_ci * boundary. 29162306a36Sopenharmony_ci * misalignment_bits is needed to handle the case of a 29262306a36Sopenharmony_ci * single memory region. In this case, the rest of the 29362306a36Sopenharmony_ci * logic will not reduce the block size. If we use a 29462306a36Sopenharmony_ci * block size which is bigger than the alignment of the 29562306a36Sopenharmony_ci * misalignment bits, we might use the virtual page 29662306a36Sopenharmony_ci * number instead of the physical page number, resulting 29762306a36Sopenharmony_ci * in access to the wrong data. 29862306a36Sopenharmony_ci */ 29962306a36Sopenharmony_ci misalignment_bits = 30062306a36Sopenharmony_ci (start_va & (~(((u64)(PAGE_SIZE)) - 1ULL))) ^ 30162306a36Sopenharmony_ci current_block_start; 30262306a36Sopenharmony_ci block_shift = min(alignment_of(misalignment_bits), 30362306a36Sopenharmony_ci block_shift); 30462306a36Sopenharmony_ci } 30562306a36Sopenharmony_ci 30662306a36Sopenharmony_ci /* 30762306a36Sopenharmony_ci * Go over the scatter entries and check if they continue the 30862306a36Sopenharmony_ci * previous scatter entry. 
30962306a36Sopenharmony_ci */ 31062306a36Sopenharmony_ci next_block_start = sg_dma_address(sg); 31162306a36Sopenharmony_ci current_block_end = current_block_start + current_block_len; 31262306a36Sopenharmony_ci /* If we have a split (non-contig.) between two blocks */ 31362306a36Sopenharmony_ci if (current_block_end != next_block_start) { 31462306a36Sopenharmony_ci block_shift = mlx4_ib_umem_calc_block_mtt 31562306a36Sopenharmony_ci (next_block_start, 31662306a36Sopenharmony_ci current_block_end, 31762306a36Sopenharmony_ci block_shift); 31862306a36Sopenharmony_ci 31962306a36Sopenharmony_ci /* 32062306a36Sopenharmony_ci * If we reached the minimum shift for 4k page we stop 32162306a36Sopenharmony_ci * the loop. 32262306a36Sopenharmony_ci */ 32362306a36Sopenharmony_ci if (block_shift <= min_shift) 32462306a36Sopenharmony_ci goto end; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci /* 32762306a36Sopenharmony_ci * If not saved yet we are in first block - we save the 32862306a36Sopenharmony_ci * length of first block to calculate the 32962306a36Sopenharmony_ci * non_aligned_pages number at the end. 33062306a36Sopenharmony_ci */ 33162306a36Sopenharmony_ci total_len += current_block_len; 33262306a36Sopenharmony_ci 33362306a36Sopenharmony_ci /* Start a new block */ 33462306a36Sopenharmony_ci current_block_start = next_block_start; 33562306a36Sopenharmony_ci current_block_len = sg_dma_len(sg); 33662306a36Sopenharmony_ci continue; 33762306a36Sopenharmony_ci } 33862306a36Sopenharmony_ci /* The scatter entry is another part of the current block, 33962306a36Sopenharmony_ci * increase the block size. 34062306a36Sopenharmony_ci * An entry in the scatter can be larger than 4k (page) as of 34162306a36Sopenharmony_ci * dma mapping which merge some blocks together. 
34262306a36Sopenharmony_ci */ 34362306a36Sopenharmony_ci current_block_len += sg_dma_len(sg); 34462306a36Sopenharmony_ci } 34562306a36Sopenharmony_ci 34662306a36Sopenharmony_ci /* Account for the last block in the total len */ 34762306a36Sopenharmony_ci total_len += current_block_len; 34862306a36Sopenharmony_ci /* Add to the first block the misalignment that it suffers from. */ 34962306a36Sopenharmony_ci total_len += (first_block_start & ((1ULL << block_shift) - 1ULL)); 35062306a36Sopenharmony_ci last_block_end = current_block_start + current_block_len; 35162306a36Sopenharmony_ci last_block_aligned_end = round_up(last_block_end, 1ULL << block_shift); 35262306a36Sopenharmony_ci total_len += (last_block_aligned_end - last_block_end); 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci if (total_len & ((1ULL << block_shift) - 1ULL)) 35562306a36Sopenharmony_ci pr_warn("misaligned total length detected (%llu, %llu)!", 35662306a36Sopenharmony_ci total_len, block_shift); 35762306a36Sopenharmony_ci 35862306a36Sopenharmony_ci *num_of_mtts = total_len >> block_shift; 35962306a36Sopenharmony_ciend: 36062306a36Sopenharmony_ci if (block_shift < min_shift) { 36162306a36Sopenharmony_ci /* 36262306a36Sopenharmony_ci * If shift is less than the min we set a warning and return the 36362306a36Sopenharmony_ci * min shift. 
36462306a36Sopenharmony_ci */ 36562306a36Sopenharmony_ci pr_warn("umem_calc_optimal_mtt_size - unexpected shift %lld\n", block_shift); 36662306a36Sopenharmony_ci 36762306a36Sopenharmony_ci block_shift = min_shift; 36862306a36Sopenharmony_ci } 36962306a36Sopenharmony_ci return block_shift; 37062306a36Sopenharmony_ci} 37162306a36Sopenharmony_ci 37262306a36Sopenharmony_cistatic struct ib_umem *mlx4_get_umem_mr(struct ib_device *device, u64 start, 37362306a36Sopenharmony_ci u64 length, int access_flags) 37462306a36Sopenharmony_ci{ 37562306a36Sopenharmony_ci /* 37662306a36Sopenharmony_ci * Force registering the memory as writable if the underlying pages 37762306a36Sopenharmony_ci * are writable. This is so rereg can change the access permissions 37862306a36Sopenharmony_ci * from readable to writable without having to run through ib_umem_get 37962306a36Sopenharmony_ci * again 38062306a36Sopenharmony_ci */ 38162306a36Sopenharmony_ci if (!ib_access_writable(access_flags)) { 38262306a36Sopenharmony_ci unsigned long untagged_start = untagged_addr(start); 38362306a36Sopenharmony_ci struct vm_area_struct *vma; 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci mmap_read_lock(current->mm); 38662306a36Sopenharmony_ci /* 38762306a36Sopenharmony_ci * FIXME: Ideally this would iterate over all the vmas that 38862306a36Sopenharmony_ci * cover the memory, but for now it requires a single vma to 38962306a36Sopenharmony_ci * entirely cover the MR to support RO mappings. 
39062306a36Sopenharmony_ci */ 39162306a36Sopenharmony_ci vma = find_vma(current->mm, untagged_start); 39262306a36Sopenharmony_ci if (vma && vma->vm_end >= untagged_start + length && 39362306a36Sopenharmony_ci vma->vm_start <= untagged_start) { 39462306a36Sopenharmony_ci if (vma->vm_flags & VM_WRITE) 39562306a36Sopenharmony_ci access_flags |= IB_ACCESS_LOCAL_WRITE; 39662306a36Sopenharmony_ci } else { 39762306a36Sopenharmony_ci access_flags |= IB_ACCESS_LOCAL_WRITE; 39862306a36Sopenharmony_ci } 39962306a36Sopenharmony_ci 40062306a36Sopenharmony_ci mmap_read_unlock(current->mm); 40162306a36Sopenharmony_ci } 40262306a36Sopenharmony_ci 40362306a36Sopenharmony_ci return ib_umem_get(device, start, length, access_flags); 40462306a36Sopenharmony_ci} 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_cistruct ib_mr *mlx4_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 40762306a36Sopenharmony_ci u64 virt_addr, int access_flags, 40862306a36Sopenharmony_ci struct ib_udata *udata) 40962306a36Sopenharmony_ci{ 41062306a36Sopenharmony_ci struct mlx4_ib_dev *dev = to_mdev(pd->device); 41162306a36Sopenharmony_ci struct mlx4_ib_mr *mr; 41262306a36Sopenharmony_ci int shift; 41362306a36Sopenharmony_ci int err; 41462306a36Sopenharmony_ci int n; 41562306a36Sopenharmony_ci 41662306a36Sopenharmony_ci mr = kzalloc(sizeof(*mr), GFP_KERNEL); 41762306a36Sopenharmony_ci if (!mr) 41862306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 41962306a36Sopenharmony_ci 42062306a36Sopenharmony_ci mr->umem = mlx4_get_umem_mr(pd->device, start, length, access_flags); 42162306a36Sopenharmony_ci if (IS_ERR(mr->umem)) { 42262306a36Sopenharmony_ci err = PTR_ERR(mr->umem); 42362306a36Sopenharmony_ci goto err_free; 42462306a36Sopenharmony_ci } 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci shift = mlx4_ib_umem_calc_optimal_mtt_size(mr->umem, start, &n); 42762306a36Sopenharmony_ci 42862306a36Sopenharmony_ci err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, virt_addr, length, 42962306a36Sopenharmony_ci 
convert_access(access_flags), n, shift, &mr->mmr); 43062306a36Sopenharmony_ci if (err) 43162306a36Sopenharmony_ci goto err_umem; 43262306a36Sopenharmony_ci 43362306a36Sopenharmony_ci err = mlx4_ib_umem_write_mtt(dev, &mr->mmr.mtt, mr->umem); 43462306a36Sopenharmony_ci if (err) 43562306a36Sopenharmony_ci goto err_mr; 43662306a36Sopenharmony_ci 43762306a36Sopenharmony_ci err = mlx4_mr_enable(dev->dev, &mr->mmr); 43862306a36Sopenharmony_ci if (err) 43962306a36Sopenharmony_ci goto err_mr; 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key; 44262306a36Sopenharmony_ci mr->ibmr.page_size = 1U << shift; 44362306a36Sopenharmony_ci 44462306a36Sopenharmony_ci return &mr->ibmr; 44562306a36Sopenharmony_ci 44662306a36Sopenharmony_cierr_mr: 44762306a36Sopenharmony_ci (void) mlx4_mr_free(to_mdev(pd->device)->dev, &mr->mmr); 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_cierr_umem: 45062306a36Sopenharmony_ci ib_umem_release(mr->umem); 45162306a36Sopenharmony_ci 45262306a36Sopenharmony_cierr_free: 45362306a36Sopenharmony_ci kfree(mr); 45462306a36Sopenharmony_ci 45562306a36Sopenharmony_ci return ERR_PTR(err); 45662306a36Sopenharmony_ci} 45762306a36Sopenharmony_ci 45862306a36Sopenharmony_cistruct ib_mr *mlx4_ib_rereg_user_mr(struct ib_mr *mr, int flags, u64 start, 45962306a36Sopenharmony_ci u64 length, u64 virt_addr, 46062306a36Sopenharmony_ci int mr_access_flags, struct ib_pd *pd, 46162306a36Sopenharmony_ci struct ib_udata *udata) 46262306a36Sopenharmony_ci{ 46362306a36Sopenharmony_ci struct mlx4_ib_dev *dev = to_mdev(mr->device); 46462306a36Sopenharmony_ci struct mlx4_ib_mr *mmr = to_mmr(mr); 46562306a36Sopenharmony_ci struct mlx4_mpt_entry *mpt_entry; 46662306a36Sopenharmony_ci struct mlx4_mpt_entry **pmpt_entry = &mpt_entry; 46762306a36Sopenharmony_ci int err; 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci /* Since we synchronize this call and mlx4_ib_dereg_mr via uverbs, 47062306a36Sopenharmony_ci * we assume that the 
calls can't run concurrently. Otherwise, a 47162306a36Sopenharmony_ci * race exists. 47262306a36Sopenharmony_ci */ 47362306a36Sopenharmony_ci err = mlx4_mr_hw_get_mpt(dev->dev, &mmr->mmr, &pmpt_entry); 47462306a36Sopenharmony_ci if (err) 47562306a36Sopenharmony_ci return ERR_PTR(err); 47662306a36Sopenharmony_ci 47762306a36Sopenharmony_ci if (flags & IB_MR_REREG_PD) { 47862306a36Sopenharmony_ci err = mlx4_mr_hw_change_pd(dev->dev, *pmpt_entry, 47962306a36Sopenharmony_ci to_mpd(pd)->pdn); 48062306a36Sopenharmony_ci 48162306a36Sopenharmony_ci if (err) 48262306a36Sopenharmony_ci goto release_mpt_entry; 48362306a36Sopenharmony_ci } 48462306a36Sopenharmony_ci 48562306a36Sopenharmony_ci if (flags & IB_MR_REREG_ACCESS) { 48662306a36Sopenharmony_ci if (ib_access_writable(mr_access_flags) && 48762306a36Sopenharmony_ci !mmr->umem->writable) { 48862306a36Sopenharmony_ci err = -EPERM; 48962306a36Sopenharmony_ci goto release_mpt_entry; 49062306a36Sopenharmony_ci } 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci err = mlx4_mr_hw_change_access(dev->dev, *pmpt_entry, 49362306a36Sopenharmony_ci convert_access(mr_access_flags)); 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci if (err) 49662306a36Sopenharmony_ci goto release_mpt_entry; 49762306a36Sopenharmony_ci } 49862306a36Sopenharmony_ci 49962306a36Sopenharmony_ci if (flags & IB_MR_REREG_TRANS) { 50062306a36Sopenharmony_ci int shift; 50162306a36Sopenharmony_ci int n; 50262306a36Sopenharmony_ci 50362306a36Sopenharmony_ci mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); 50462306a36Sopenharmony_ci ib_umem_release(mmr->umem); 50562306a36Sopenharmony_ci mmr->umem = mlx4_get_umem_mr(mr->device, start, length, 50662306a36Sopenharmony_ci mr_access_flags); 50762306a36Sopenharmony_ci if (IS_ERR(mmr->umem)) { 50862306a36Sopenharmony_ci err = PTR_ERR(mmr->umem); 50962306a36Sopenharmony_ci /* Prevent mlx4_ib_dereg_mr from free'ing invalid pointer */ 51062306a36Sopenharmony_ci mmr->umem = NULL; 51162306a36Sopenharmony_ci goto 
release_mpt_entry; 51262306a36Sopenharmony_ci } 51362306a36Sopenharmony_ci n = ib_umem_num_dma_blocks(mmr->umem, PAGE_SIZE); 51462306a36Sopenharmony_ci shift = PAGE_SHIFT; 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci err = mlx4_mr_rereg_mem_write(dev->dev, &mmr->mmr, 51762306a36Sopenharmony_ci virt_addr, length, n, shift, 51862306a36Sopenharmony_ci *pmpt_entry); 51962306a36Sopenharmony_ci if (err) { 52062306a36Sopenharmony_ci ib_umem_release(mmr->umem); 52162306a36Sopenharmony_ci goto release_mpt_entry; 52262306a36Sopenharmony_ci } 52362306a36Sopenharmony_ci mmr->mmr.iova = virt_addr; 52462306a36Sopenharmony_ci mmr->mmr.size = length; 52562306a36Sopenharmony_ci 52662306a36Sopenharmony_ci err = mlx4_ib_umem_write_mtt(dev, &mmr->mmr.mtt, mmr->umem); 52762306a36Sopenharmony_ci if (err) { 52862306a36Sopenharmony_ci mlx4_mr_rereg_mem_cleanup(dev->dev, &mmr->mmr); 52962306a36Sopenharmony_ci ib_umem_release(mmr->umem); 53062306a36Sopenharmony_ci goto release_mpt_entry; 53162306a36Sopenharmony_ci } 53262306a36Sopenharmony_ci } 53362306a36Sopenharmony_ci 53462306a36Sopenharmony_ci /* If we couldn't transfer the MR to the HCA, just remember to 53562306a36Sopenharmony_ci * return a failure. But dereg_mr will free the resources. 
53662306a36Sopenharmony_ci */ 53762306a36Sopenharmony_ci err = mlx4_mr_hw_write_mpt(dev->dev, &mmr->mmr, pmpt_entry); 53862306a36Sopenharmony_ci if (!err && flags & IB_MR_REREG_ACCESS) 53962306a36Sopenharmony_ci mmr->mmr.access = mr_access_flags; 54062306a36Sopenharmony_ci 54162306a36Sopenharmony_cirelease_mpt_entry: 54262306a36Sopenharmony_ci mlx4_mr_hw_put_mpt(dev->dev, pmpt_entry); 54362306a36Sopenharmony_ci if (err) 54462306a36Sopenharmony_ci return ERR_PTR(err); 54562306a36Sopenharmony_ci return NULL; 54662306a36Sopenharmony_ci} 54762306a36Sopenharmony_ci 54862306a36Sopenharmony_cistatic int 54962306a36Sopenharmony_cimlx4_alloc_priv_pages(struct ib_device *device, 55062306a36Sopenharmony_ci struct mlx4_ib_mr *mr, 55162306a36Sopenharmony_ci int max_pages) 55262306a36Sopenharmony_ci{ 55362306a36Sopenharmony_ci int ret; 55462306a36Sopenharmony_ci 55562306a36Sopenharmony_ci /* Ensure that size is aligned to DMA cacheline 55662306a36Sopenharmony_ci * requirements. 55762306a36Sopenharmony_ci * max_pages is limited to MLX4_MAX_FAST_REG_PAGES 55862306a36Sopenharmony_ci * so page_map_size will never cross PAGE_SIZE. 55962306a36Sopenharmony_ci */ 56062306a36Sopenharmony_ci mr->page_map_size = roundup(max_pages * sizeof(u64), 56162306a36Sopenharmony_ci MLX4_MR_PAGES_ALIGN); 56262306a36Sopenharmony_ci 56362306a36Sopenharmony_ci /* Prevent cross page boundary allocation. 
*/ 56462306a36Sopenharmony_ci mr->pages = (__be64 *)get_zeroed_page(GFP_KERNEL); 56562306a36Sopenharmony_ci if (!mr->pages) 56662306a36Sopenharmony_ci return -ENOMEM; 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci mr->page_map = dma_map_single(device->dev.parent, mr->pages, 56962306a36Sopenharmony_ci mr->page_map_size, DMA_TO_DEVICE); 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci if (dma_mapping_error(device->dev.parent, mr->page_map)) { 57262306a36Sopenharmony_ci ret = -ENOMEM; 57362306a36Sopenharmony_ci goto err; 57462306a36Sopenharmony_ci } 57562306a36Sopenharmony_ci 57662306a36Sopenharmony_ci return 0; 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_cierr: 57962306a36Sopenharmony_ci free_page((unsigned long)mr->pages); 58062306a36Sopenharmony_ci return ret; 58162306a36Sopenharmony_ci} 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_cistatic void 58462306a36Sopenharmony_cimlx4_free_priv_pages(struct mlx4_ib_mr *mr) 58562306a36Sopenharmony_ci{ 58662306a36Sopenharmony_ci if (mr->pages) { 58762306a36Sopenharmony_ci struct ib_device *device = mr->ibmr.device; 58862306a36Sopenharmony_ci 58962306a36Sopenharmony_ci dma_unmap_single(device->dev.parent, mr->page_map, 59062306a36Sopenharmony_ci mr->page_map_size, DMA_TO_DEVICE); 59162306a36Sopenharmony_ci free_page((unsigned long)mr->pages); 59262306a36Sopenharmony_ci mr->pages = NULL; 59362306a36Sopenharmony_ci } 59462306a36Sopenharmony_ci} 59562306a36Sopenharmony_ci 59662306a36Sopenharmony_ciint mlx4_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 59762306a36Sopenharmony_ci{ 59862306a36Sopenharmony_ci struct mlx4_ib_mr *mr = to_mmr(ibmr); 59962306a36Sopenharmony_ci int ret; 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci mlx4_free_priv_pages(mr); 60262306a36Sopenharmony_ci 60362306a36Sopenharmony_ci ret = mlx4_mr_free(to_mdev(ibmr->device)->dev, &mr->mmr); 60462306a36Sopenharmony_ci if (ret) 60562306a36Sopenharmony_ci return ret; 60662306a36Sopenharmony_ci if (mr->umem) 
60762306a36Sopenharmony_ci ib_umem_release(mr->umem); 60862306a36Sopenharmony_ci kfree(mr); 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ci return 0; 61162306a36Sopenharmony_ci} 61262306a36Sopenharmony_ci 61362306a36Sopenharmony_ciint mlx4_ib_alloc_mw(struct ib_mw *ibmw, struct ib_udata *udata) 61462306a36Sopenharmony_ci{ 61562306a36Sopenharmony_ci struct mlx4_ib_dev *dev = to_mdev(ibmw->device); 61662306a36Sopenharmony_ci struct mlx4_ib_mw *mw = to_mmw(ibmw); 61762306a36Sopenharmony_ci int err; 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci err = mlx4_mw_alloc(dev->dev, to_mpd(ibmw->pd)->pdn, 62062306a36Sopenharmony_ci to_mlx4_type(ibmw->type), &mw->mmw); 62162306a36Sopenharmony_ci if (err) 62262306a36Sopenharmony_ci return err; 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_ci err = mlx4_mw_enable(dev->dev, &mw->mmw); 62562306a36Sopenharmony_ci if (err) 62662306a36Sopenharmony_ci goto err_mw; 62762306a36Sopenharmony_ci 62862306a36Sopenharmony_ci ibmw->rkey = mw->mmw.key; 62962306a36Sopenharmony_ci return 0; 63062306a36Sopenharmony_ci 63162306a36Sopenharmony_cierr_mw: 63262306a36Sopenharmony_ci mlx4_mw_free(dev->dev, &mw->mmw); 63362306a36Sopenharmony_ci return err; 63462306a36Sopenharmony_ci} 63562306a36Sopenharmony_ci 63662306a36Sopenharmony_ciint mlx4_ib_dealloc_mw(struct ib_mw *ibmw) 63762306a36Sopenharmony_ci{ 63862306a36Sopenharmony_ci struct mlx4_ib_mw *mw = to_mmw(ibmw); 63962306a36Sopenharmony_ci 64062306a36Sopenharmony_ci mlx4_mw_free(to_mdev(ibmw->device)->dev, &mw->mmw); 64162306a36Sopenharmony_ci return 0; 64262306a36Sopenharmony_ci} 64362306a36Sopenharmony_ci 64462306a36Sopenharmony_cistruct ib_mr *mlx4_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 64562306a36Sopenharmony_ci u32 max_num_sg) 64662306a36Sopenharmony_ci{ 64762306a36Sopenharmony_ci struct mlx4_ib_dev *dev = to_mdev(pd->device); 64862306a36Sopenharmony_ci struct mlx4_ib_mr *mr; 64962306a36Sopenharmony_ci int err; 65062306a36Sopenharmony_ci 
	/* Only regular fast-reg MRs are supported, and the page list is
	 * bounded by the HW limit.
	 */
	if (mr_type != IB_MR_TYPE_MEM_REG ||
	    max_num_sg > MLX4_MAX_FAST_REG_PAGES)
		return ERR_PTR(-EINVAL);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	err = mlx4_mr_alloc(dev->dev, to_mpd(pd)->pdn, 0, 0, 0,
			    max_num_sg, 0, &mr->mmr);
	if (err)
		goto err_free;

	/* Allocate the DMA-mapped page list filled in by map_mr_sg. */
	err = mlx4_alloc_priv_pages(pd->device, mr, max_num_sg);
	if (err)
		goto err_free_mr;

	mr->max_pages = max_num_sg;
	err = mlx4_mr_enable(dev->dev, &mr->mmr);
	if (err)
		goto err_free_pl;

	mr->ibmr.rkey = mr->ibmr.lkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free_pl:
	/* mlx4_free_priv_pages() reads mr->ibmr.device, which has not
	 * been set on this MR yet, so set it before freeing.
	 */
	mr->ibmr.device = pd->device;
	mlx4_free_priv_pages(mr);
err_free_mr:
	(void) mlx4_mr_free(dev->dev, &mr->mmr);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

/* ib_sg_to_pages() callback: append one page address to the MR's page
 * list.  Returns -ENOMEM once max_pages entries have been recorded.
 */
static int mlx4_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);

	if (unlikely(mr->npages == mr->max_pages))
		return -ENOMEM;

	/* Store big-endian for the HW, with the MTT "present" flag set. */
	mr->pages[mr->npages++] = cpu_to_be64(addr | MLX4_MTT_FLAG_PRESENT);

	return 0;
}

/* Map a scatterlist into this MR's private page list for fast
 * registration.  The CPU update of the DMA-mapped list is bracketed by
 * sync_for_cpu/sync_for_device so the device sees a coherent view.
 * Returns ib_sg_to_pages()'s result (presumably the number of sg
 * entries consumed, or a negative errno — see its documentation).
 */
int mlx4_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx4_ib_mr *mr = to_mmr(ibmr);
	int rc;

	mr->npages = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->page_map,
				   mr->page_map_size, DMA_TO_DEVICE);

	rc = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, mlx4_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->page_map,
				      mr->page_map_size, DMA_TO_DEVICE);

	return rc;
}