18c2ecf20Sopenharmony_ci/* 28c2ecf20Sopenharmony_ci * Copyright(c) 2020 Cornelis Networks, Inc. 38c2ecf20Sopenharmony_ci * Copyright(c) 2015-2018 Intel Corporation. 48c2ecf20Sopenharmony_ci * 58c2ecf20Sopenharmony_ci * This file is provided under a dual BSD/GPLv2 license. When using or 68c2ecf20Sopenharmony_ci * redistributing this file, you may do so under either license. 78c2ecf20Sopenharmony_ci * 88c2ecf20Sopenharmony_ci * GPL LICENSE SUMMARY 98c2ecf20Sopenharmony_ci * 108c2ecf20Sopenharmony_ci * This program is free software; you can redistribute it and/or modify 118c2ecf20Sopenharmony_ci * it under the terms of version 2 of the GNU General Public License as 128c2ecf20Sopenharmony_ci * published by the Free Software Foundation. 138c2ecf20Sopenharmony_ci * 148c2ecf20Sopenharmony_ci * This program is distributed in the hope that it will be useful, but 158c2ecf20Sopenharmony_ci * WITHOUT ANY WARRANTY; without even the implied warranty of 168c2ecf20Sopenharmony_ci * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 178c2ecf20Sopenharmony_ci * General Public License for more details. 188c2ecf20Sopenharmony_ci * 198c2ecf20Sopenharmony_ci * BSD LICENSE 208c2ecf20Sopenharmony_ci * 218c2ecf20Sopenharmony_ci * Redistribution and use in source and binary forms, with or without 228c2ecf20Sopenharmony_ci * modification, are permitted provided that the following conditions 238c2ecf20Sopenharmony_ci * are met: 248c2ecf20Sopenharmony_ci * 258c2ecf20Sopenharmony_ci * - Redistributions of source code must retain the above copyright 268c2ecf20Sopenharmony_ci * notice, this list of conditions and the following disclaimer. 278c2ecf20Sopenharmony_ci * - Redistributions in binary form must reproduce the above copyright 288c2ecf20Sopenharmony_ci * notice, this list of conditions and the following disclaimer in 298c2ecf20Sopenharmony_ci * the documentation and/or other materials provided with the 308c2ecf20Sopenharmony_ci * distribution. 
318c2ecf20Sopenharmony_ci * - Neither the name of Intel Corporation nor the names of its 328c2ecf20Sopenharmony_ci * contributors may be used to endorse or promote products derived 338c2ecf20Sopenharmony_ci * from this software without specific prior written permission. 348c2ecf20Sopenharmony_ci * 358c2ecf20Sopenharmony_ci * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 368c2ecf20Sopenharmony_ci * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 378c2ecf20Sopenharmony_ci * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 388c2ecf20Sopenharmony_ci * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 398c2ecf20Sopenharmony_ci * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 408c2ecf20Sopenharmony_ci * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 418c2ecf20Sopenharmony_ci * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 428c2ecf20Sopenharmony_ci * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 438c2ecf20Sopenharmony_ci * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 448c2ecf20Sopenharmony_ci * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 458c2ecf20Sopenharmony_ci * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 *
 */
#include <asm/page.h>
#include <linux/string.h>

#include "mmu_rb.h"
#include "user_exp_rcv.h"
#include "trace.h"

static void unlock_exp_tids(struct hfi1_ctxtdata *uctxt,
			    struct exp_tid_set *set,
			    struct hfi1_filedata *fd);
static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages);
static int set_rcvarray_entry(struct hfi1_filedata *fd,
			      struct tid_user_buf *tbuf,
			      u32 rcventry, struct tid_group *grp,
			      u16 pageidx, unsigned int npages);
static void cacheless_tid_rb_remove(struct hfi1_filedata *fdata,
				    struct tid_rb_node *tnode);
static bool tid_rb_invalidate(struct mmu_interval_notifier *mni,
			      const struct mmu_notifier_range *range,
			      unsigned long cur_seq);
static bool tid_cover_invalidate(struct mmu_interval_notifier *mni,
				 const struct mmu_notifier_range *range,
				 unsigned long cur_seq);
static int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *,
			    struct tid_group *grp,
			    unsigned int start, u16 count,
			    u32 *tidlist, unsigned int *tididx,
			    unsigned int *pmapped);
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo);
static void __clear_tid_node(struct hfi1_filedata *fd,
			     struct tid_rb_node *node);
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node);

/* Per-TID-node notifier: invalidation tears down the individual RcvArray entry. */
static const struct mmu_interval_notifier_ops tid_mn_ops = {
	.invalidate = tid_rb_invalidate,
};
/*
 * Whole-buffer "cover" notifier: used only during setup to detect an
 * invalidation racing with hfi1_user_exp_rcv_setup() (see mmu_seq check).
 */
static const struct mmu_interval_notifier_ops tid_cover_ops = {
	.invalidate = tid_cover_invalidate,
};

/*
 * Initialize context and file private data needed for Expected
 * receive caching. This needs to be done after the context has
 * been configured with the eager/expected RcvEntry counts.
 *
 * Allocates fd->entry_to_rb (one slot per expected RcvArray entry) and,
 * when TID caching is enabled (TID_UNMAP capability not set), the
 * fd->invalid_tids array used to report invalidated TIDs to user space.
 *
 * Return: 0 on success, -ENOMEM on allocation failure.
 */
int hfi1_user_exp_rcv_init(struct hfi1_filedata *fd,
			   struct hfi1_ctxtdata *uctxt)
{
	int ret = 0;

	fd->entry_to_rb = kcalloc(uctxt->expected_count,
				  sizeof(struct rb_node *),
				  GFP_KERNEL);
	if (!fd->entry_to_rb)
		return -ENOMEM;

	if (!HFI1_CAP_UGET_MASK(uctxt->flags, TID_UNMAP)) {
		fd->invalid_tid_idx = 0;
		fd->invalid_tids = kcalloc(uctxt->expected_count,
					   sizeof(*fd->invalid_tids),
					   GFP_KERNEL);
		if (!fd->invalid_tids) {
			kfree(fd->entry_to_rb);
			fd->entry_to_rb = NULL;
			return -ENOMEM;
		}
		/* MMU-notifier-based TID caching is in effect for this fd */
		fd->use_mn = true;
	}

	/*
	 * PSM does not have a good way to separate, count, and
	 * effectively enforce a limit on RcvArray entries used by
	 * subctxts (when context sharing is used) when TID caching
	 * is enabled. To help with that, we calculate a per-process
	 * RcvArray entry share and enforce that.
	 * If TID caching is not in use, PSM deals with usage on its
	 * own. In that case, we allow any subctxt to take all of the
	 * entries.
	 *
	 * Make sure that we set the tid counts only after successful
	 * init.
	 */
	spin_lock(&fd->tid_lock);
	if (uctxt->subctxt_cnt && fd->use_mn) {
		u16 remainder;

		/* Even share per subcontext; spread the remainder over
		 * the first 'remainder' subcontexts so all entries are used.
		 */
		fd->tid_limit = uctxt->expected_count / uctxt->subctxt_cnt;
		remainder = uctxt->expected_count % uctxt->subctxt_cnt;
		if (remainder && fd->subctxt < remainder)
			fd->tid_limit++;
	} else {
		fd->tid_limit = uctxt->expected_count;
	}
	spin_unlock(&fd->tid_lock);

	return ret;
}

/*
 * Tear down the per-fd expected receive state: release any TIDs this
 * file still holds in the context's full/used lists, then free the
 * arrays allocated by hfi1_user_exp_rcv_init().
 */
void hfi1_user_exp_rcv_free(struct hfi1_filedata *fd)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;

	mutex_lock(&uctxt->exp_mutex);
	if (!EXP_TID_SET_EMPTY(uctxt->tid_full_list))
		unlock_exp_tids(uctxt, &uctxt->tid_full_list, fd);
	if (!EXP_TID_SET_EMPTY(uctxt->tid_used_list))
		unlock_exp_tids(uctxt, &uctxt->tid_used_list, fd);
	mutex_unlock(&uctxt->exp_mutex);

	kfree(fd->invalid_tids);
	fd->invalid_tids = NULL;

	kfree(fd->entry_to_rb);
	fd->entry_to_rb = NULL;
}

/**
 * unpin_rcv_pages - Release pinned receive buffer pages
 * @fd: per-file data
 * @tidbuf: unmapped buffer descriptor (used when @mapped is false)
 * @node: TID node holding the DMA mapping (used when @mapped is true)
 * @idx: Index of the first page to unpin.
 * @npages: No of pages to unpin.
 * @mapped: true if the pages have been DMA mapped. false otherwise.
 *
 * If the pages have been DMA mapped (indicated by mapped parameter), their
 * info will be passed via a struct tid_rb_node. If they haven't been mapped,
 * their info will be passed via a struct tid_user_buf.
 *
 * Also decrements fd->tid_n_pinned by @npages.
 */
static void unpin_rcv_pages(struct hfi1_filedata *fd,
			    struct tid_user_buf *tidbuf,
			    struct tid_rb_node *node,
			    unsigned int idx,
			    unsigned int npages,
			    bool mapped)
{
	struct page **pages;
	struct hfi1_devdata *dd = fd->uctxt->dd;
	struct mm_struct *mm;

	if (mapped) {
		/* Undo the DMA mapping before releasing the pages */
		pci_unmap_single(dd->pcidev, node->dma_addr,
				 node->npages * PAGE_SIZE, PCI_DMA_FROMDEVICE);
		pages = &node->pages[idx];
		mm = mm_from_tid_node(node);
	} else {
		pages = &tidbuf->pages[idx];
		mm = current->mm;
	}
	hfi1_release_user_pages(mm, pages, npages, mapped);
	fd->tid_n_pinned -= npages;
}

/**
 * pin_rcv_pages - Pin receive buffer pages
 * @fd: per-file data
 * @tidbuf: buffer descriptor; on success @tidbuf->pages is set to the
 *          allocated page array
 *
 * Return: number of pages pinned (> 0) on success; -EINVAL if the buffer
 * exceeds the context's expected_count, -ENOMEM on allocation/pin-limit
 * failure, or the (<= 0) result of hfi1_acquire_user_pages().
 */
static int pin_rcv_pages(struct hfi1_filedata *fd, struct tid_user_buf *tidbuf)
{
	int pinned;
	unsigned int npages = tidbuf->npages;
	unsigned long vaddr = tidbuf->vaddr;
	struct page **pages = NULL;
	struct hfi1_devdata *dd = fd->uctxt->dd;

	if (npages > fd->uctxt->expected_count) {
		dd_dev_err(dd, "Expected buffer too big\n");
		return -EINVAL;
	}

	/* Allocate the array of struct page pointers needed for pinning */
	pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
	if (!pages)
		return -ENOMEM;

	/*
	 * Pin all the pages of the user buffer. If we can't pin all the
	 * pages, accept the amount pinned so far and program only that.
	 * User space knows how to deal with partially programmed buffers.
	 */
	if (!hfi1_can_pin_pages(dd, current->mm, fd->tid_n_pinned, npages)) {
		kfree(pages);
		return -ENOMEM;
	}

	pinned = hfi1_acquire_user_pages(current->mm, vaddr, npages, true, pages);
	if (pinned <= 0) {
		kfree(pages);
		return pinned;
	}
	tidbuf->pages = pages;
	fd->tid_n_pinned += pinned;
	return pinned;
}

/*
 * RcvArray entry allocation for Expected Receives is done by the
 * following algorithm:
 *
 * The context keeps 3 lists of groups of RcvArray entries:
 *   1. List of empty groups - tid_group_list
 *      This list is created during user context creation and
 *      contains elements which describe sets (of 8) of empty
 *      RcvArray entries.
 *   2. List of partially used groups - tid_used_list
 *      This list contains sets of RcvArray entries which are
 *      not completely used up. Another mapping request could
 *      use some or all of the remaining entries.
 *   3. List of full groups - tid_full_list
 *      This is the list where sets that are completely used
 *      up go.
 *
 * An attempt to optimize the usage of RcvArray entries is
 * made by finding all sets of physically contiguous pages in a
 * user's buffer.
 * These physically contiguous sets are further split into
 * sizes supported by the receive engine of the HFI. The
 * resulting sets of pages are stored in struct tid_pageset,
 * which describes the sets as:
 *    * .count - number of pages in this set
 *    * .idx - starting index into struct page ** array
 *                    of this set
 *
 * From this point on, the algorithm deals with the page sets
 * described above. The number of pagesets is divided by the
 * RcvArray group size to produce the number of full groups
 * needed.
 *
 * Groups from the 3 lists are manipulated using the following
 * rules:
 *   1. For each set of 8 pagesets, a complete group from
 *      tid_group_list is taken, programmed, and moved to
 *      the tid_full_list list.
 *   2. For all remaining pagesets:
 *      2.1 If the tid_used_list is empty and the tid_group_list
 *          is empty, stop processing pageset and return only
 *          what has been programmed up to this point.
 *      2.2 If the tid_used_list is empty and the tid_group_list
 *          is not empty, move a group from tid_group_list to
 *          tid_used_list.
 *      2.3 For each group in tid_used_list, program as much as
 *          can fit into the group. If the group becomes fully
 *          used, move it to tid_full_list.
 */
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd,
			    struct hfi1_tid_info *tinfo)
{
	int ret = 0, need_group = 0, pinned;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	struct hfi1_devdata *dd = uctxt->dd;
	unsigned int ngroups, pageidx = 0, pageset_count,
		tididx = 0, mapped, mapped_pages = 0;
	u32 *tidlist = NULL;
	struct tid_user_buf *tidbuf;
	unsigned long mmu_seq = 0;

	/* Reject unaligned or empty buffers up front */
	if (!PAGE_ALIGNED(tinfo->vaddr))
		return -EINVAL;
	if (tinfo->length == 0)
		return -EINVAL;

	tidbuf = kzalloc(sizeof(*tidbuf), GFP_KERNEL);
	if (!tidbuf)
		return -ENOMEM;

	mutex_init(&tidbuf->cover_mutex);
	tidbuf->vaddr = tinfo->vaddr;
	tidbuf->length = tinfo->length;
	tidbuf->npages = num_user_pages(tidbuf->vaddr, tidbuf->length);
	tidbuf->psets = kcalloc(uctxt->expected_count, sizeof(*tidbuf->psets),
				GFP_KERNEL);
	if (!tidbuf->psets) {
		ret = -ENOMEM;
		goto fail_release_mem;
	}

	if (fd->use_mn) {
		/*
		 * Register a "cover" notifier over the whole buffer and
		 * sample the interval sequence number; re-checked below
		 * (mmu_interval_read_retry) to detect an invalidation that
		 * raced with this setup.
		 */
		ret = mmu_interval_notifier_insert(
			&tidbuf->notifier, current->mm,
			tidbuf->vaddr, tidbuf->npages * PAGE_SIZE,
			&tid_cover_ops);
		if (ret)
			goto fail_release_mem;
		mmu_seq = mmu_interval_read_begin(&tidbuf->notifier);
	}

	pinned = pin_rcv_pages(fd, tidbuf);
	if (pinned <= 0) {
		ret = (pinned < 0) ? pinned : -ENOSPC;
		goto fail_unpin;
	}

	/* Find sets of physically contiguous pages */
	tidbuf->n_psets = find_phys_blocks(tidbuf, pinned);

	/* Reserve the number of expected tids to be used. */
	spin_lock(&fd->tid_lock);
	if (fd->tid_used + tidbuf->n_psets > fd->tid_limit)
		pageset_count = fd->tid_limit - fd->tid_used;
	else
		pageset_count = tidbuf->n_psets;
	fd->tid_used += pageset_count;
	spin_unlock(&fd->tid_lock);

	if (!pageset_count) {
		ret = -ENOSPC;
		goto fail_unreserve;
	}

	ngroups = pageset_count / dd->rcv_entries.group_size;
	tidlist = kcalloc(pageset_count, sizeof(*tidlist), GFP_KERNEL);
	if (!tidlist) {
		ret = -ENOMEM;
		goto fail_unreserve;
	}

	tididx = 0;

	/*
	 * From this point on, we are going to be using shared (between master
	 * and subcontexts) context resources. We need to take the lock.
	 */
	mutex_lock(&uctxt->exp_mutex);
	/*
	 * The first step is to program the RcvArray entries which are complete
	 * groups.
	 */
	while (ngroups && uctxt->tid_group_list.count) {
		struct tid_group *grp =
			tid_group_pop(&uctxt->tid_group_list);

		ret = program_rcvarray(fd, tidbuf, grp,
				       pageidx, dd->rcv_entries.group_size,
				       tidlist, &tididx, &mapped);
		/*
		 * If there was a failure to program the RcvArray
		 * entries for the entire group, reset the grp fields
		 * and add the grp back to the free group list.
		 */
		if (ret <= 0) {
			tid_group_add_tail(grp, &uctxt->tid_group_list);
			hfi1_cdbg(TID,
				  "Failed to program RcvArray group %d", ret);
			goto unlock;
		}

		tid_group_add_tail(grp, &uctxt->tid_full_list);
		ngroups--;
		pageidx += ret;
		mapped_pages += mapped;
	}

	while (pageidx < pageset_count) {
		struct tid_group *grp, *ptr;
		/*
		 * If we don't have any partially used tid groups, check
		 * if we have empty groups. If so, take one from there and
		 * put in the partially used list.
		 */
		if (!uctxt->tid_used_list.count || need_group) {
			if (!uctxt->tid_group_list.count)
				goto unlock;

			grp = tid_group_pop(&uctxt->tid_group_list);
			tid_group_add_tail(grp, &uctxt->tid_used_list);
			need_group = 0;
		}
		/*
		 * There is an optimization opportunity here - instead of
		 * fitting as many page sets as we can, check for a group
		 * later on in the list that could fit all of them.
		 */
		list_for_each_entry_safe(grp, ptr, &uctxt->tid_used_list.list,
					 list) {
			unsigned use = min_t(unsigned, pageset_count - pageidx,
					     grp->size - grp->used);

			ret = program_rcvarray(fd, tidbuf, grp,
					       pageidx, use, tidlist,
					       &tididx, &mapped);
			if (ret < 0) {
				hfi1_cdbg(TID,
					  "Failed to program RcvArray entries %d",
					  ret);
				goto unlock;
			} else if (ret > 0) {
				if (grp->used == grp->size)
					tid_group_move(grp,
						       &uctxt->tid_used_list,
						       &uctxt->tid_full_list);
				pageidx += ret;
				mapped_pages += mapped;
				need_group = 0;
				/* Check if we are done so we break out early */
				if (pageidx >= pageset_count)
					break;
			} else if (WARN_ON(ret == 0)) {
				/*
				 * If ret is 0, we did not program any entries
				 * into this group, which can only happen if
				 * we've screwed up the accounting somewhere.
				 * Warn and try to continue.
				 */
				need_group = 1;
			}
		}
	}
unlock:
	mutex_unlock(&uctxt->exp_mutex);
	hfi1_cdbg(TID, "total mapped: tidpairs:%u pages:%u (%d)", tididx,
		  mapped_pages, ret);

	/* fail if nothing was programmed, set error if none provided */
	if (tididx == 0) {
		if (ret >= 0)
			ret = -ENOSPC;
		goto fail_unreserve;
	}

	/* adjust reserved tid_used to actual count */
	spin_lock(&fd->tid_lock);
	fd->tid_used -= pageset_count - tididx;
	spin_unlock(&fd->tid_lock);

	/* unpin all pages not covered by a TID */
	unpin_rcv_pages(fd, tidbuf, NULL, mapped_pages, pinned - mapped_pages,
			false);

	if (fd->use_mn) {
		/* check for an invalidate during setup */
		bool fail = false;

		mutex_lock(&tidbuf->cover_mutex);
		fail = mmu_interval_read_retry(&tidbuf->notifier, mmu_seq);
		mutex_unlock(&tidbuf->cover_mutex);

		if (fail) {
			ret = -EBUSY;
			goto fail_unprogram;
		}
	}

	tinfo->tidcnt = tididx;
	tinfo->length = mapped_pages * PAGE_SIZE;

	if (copy_to_user(u64_to_user_ptr(tinfo->tidlist),
			 tidlist, sizeof(tidlist[0]) * tididx)) {
		ret = -EFAULT;
		goto fail_unprogram;
	}

	if (fd->use_mn)
		mmu_interval_notifier_remove(&tidbuf->notifier);
	kfree(tidbuf->pages);
	kfree(tidbuf->psets);
	kfree(tidbuf);
	kfree(tidlist);
	return 0;

fail_unprogram:
	/* unprogram, unmap, and unpin all allocated TIDs */
	tinfo->tidlist = (unsigned long)tidlist;
	hfi1_user_exp_rcv_clear(fd, tinfo);
	tinfo->tidlist = 0;
	pinned = 0;		/* nothing left to unpin */
	pageset_count = 0;	/* nothing left reserved */
fail_unreserve:
	spin_lock(&fd->tid_lock);
	fd->tid_used -= pageset_count;
	spin_unlock(&fd->tid_lock);
fail_unpin:
	if (fd->use_mn)
		mmu_interval_notifier_remove(&tidbuf->notifier);
	if (pinned > 0)
		unpin_rcv_pages(fd, tidbuf, NULL, 0, pinned, false);
fail_release_mem:
	kfree(tidbuf->pages);
	kfree(tidbuf->psets);
	kfree(tidbuf);
	kfree(tidlist);
	return ret;
}

/*
 * Unprogram the RcvArray entries named in the user-supplied TID list.
 * Stops at the first failure; tinfo->tidcnt is updated to the number of
 * TIDs actually cleared and fd->tid_used is decremented accordingly.
 *
 * Return: 0 on success, -EINVAL if the request exceeds the TIDs in use,
 * a memdup_user() error, or the first unprogram_rcvarray() failure.
 */
int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd,
			    struct hfi1_tid_info *tinfo)
{
	int ret = 0;
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	u32 *tidinfo;
	unsigned tididx;

	if (unlikely(tinfo->tidcnt > fd->tid_used))
		return -EINVAL;

	tidinfo = memdup_user(u64_to_user_ptr(tinfo->tidlist),
			      sizeof(tidinfo[0]) * tinfo->tidcnt);
	if (IS_ERR(tidinfo))
		return PTR_ERR(tidinfo);

	mutex_lock(&uctxt->exp_mutex);
	for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
		ret = unprogram_rcvarray(fd, tidinfo[tididx]);
		if (ret) {
			hfi1_cdbg(TID, "Failed to unprogram rcv array %d",
				  ret);
			break;
		}
	}
	spin_lock(&fd->tid_lock);
	fd->tid_used -= tididx;
	spin_unlock(&fd->tid_lock);
	tinfo->tidcnt = tididx;
	mutex_unlock(&uctxt->exp_mutex);

	kfree(tidinfo);
	return ret;
}

/*
 * Report (and consume) the list of TIDs invalidated by the MMU notifier
 * since the last call, copying them to the user buffer at tinfo->tidlist
 * and clearing the per-subctxt TID_MMU_NOTIFY event bit.
 *
 * Return: 0 on success, -EFAULT on allocation or copy-out failure.
 */
int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd,
			      struct hfi1_tid_info *tinfo)
{
	struct hfi1_ctxtdata *uctxt = fd->uctxt;
	unsigned long *ev = uctxt->dd->events +
		(uctxt_offset(uctxt) + fd->subctxt);
	u32 *array;
	int ret = 0;

	/*
	 * copy_to_user() can sleep, which will leave the invalid_lock
	 * locked and cause the MMU notifier to be blocked on the lock
	 * for a long time.
	 * Copy the data to a local buffer so we can release the lock.
	 */
	array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL);
	if (!array)
		return -EFAULT;	/* NOTE(review): -EFAULT, not -ENOMEM, on alloc failure */

	spin_lock(&fd->invalid_lock);
	if (fd->invalid_tid_idx) {
		memcpy(array, fd->invalid_tids, sizeof(*array) *
		       fd->invalid_tid_idx);
		memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) *
		       fd->invalid_tid_idx);
		tinfo->tidcnt = fd->invalid_tid_idx;
		fd->invalid_tid_idx = 0;
		/*
		 * Reset the user flag while still holding the lock.
		 * Otherwise, PSM can miss events.
		 */
		clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
	} else {
		tinfo->tidcnt = 0;
	}
	spin_unlock(&fd->invalid_lock);

	if (tinfo->tidcnt) {
		if (copy_to_user((void __user *)tinfo->tidlist,
				 array, sizeof(*array) * tinfo->tidcnt))
			ret = -EFAULT;
	}
	kfree(array);

	return ret;
}

/*
 * Split the pinned user buffer into runs of physically contiguous pages
 * and record each run in tidbuf->psets as (idx, count) pairs sized to
 * what the HW receive engine supports (power-of-two, capped at
 * MAX_EXPECTED_BUFFER). Returns the number of page sets produced.
 */
static u32 find_phys_blocks(struct tid_user_buf *tidbuf, unsigned int npages)
{
	unsigned pagecount, pageidx, setcount = 0, i;
	unsigned long pfn, this_pfn;
	struct page **pages = tidbuf->pages;
	struct tid_pageset *list = tidbuf->psets;

	if (!npages)
		return 0;

	/*
	 * Look for sets of physically contiguous pages in the user buffer.
	 * This will allow us to optimize Expected RcvArray entry usage by
	 * using the bigger supported sizes.
	 */
	pfn = page_to_pfn(pages[0]);
	/* i == npages is a sentinel pass that flushes the final run */
	for (pageidx = 0, pagecount = 1, i = 1; i <= npages; i++) {
		this_pfn = i < npages ? page_to_pfn(pages[i]) : 0;

		/*
		 * If the pfn's are not sequential, pages are not physically
		 * contiguous.
		 */
		if (this_pfn != ++pfn) {
			/*
			 * At this point we have to loop over the set of
			 * physically contiguous pages and break them down into
			 * sizes supported by the HW.
			 * There are two main constraints:
			 *     1. The max buffer size is MAX_EXPECTED_BUFFER.
			 *        If the total set size is bigger than that
			 *        program only a MAX_EXPECTED_BUFFER chunk.
			 *     2. The buffer size has to be a power of two. If
			 *        it is not, round down to the closest power of
			 *        2 and program that size.
			 */
			while (pagecount) {
				int maxpages = pagecount;
				u32 bufsize = pagecount * PAGE_SIZE;

				if (bufsize > MAX_EXPECTED_BUFFER)
					maxpages =
						MAX_EXPECTED_BUFFER >>
						PAGE_SHIFT;
				else if (!is_power_of_2(bufsize))
					maxpages =
						rounddown_pow_of_two(bufsize) >>
						PAGE_SHIFT;

				list[setcount].idx = pageidx;
				list[setcount].count = maxpages;
				pagecount -= maxpages;
				pageidx += maxpages;
				setcount++;
			}
			/* Start a new run at the current page */
			pageidx = i;
			pagecount = 1;
			pfn = this_pfn;
		} else {
			pagecount++;
		}
	}
6728c2ecf20Sopenharmony_ci return setcount; 6738c2ecf20Sopenharmony_ci} 6748c2ecf20Sopenharmony_ci 6758c2ecf20Sopenharmony_ci/** 6768c2ecf20Sopenharmony_ci * program_rcvarray() - program an RcvArray group with receive buffers 6778c2ecf20Sopenharmony_ci * @fd: filedata pointer 6788c2ecf20Sopenharmony_ci * @tbuf: pointer to struct tid_user_buf that has the user buffer starting 6798c2ecf20Sopenharmony_ci * virtual address, buffer length, page pointers, pagesets (array of 6808c2ecf20Sopenharmony_ci * struct tid_pageset holding information on physically contiguous 6818c2ecf20Sopenharmony_ci * chunks from the user buffer), and other fields. 6828c2ecf20Sopenharmony_ci * @grp: RcvArray group 6838c2ecf20Sopenharmony_ci * @start: starting index into sets array 6848c2ecf20Sopenharmony_ci * @count: number of struct tid_pageset's to program 6858c2ecf20Sopenharmony_ci * @tidlist: the array of u32 elements when the information about the 6868c2ecf20Sopenharmony_ci * programmed RcvArray entries is to be encoded. 6878c2ecf20Sopenharmony_ci * @tididx: starting offset into tidlist 6888c2ecf20Sopenharmony_ci * @pmapped: (output parameter) number of pages programmed into the RcvArray 6898c2ecf20Sopenharmony_ci * entries. 6908c2ecf20Sopenharmony_ci * 6918c2ecf20Sopenharmony_ci * This function will program up to 'count' number of RcvArray entries from the 6928c2ecf20Sopenharmony_ci * group 'grp'. To make best use of write-combining writes, the function will 6938c2ecf20Sopenharmony_ci * perform writes to the unused RcvArray entries which will be ignored by the 6948c2ecf20Sopenharmony_ci * HW. Each RcvArray entry will be programmed with a physically contiguous 6958c2ecf20Sopenharmony_ci * buffer chunk from the user's virtual buffer. 
6968c2ecf20Sopenharmony_ci * 6978c2ecf20Sopenharmony_ci * Return: 6988c2ecf20Sopenharmony_ci * -EINVAL if the requested count is larger than the size of the group, 6998c2ecf20Sopenharmony_ci * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or 7008c2ecf20Sopenharmony_ci * number of RcvArray entries programmed. 7018c2ecf20Sopenharmony_ci */ 7028c2ecf20Sopenharmony_cistatic int program_rcvarray(struct hfi1_filedata *fd, struct tid_user_buf *tbuf, 7038c2ecf20Sopenharmony_ci struct tid_group *grp, 7048c2ecf20Sopenharmony_ci unsigned int start, u16 count, 7058c2ecf20Sopenharmony_ci u32 *tidlist, unsigned int *tididx, 7068c2ecf20Sopenharmony_ci unsigned int *pmapped) 7078c2ecf20Sopenharmony_ci{ 7088c2ecf20Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 7098c2ecf20Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 7108c2ecf20Sopenharmony_ci u16 idx; 7118c2ecf20Sopenharmony_ci u32 tidinfo = 0, rcventry, useidx = 0; 7128c2ecf20Sopenharmony_ci int mapped = 0; 7138c2ecf20Sopenharmony_ci 7148c2ecf20Sopenharmony_ci /* Count should never be larger than the group size */ 7158c2ecf20Sopenharmony_ci if (count > grp->size) 7168c2ecf20Sopenharmony_ci return -EINVAL; 7178c2ecf20Sopenharmony_ci 7188c2ecf20Sopenharmony_ci /* Find the first unused entry in the group */ 7198c2ecf20Sopenharmony_ci for (idx = 0; idx < grp->size; idx++) { 7208c2ecf20Sopenharmony_ci if (!(grp->map & (1 << idx))) { 7218c2ecf20Sopenharmony_ci useidx = idx; 7228c2ecf20Sopenharmony_ci break; 7238c2ecf20Sopenharmony_ci } 7248c2ecf20Sopenharmony_ci rcv_array_wc_fill(dd, grp->base + idx); 7258c2ecf20Sopenharmony_ci } 7268c2ecf20Sopenharmony_ci 7278c2ecf20Sopenharmony_ci idx = 0; 7288c2ecf20Sopenharmony_ci while (idx < count) { 7298c2ecf20Sopenharmony_ci u16 npages, pageidx, setidx = start + idx; 7308c2ecf20Sopenharmony_ci int ret = 0; 7318c2ecf20Sopenharmony_ci 7328c2ecf20Sopenharmony_ci /* 7338c2ecf20Sopenharmony_ci * If this entry in the group is used, move to the next one. 
7348c2ecf20Sopenharmony_ci * If we go past the end of the group, exit the loop. 7358c2ecf20Sopenharmony_ci */ 7368c2ecf20Sopenharmony_ci if (useidx >= grp->size) { 7378c2ecf20Sopenharmony_ci break; 7388c2ecf20Sopenharmony_ci } else if (grp->map & (1 << useidx)) { 7398c2ecf20Sopenharmony_ci rcv_array_wc_fill(dd, grp->base + useidx); 7408c2ecf20Sopenharmony_ci useidx++; 7418c2ecf20Sopenharmony_ci continue; 7428c2ecf20Sopenharmony_ci } 7438c2ecf20Sopenharmony_ci 7448c2ecf20Sopenharmony_ci rcventry = grp->base + useidx; 7458c2ecf20Sopenharmony_ci npages = tbuf->psets[setidx].count; 7468c2ecf20Sopenharmony_ci pageidx = tbuf->psets[setidx].idx; 7478c2ecf20Sopenharmony_ci 7488c2ecf20Sopenharmony_ci ret = set_rcvarray_entry(fd, tbuf, 7498c2ecf20Sopenharmony_ci rcventry, grp, pageidx, 7508c2ecf20Sopenharmony_ci npages); 7518c2ecf20Sopenharmony_ci if (ret) 7528c2ecf20Sopenharmony_ci return ret; 7538c2ecf20Sopenharmony_ci mapped += npages; 7548c2ecf20Sopenharmony_ci 7558c2ecf20Sopenharmony_ci tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) | 7568c2ecf20Sopenharmony_ci EXP_TID_SET(LEN, npages); 7578c2ecf20Sopenharmony_ci tidlist[(*tididx)++] = tidinfo; 7588c2ecf20Sopenharmony_ci grp->used++; 7598c2ecf20Sopenharmony_ci grp->map |= 1 << useidx++; 7608c2ecf20Sopenharmony_ci idx++; 7618c2ecf20Sopenharmony_ci } 7628c2ecf20Sopenharmony_ci 7638c2ecf20Sopenharmony_ci /* Fill the rest of the group with "blank" writes */ 7648c2ecf20Sopenharmony_ci for (; useidx < grp->size; useidx++) 7658c2ecf20Sopenharmony_ci rcv_array_wc_fill(dd, grp->base + useidx); 7668c2ecf20Sopenharmony_ci *pmapped = mapped; 7678c2ecf20Sopenharmony_ci return idx; 7688c2ecf20Sopenharmony_ci} 7698c2ecf20Sopenharmony_ci 7708c2ecf20Sopenharmony_cistatic int set_rcvarray_entry(struct hfi1_filedata *fd, 7718c2ecf20Sopenharmony_ci struct tid_user_buf *tbuf, 7728c2ecf20Sopenharmony_ci u32 rcventry, struct tid_group *grp, 7738c2ecf20Sopenharmony_ci u16 pageidx, unsigned int npages) 7748c2ecf20Sopenharmony_ci{ 
7758c2ecf20Sopenharmony_ci int ret; 7768c2ecf20Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 7778c2ecf20Sopenharmony_ci struct tid_rb_node *node; 7788c2ecf20Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 7798c2ecf20Sopenharmony_ci dma_addr_t phys; 7808c2ecf20Sopenharmony_ci struct page **pages = tbuf->pages + pageidx; 7818c2ecf20Sopenharmony_ci 7828c2ecf20Sopenharmony_ci /* 7838c2ecf20Sopenharmony_ci * Allocate the node first so we can handle a potential 7848c2ecf20Sopenharmony_ci * failure before we've programmed anything. 7858c2ecf20Sopenharmony_ci */ 7868c2ecf20Sopenharmony_ci node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages), 7878c2ecf20Sopenharmony_ci GFP_KERNEL); 7888c2ecf20Sopenharmony_ci if (!node) 7898c2ecf20Sopenharmony_ci return -ENOMEM; 7908c2ecf20Sopenharmony_ci 7918c2ecf20Sopenharmony_ci phys = pci_map_single(dd->pcidev, 7928c2ecf20Sopenharmony_ci __va(page_to_phys(pages[0])), 7938c2ecf20Sopenharmony_ci npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); 7948c2ecf20Sopenharmony_ci if (dma_mapping_error(&dd->pcidev->dev, phys)) { 7958c2ecf20Sopenharmony_ci dd_dev_err(dd, "Failed to DMA map Exp Rcv pages 0x%llx\n", 7968c2ecf20Sopenharmony_ci phys); 7978c2ecf20Sopenharmony_ci kfree(node); 7988c2ecf20Sopenharmony_ci return -EFAULT; 7998c2ecf20Sopenharmony_ci } 8008c2ecf20Sopenharmony_ci 8018c2ecf20Sopenharmony_ci node->fdata = fd; 8028c2ecf20Sopenharmony_ci mutex_init(&node->invalidate_mutex); 8038c2ecf20Sopenharmony_ci node->phys = page_to_phys(pages[0]); 8048c2ecf20Sopenharmony_ci node->npages = npages; 8058c2ecf20Sopenharmony_ci node->rcventry = rcventry; 8068c2ecf20Sopenharmony_ci node->dma_addr = phys; 8078c2ecf20Sopenharmony_ci node->grp = grp; 8088c2ecf20Sopenharmony_ci node->freed = false; 8098c2ecf20Sopenharmony_ci memcpy(node->pages, pages, sizeof(struct page *) * npages); 8108c2ecf20Sopenharmony_ci 8118c2ecf20Sopenharmony_ci if (fd->use_mn) { 8128c2ecf20Sopenharmony_ci ret = mmu_interval_notifier_insert( 
8138c2ecf20Sopenharmony_ci &node->notifier, current->mm, 8148c2ecf20Sopenharmony_ci tbuf->vaddr + (pageidx * PAGE_SIZE), npages * PAGE_SIZE, 8158c2ecf20Sopenharmony_ci &tid_mn_ops); 8168c2ecf20Sopenharmony_ci if (ret) 8178c2ecf20Sopenharmony_ci goto out_unmap; 8188c2ecf20Sopenharmony_ci } 8198c2ecf20Sopenharmony_ci fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node; 8208c2ecf20Sopenharmony_ci 8218c2ecf20Sopenharmony_ci hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, ilog2(npages) + 1); 8228c2ecf20Sopenharmony_ci trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, 8238c2ecf20Sopenharmony_ci node->notifier.interval_tree.start, node->phys, 8248c2ecf20Sopenharmony_ci phys); 8258c2ecf20Sopenharmony_ci return 0; 8268c2ecf20Sopenharmony_ci 8278c2ecf20Sopenharmony_ciout_unmap: 8288c2ecf20Sopenharmony_ci hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", 8298c2ecf20Sopenharmony_ci node->rcventry, node->notifier.interval_tree.start, 8308c2ecf20Sopenharmony_ci node->phys, ret); 8318c2ecf20Sopenharmony_ci pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, 8328c2ecf20Sopenharmony_ci PCI_DMA_FROMDEVICE); 8338c2ecf20Sopenharmony_ci kfree(node); 8348c2ecf20Sopenharmony_ci return -EFAULT; 8358c2ecf20Sopenharmony_ci} 8368c2ecf20Sopenharmony_ci 8378c2ecf20Sopenharmony_cistatic int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo) 8388c2ecf20Sopenharmony_ci{ 8398c2ecf20Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 8408c2ecf20Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 8418c2ecf20Sopenharmony_ci struct tid_rb_node *node; 8428c2ecf20Sopenharmony_ci u8 tidctrl = EXP_TID_GET(tidinfo, CTRL); 8438c2ecf20Sopenharmony_ci u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry; 8448c2ecf20Sopenharmony_ci 8458c2ecf20Sopenharmony_ci if (tididx >= uctxt->expected_count) { 8468c2ecf20Sopenharmony_ci dd_dev_err(dd, "Invalid RcvArray entry (%u) index for ctxt %u\n", 8478c2ecf20Sopenharmony_ci tididx, uctxt->ctxt); 
8488c2ecf20Sopenharmony_ci return -EINVAL; 8498c2ecf20Sopenharmony_ci } 8508c2ecf20Sopenharmony_ci 8518c2ecf20Sopenharmony_ci if (tidctrl == 0x3) 8528c2ecf20Sopenharmony_ci return -EINVAL; 8538c2ecf20Sopenharmony_ci 8548c2ecf20Sopenharmony_ci rcventry = tididx + (tidctrl - 1); 8558c2ecf20Sopenharmony_ci 8568c2ecf20Sopenharmony_ci node = fd->entry_to_rb[rcventry]; 8578c2ecf20Sopenharmony_ci if (!node || node->rcventry != (uctxt->expected_base + rcventry)) 8588c2ecf20Sopenharmony_ci return -EBADF; 8598c2ecf20Sopenharmony_ci 8608c2ecf20Sopenharmony_ci if (fd->use_mn) 8618c2ecf20Sopenharmony_ci mmu_interval_notifier_remove(&node->notifier); 8628c2ecf20Sopenharmony_ci cacheless_tid_rb_remove(fd, node); 8638c2ecf20Sopenharmony_ci 8648c2ecf20Sopenharmony_ci return 0; 8658c2ecf20Sopenharmony_ci} 8668c2ecf20Sopenharmony_ci 8678c2ecf20Sopenharmony_cistatic void __clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) 8688c2ecf20Sopenharmony_ci{ 8698c2ecf20Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 8708c2ecf20Sopenharmony_ci struct hfi1_devdata *dd = uctxt->dd; 8718c2ecf20Sopenharmony_ci 8728c2ecf20Sopenharmony_ci mutex_lock(&node->invalidate_mutex); 8738c2ecf20Sopenharmony_ci if (node->freed) 8748c2ecf20Sopenharmony_ci goto done; 8758c2ecf20Sopenharmony_ci node->freed = true; 8768c2ecf20Sopenharmony_ci 8778c2ecf20Sopenharmony_ci trace_hfi1_exp_tid_unreg(uctxt->ctxt, fd->subctxt, node->rcventry, 8788c2ecf20Sopenharmony_ci node->npages, 8798c2ecf20Sopenharmony_ci node->notifier.interval_tree.start, node->phys, 8808c2ecf20Sopenharmony_ci node->dma_addr); 8818c2ecf20Sopenharmony_ci 8828c2ecf20Sopenharmony_ci /* Make sure device has seen the write before pages are unpinned */ 8838c2ecf20Sopenharmony_ci hfi1_put_tid(dd, node->rcventry, PT_INVALID_FLUSH, 0, 0); 8848c2ecf20Sopenharmony_ci 8858c2ecf20Sopenharmony_ci unpin_rcv_pages(fd, NULL, node, 0, node->npages, true); 8868c2ecf20Sopenharmony_cidone: 8878c2ecf20Sopenharmony_ci 
mutex_unlock(&node->invalidate_mutex); 8888c2ecf20Sopenharmony_ci} 8898c2ecf20Sopenharmony_ci 8908c2ecf20Sopenharmony_cistatic void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node) 8918c2ecf20Sopenharmony_ci{ 8928c2ecf20Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fd->uctxt; 8938c2ecf20Sopenharmony_ci 8948c2ecf20Sopenharmony_ci __clear_tid_node(fd, node); 8958c2ecf20Sopenharmony_ci 8968c2ecf20Sopenharmony_ci node->grp->used--; 8978c2ecf20Sopenharmony_ci node->grp->map &= ~(1 << (node->rcventry - node->grp->base)); 8988c2ecf20Sopenharmony_ci 8998c2ecf20Sopenharmony_ci if (node->grp->used == node->grp->size - 1) 9008c2ecf20Sopenharmony_ci tid_group_move(node->grp, &uctxt->tid_full_list, 9018c2ecf20Sopenharmony_ci &uctxt->tid_used_list); 9028c2ecf20Sopenharmony_ci else if (!node->grp->used) 9038c2ecf20Sopenharmony_ci tid_group_move(node->grp, &uctxt->tid_used_list, 9048c2ecf20Sopenharmony_ci &uctxt->tid_group_list); 9058c2ecf20Sopenharmony_ci kfree(node); 9068c2ecf20Sopenharmony_ci} 9078c2ecf20Sopenharmony_ci 9088c2ecf20Sopenharmony_ci/* 9098c2ecf20Sopenharmony_ci * As a simple helper for hfi1_user_exp_rcv_free, this function deals with 9108c2ecf20Sopenharmony_ci * clearing nodes in the non-cached case. 
9118c2ecf20Sopenharmony_ci */ 9128c2ecf20Sopenharmony_cistatic void unlock_exp_tids(struct hfi1_ctxtdata *uctxt, 9138c2ecf20Sopenharmony_ci struct exp_tid_set *set, 9148c2ecf20Sopenharmony_ci struct hfi1_filedata *fd) 9158c2ecf20Sopenharmony_ci{ 9168c2ecf20Sopenharmony_ci struct tid_group *grp, *ptr; 9178c2ecf20Sopenharmony_ci int i; 9188c2ecf20Sopenharmony_ci 9198c2ecf20Sopenharmony_ci list_for_each_entry_safe(grp, ptr, &set->list, list) { 9208c2ecf20Sopenharmony_ci list_del_init(&grp->list); 9218c2ecf20Sopenharmony_ci 9228c2ecf20Sopenharmony_ci for (i = 0; i < grp->size; i++) { 9238c2ecf20Sopenharmony_ci if (grp->map & (1 << i)) { 9248c2ecf20Sopenharmony_ci u16 rcventry = grp->base + i; 9258c2ecf20Sopenharmony_ci struct tid_rb_node *node; 9268c2ecf20Sopenharmony_ci 9278c2ecf20Sopenharmony_ci node = fd->entry_to_rb[rcventry - 9288c2ecf20Sopenharmony_ci uctxt->expected_base]; 9298c2ecf20Sopenharmony_ci if (!node || node->rcventry != rcventry) 9308c2ecf20Sopenharmony_ci continue; 9318c2ecf20Sopenharmony_ci 9328c2ecf20Sopenharmony_ci if (fd->use_mn) 9338c2ecf20Sopenharmony_ci mmu_interval_notifier_remove( 9348c2ecf20Sopenharmony_ci &node->notifier); 9358c2ecf20Sopenharmony_ci cacheless_tid_rb_remove(fd, node); 9368c2ecf20Sopenharmony_ci } 9378c2ecf20Sopenharmony_ci } 9388c2ecf20Sopenharmony_ci } 9398c2ecf20Sopenharmony_ci} 9408c2ecf20Sopenharmony_ci 9418c2ecf20Sopenharmony_cistatic bool tid_rb_invalidate(struct mmu_interval_notifier *mni, 9428c2ecf20Sopenharmony_ci const struct mmu_notifier_range *range, 9438c2ecf20Sopenharmony_ci unsigned long cur_seq) 9448c2ecf20Sopenharmony_ci{ 9458c2ecf20Sopenharmony_ci struct tid_rb_node *node = 9468c2ecf20Sopenharmony_ci container_of(mni, struct tid_rb_node, notifier); 9478c2ecf20Sopenharmony_ci struct hfi1_filedata *fdata = node->fdata; 9488c2ecf20Sopenharmony_ci struct hfi1_ctxtdata *uctxt = fdata->uctxt; 9498c2ecf20Sopenharmony_ci 9508c2ecf20Sopenharmony_ci if (node->freed) 9518c2ecf20Sopenharmony_ci return true; 
9528c2ecf20Sopenharmony_ci 9538c2ecf20Sopenharmony_ci /* take action only if unmapping */ 9548c2ecf20Sopenharmony_ci if (range->event != MMU_NOTIFY_UNMAP) 9558c2ecf20Sopenharmony_ci return true; 9568c2ecf20Sopenharmony_ci 9578c2ecf20Sopenharmony_ci trace_hfi1_exp_tid_inval(uctxt->ctxt, fdata->subctxt, 9588c2ecf20Sopenharmony_ci node->notifier.interval_tree.start, 9598c2ecf20Sopenharmony_ci node->rcventry, node->npages, node->dma_addr); 9608c2ecf20Sopenharmony_ci 9618c2ecf20Sopenharmony_ci /* clear the hardware rcvarray entry */ 9628c2ecf20Sopenharmony_ci __clear_tid_node(fdata, node); 9638c2ecf20Sopenharmony_ci 9648c2ecf20Sopenharmony_ci spin_lock(&fdata->invalid_lock); 9658c2ecf20Sopenharmony_ci if (fdata->invalid_tid_idx < uctxt->expected_count) { 9668c2ecf20Sopenharmony_ci fdata->invalid_tids[fdata->invalid_tid_idx] = 9678c2ecf20Sopenharmony_ci rcventry2tidinfo(node->rcventry - uctxt->expected_base); 9688c2ecf20Sopenharmony_ci fdata->invalid_tids[fdata->invalid_tid_idx] |= 9698c2ecf20Sopenharmony_ci EXP_TID_SET(LEN, node->npages); 9708c2ecf20Sopenharmony_ci if (!fdata->invalid_tid_idx) { 9718c2ecf20Sopenharmony_ci unsigned long *ev; 9728c2ecf20Sopenharmony_ci 9738c2ecf20Sopenharmony_ci /* 9748c2ecf20Sopenharmony_ci * hfi1_set_uevent_bits() sets a user event flag 9758c2ecf20Sopenharmony_ci * for all processes. Because calling into the 9768c2ecf20Sopenharmony_ci * driver to process TID cache invalidations is 9778c2ecf20Sopenharmony_ci * expensive and TID cache invalidations are 9788c2ecf20Sopenharmony_ci * handled on a per-process basis, we can 9798c2ecf20Sopenharmony_ci * optimize this to set the flag only for the 9808c2ecf20Sopenharmony_ci * process in question. 
9818c2ecf20Sopenharmony_ci */ 9828c2ecf20Sopenharmony_ci ev = uctxt->dd->events + 9838c2ecf20Sopenharmony_ci (uctxt_offset(uctxt) + fdata->subctxt); 9848c2ecf20Sopenharmony_ci set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); 9858c2ecf20Sopenharmony_ci } 9868c2ecf20Sopenharmony_ci fdata->invalid_tid_idx++; 9878c2ecf20Sopenharmony_ci } 9888c2ecf20Sopenharmony_ci spin_unlock(&fdata->invalid_lock); 9898c2ecf20Sopenharmony_ci return true; 9908c2ecf20Sopenharmony_ci} 9918c2ecf20Sopenharmony_ci 9928c2ecf20Sopenharmony_cistatic bool tid_cover_invalidate(struct mmu_interval_notifier *mni, 9938c2ecf20Sopenharmony_ci const struct mmu_notifier_range *range, 9948c2ecf20Sopenharmony_ci unsigned long cur_seq) 9958c2ecf20Sopenharmony_ci{ 9968c2ecf20Sopenharmony_ci struct tid_user_buf *tidbuf = 9978c2ecf20Sopenharmony_ci container_of(mni, struct tid_user_buf, notifier); 9988c2ecf20Sopenharmony_ci 9998c2ecf20Sopenharmony_ci /* take action only if unmapping */ 10008c2ecf20Sopenharmony_ci if (range->event == MMU_NOTIFY_UNMAP) { 10018c2ecf20Sopenharmony_ci mutex_lock(&tidbuf->cover_mutex); 10028c2ecf20Sopenharmony_ci mmu_interval_set_seq(mni, cur_seq); 10038c2ecf20Sopenharmony_ci mutex_unlock(&tidbuf->cover_mutex); 10048c2ecf20Sopenharmony_ci } 10058c2ecf20Sopenharmony_ci 10068c2ecf20Sopenharmony_ci return true; 10078c2ecf20Sopenharmony_ci} 10088c2ecf20Sopenharmony_ci 10098c2ecf20Sopenharmony_cistatic void cacheless_tid_rb_remove(struct hfi1_filedata *fdata, 10108c2ecf20Sopenharmony_ci struct tid_rb_node *tnode) 10118c2ecf20Sopenharmony_ci{ 10128c2ecf20Sopenharmony_ci u32 base = fdata->uctxt->expected_base; 10138c2ecf20Sopenharmony_ci 10148c2ecf20Sopenharmony_ci fdata->entry_to_rb[tnode->rcventry - base] = NULL; 10158c2ecf20Sopenharmony_ci clear_tid_node(fdata, tnode); 10168c2ecf20Sopenharmony_ci} 1017